LLVM 23.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
// Hidden boolean command-line switches for the PPC lowering. Each flag's
// effect is summarized by its own cl::desc string; the code that reads the
// flags is not part of this group of declarations.

// -disable-ppc-preinc: turns off preincrement load/store generation.
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
// -disable-ppc-ilp-pref: stops the node scheduling preference from being set
// to ILP.
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
// -disable-ppc-unaligned: turns off unaligned load/store generation.
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
// -disable-ppc-sco: turns off sibling call optimization.
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
// -disable-ppc-innermost-loop-align32: innermost loops are no longer always
// aligned to 32 bytes.
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
// -ppc-use-absolute-jumptables: requests absolute jump tables.
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
// -ppc-disable-perfect-shuffle: disables vector permute decomposition. Unlike
// the flags above, this one is explicitly initialized to true, so the
// decomposition is off unless the option is overridden.
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
// Statistic counters declared for this file (reported under DEBUG_TYPE
// "ppc-lowering"). Each counter is labelled by the description string passed
// to STATISTIC; the code that increments them is not part of this group.
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
// Forward declaration of a file-local predicate over a shuffle-vector node.
// NOTE(review): the two unnamed integer parameters are presumably an element
// width in bytes and a step size -- confirm against the definition.
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
// Forward declaration of a file-local helper that takes a vector value and
// returns an SDValue built in the given DAG.
// NOTE(review): presumably returns a widened form of Vec -- confirm against
// the definition.
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
167 unsigned OpIdx, bool IsByte,
168 const PPCInstrInfo *TII);
169
170// A faster local-[exec|dynamic] TLS access sequence (enabled with the
171// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
172// variables; consistent with the IBM XL compiler, we apply a max size of
173// slightly under 32KB.
175
176// FIXME: Remove this once the bug has been fixed!
178
180 const PPCSubtarget &STI)
181 : TargetLowering(TM, STI), Subtarget(STI) {
182 // Initialize map that relates the PPC addressing modes to the computed flags
183 // of a load/store instruction. The map is used to determine the optimal
184 // addressing mode when selecting load and stores.
185 initializeAddrModeMap();
186 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
187 // arguments are at least 4/8 bytes aligned.
188 bool isPPC64 = Subtarget.isPPC64();
189 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
190 const MVT RegVT = Subtarget.getScalarIntVT();
191
192 // Set up the register classes.
193 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
194 if (!useSoftFloat()) {
195 if (hasSPE()) {
196 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
197 // EFPU2 APU only supports f32
198 if (!Subtarget.hasEFPU2())
199 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
200 } else {
201 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
202 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
203 }
204 }
205
208
209 // PowerPC uses addo_carry,subo_carry to propagate carry.
212
213 // On P10, the default lowering generates better code using the
214 // setbc instruction.
215 if (!Subtarget.hasP10Vector()) {
218 if (isPPC64) {
221 }
222 }
223
224 // Match BITREVERSE to customized fast code sequence in the td file.
227
228 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
230
231 // Custom lower inline assembly to check for special registers.
234
235 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
236 for (MVT VT : MVT::integer_valuetypes()) {
239 }
240
241 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
243
244 if (Subtarget.isISA3_0()) {
245 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
246 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
247 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
248 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
249 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
250 } else {
251 // No extending loads from f16 or HW conversions back and forth.
252 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
254 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
257 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
260 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
261 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
262 }
263
264 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
265
266 // PowerPC has pre-increment loads and stores.
277 if (!Subtarget.hasSPE()) {
282 }
283
284 if (Subtarget.useCRBits()) {
286
287 if (isPPC64 || Subtarget.hasFPCVT()) {
292
294 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
296 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
297
302
304 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
306 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
307 } else {
312 }
313
314 // PowerPC does not support direct load/store of condition registers.
317
318 // FIXME: Remove this once the ANDI glue bug is fixed:
319 if (ANDIGlueBug)
321
322 for (MVT VT : MVT::integer_valuetypes()) {
325 setTruncStoreAction(VT, MVT::i1, Expand);
326 }
327
328 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
329 }
330
331 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
332 // PPC (the libcall is not available).
337
338 // We do not currently implement these libm ops for PowerPC.
339 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
341 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
342 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
344 setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);
345
346 // PowerPC has no SREM/UREM instructions unless we are on P9.
347 // On P9 we may use a hardware instruction to compute the remainder.
348 // When the result of both the remainder and the division is required it is
349 // more efficient to compute the remainder from the result of the division
350 // rather than use the remainder instruction. The instructions are legalized
351 // directly because the DivRemPairsPass performs the transformation at the IR
352 // level.
353 if (Subtarget.isISA3_0()) {
358 } else {
363 }
364
365 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
374
375 // Handle constrained floating-point operations of scalar.
376 // TODO: Handle SPE specific operation.
382
387
388 if (!Subtarget.hasSPE()) {
391 }
392
393 if (Subtarget.hasVSX()) {
396 }
397
398 if (Subtarget.hasFSQRT()) {
401 }
402
403 if (Subtarget.hasFPRND()) {
408
413 }
414
415 // We don't support sin/cos/sqrt/fmod/pow
426
427 // MASS transformation for LLVM intrinsics with replicating fast-math flag
428 // to be consistent with the PPCGenScalarMASSEntries pass.
429 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
442 }
443
444 if (Subtarget.hasSPE()) {
447 } else {
448 setOperationAction(ISD::FMA , MVT::f64, Legal);
449 setOperationAction(ISD::FMA , MVT::f32, Legal);
452 }
453
454 if (Subtarget.hasSPE())
455 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
456
457 // If we're enabling GP optimizations, use hardware square root
458 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
460
461 if (!Subtarget.hasFSQRT() &&
462 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
464
465 if (Subtarget.hasFCPSGN()) {
468 } else {
471 }
472
473 if (Subtarget.hasFPRND()) {
478
483 }
484
485 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
486 // instruction xxbrd to speed up scalar BSWAP64.
487 if (Subtarget.isISA3_1()) {
490 } else {
493 ((Subtarget.hasP8Vector()) && isPPC64) ? Custom
494 : Expand);
495 }
496
497 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
498 if (Subtarget.isISA3_0()) {
499 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
500 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
501 } else {
502 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
503 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
504 }
505
506 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
509 } else {
512 }
513
514 // PowerPC does not have ROTR
517
518 if (!Subtarget.useCRBits()) {
519 // PowerPC does not have Select
524 }
525
526 // PowerPC wants to turn select_cc of FP into fsel when possible.
529
530 // PowerPC wants to optimize integer setcc a bit
531 if (!Subtarget.useCRBits())
533
534 if (Subtarget.hasFPU()) {
538
542 }
543
544 // PowerPC does not have BRCOND which requires SetCC
545 if (!Subtarget.useCRBits())
547
549
550 if (Subtarget.hasSPE()) {
551 // SPE has built-in conversions
558
559 // SPE supports signaling compare of f32/f64.
562 } else {
563 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
566
567 // PowerPC does not have [U|S]INT_TO_FP
572 }
573
574 if (Subtarget.hasDirectMove() && isPPC64) {
579
588 } else {
593 }
594
595 // We cannot sextinreg(i1). Expand to shifts.
597
598 // Custom handling for PowerPC ucmp instruction
599 if (isPPC64) {
600 // UCMP involves using carries, which only works in 64-bit
603 } else {
606 }
607
608 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
609 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
610 // support continuation, user-level threading, etc. As a result, no
611 // other SjLj exception interfaces are implemented and please don't build
612 // your own exception handling based on them.
613 // LLVM/Clang supports zero-cost DWARF exception handling.
616
617 // We want to legalize GlobalAddress and ConstantPool nodes into the
618 // appropriate instructions to materialize the address.
629
630 // TRAP is legal.
631 setOperationAction(ISD::TRAP, MVT::Other, Legal);
632
633 // TRAMPOLINE is custom lowered.
636
637 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
639
640 if (Subtarget.is64BitELFABI()) {
641 // VAARG always uses double-word chunks, so promote anything smaller.
643 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
645 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
647 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
649 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
651 } else if (Subtarget.is32BitELFABI()) {
652 // VAARG is custom lowered with the 32-bit SVR4 ABI.
655 } else
657
658 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
659 if (Subtarget.is32BitELFABI())
661 else
663
664 // Use the default implementation.
665 setOperationAction(ISD::VAEND , MVT::Other, Expand);
674
675 if (Subtarget.isISA3_0() && isPPC64) {
676 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
677 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
678 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
679 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
680 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
681 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
682 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
683 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
684 }
685
686 // We want to custom lower some of our intrinsics.
692
693 // To handle counter-based loop conditions.
696
701
702 // Comparisons that require checking two conditions.
703 if (Subtarget.hasSPE()) {
708 }
721
724
725 if (Subtarget.has64BitSupport()) {
726 // They also have instructions for converting between i64 and fp.
735 // This is just the low 32 bits of a (signed) fp->i64 conversion.
736 // We cannot do this with Promote because i64 is not a legal type.
739
740 if (Subtarget.hasLFIWAX() || isPPC64) {
743 }
744 } else {
745 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
746 if (Subtarget.hasSPE()) {
749 } else {
752 }
753 }
754
755 // With the instructions enabled under FPCVT, we can do everything.
756 if (Subtarget.hasFPCVT()) {
757 if (Subtarget.has64BitSupport()) {
766 }
767
776 }
777
778 if (Subtarget.use64BitRegs()) {
779 // 64-bit PowerPC implementations can support i64 types directly
780 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
781 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
783 // 64-bit PowerPC wants to expand i128 shifts itself.
787 } else {
788 // 32-bit PowerPC wants to expand i64 shifts itself.
792 }
793
794 // PowerPC has better expansions for funnel shifts than the generic
795 // TargetLowering::expandFunnelShift.
796 if (Subtarget.has64BitSupport()) {
799 }
802
803 if (Subtarget.hasVSX()) {
814 }
815
816 if (Subtarget.hasAltivec()) {
817 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
824 }
825 // First set operation action for all vector types to expand. Then we
826 // will selectively turn on ones that can be effectively codegen'd.
828 // add/sub are legal for all supported vector VT's.
831
832 // For v2i64, these are only valid with P8Vector. This is corrected after
833 // the loop.
834 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
839 }
840 else {
845 }
846
847 if (Subtarget.hasVSX()) {
853 }
854
855 // Vector instructions introduced in P8
856 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
859 }
860 else {
863 }
864
865 // Vector instructions introduced in P9
866 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
868 else
870
871 // We promote all shuffles to v16i8.
873 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
874
875 // We promote all non-typed operations to v4i32.
877 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
879 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
881 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
883 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
885 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
888 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
890 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
891
892 // No other operations are legal.
931
932 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
933 setTruncStoreAction(VT, InnerVT, Expand);
936 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
937 }
938 }
940 if (!Subtarget.hasP8Vector()) {
941 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
942 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
943 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
944 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
945 }
946
947 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
948 // with merges, splats, etc.
950
951 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
952 // are cheap, so handle them before they get expanded to scalar.
958
959 setOperationAction(ISD::AND , MVT::v4i32, Legal);
960 setOperationAction(ISD::OR , MVT::v4i32, Legal);
961 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
962 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
964 Subtarget.useCRBits() ? Legal : Expand);
965 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
975 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
978
979 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
980 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
981 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
982 if (Subtarget.hasAltivec())
983 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
985 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
986 if (Subtarget.hasP8Altivec())
987 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
988
989 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
990 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
991 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
992 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
993
994 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
995 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
996
997 if (Subtarget.hasVSX()) {
998 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
999 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
1001 }
1002
1003 if (Subtarget.hasP8Altivec())
1004 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1005 else
1006 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1007
1008 if (Subtarget.isISA3_1()) {
1009 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1010 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
1011 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
1012 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1013 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1014 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1015 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1016 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1017 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1018 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1019 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1020 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1021 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1022 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1023 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1024 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1025 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1026 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1027 }
1028
1029 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1030 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1031
1034 // LE is P8+/64-bit so direct moves are supported and these operations
1035 // are legal. The custom transformation requires 64-bit since we need a
1036 // pair of stores that will cover a 128-bit load for P10.
1037 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1041 }
1042
1047
1048 // Altivec does not contain unordered floating-point compare instructions
1049 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1050 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1051 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1052 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1053
1054 if (Subtarget.hasVSX()) {
1057 if (Subtarget.hasP8Vector()) {
1060 }
1061 if (Subtarget.hasDirectMove() && isPPC64) {
1070 }
1072
1073 // The nearbyint variants are not allowed to raise the inexact exception
1074 // so we can only code-gen them with fpexcept.ignore.
1079
1080 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1081 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1082 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1083 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1084 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1087
1088 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1089 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1092
1093 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1094 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1095
1096 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1097 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1098
1099 // Share the Altivec comparison restrictions.
1100 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1101 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1102 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1103 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1104
1105 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1106 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1107
1109
1110 if (Subtarget.hasP8Vector())
1111 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1112
1113 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1114
1115 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1116 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1117 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1118
1119 if (Subtarget.hasP8Altivec()) {
1120 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1121 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1122 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1123
1124 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1125 // SRL, but not for SRA because of the instructions available:
1126 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1127 // doing.
1128 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1129 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1130 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1131
1132 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1133 }
1134 else {
1135 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1136 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1137 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1138
1139 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1140
1141 // VSX v2i64 only supports non-arithmetic operations.
1142 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1143 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1144 }
1145
1146 if (Subtarget.isISA3_1())
1147 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1148 else
1149 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1150
1151 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1152 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1154 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1155
1157
1166
1167 // Custom handling for partial vectors of integers converted to
1168 // floating point. We already have optimal handling for v2i32 through
1169 // the DAG combine, so those aren't necessary.
1186
1187 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1188 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1189 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1190 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1193
1196
1197 // Handle constrained floating-point operations of vector.
1198 // The predicate is `hasVSX` because Altivec instructions raise no
1199 // exceptions but VSX vector instructions do.
1213
1227
1228 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1229 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1230
1231 for (MVT FPT : MVT::fp_valuetypes())
1232 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1233
1234 // Expand the SELECT to SELECT_CC
1236
1237 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1238 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1239
1240 // No implementation for these ops for PowerPC.
1242 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1243 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1244 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1247 }
1248
1249 if (Subtarget.hasP8Altivec()) {
1250 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1251 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1252 }
1253
1254 if (Subtarget.hasP9Vector()) {
1257
1258 // Test data class instructions store results in CR bits.
1259 if (Subtarget.useCRBits()) {
1264 }
1265
1266 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1267 // SRL, but not for SRA because of the instructions available:
1268 // VS{RL} and VS{RL}O.
1269 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1270 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1271 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1272
1273 setOperationAction(ISD::FADD, MVT::f128, Legal);
1274 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1275 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1276 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1278
1279 setOperationAction(ISD::FMA, MVT::f128, Legal);
1286
1288 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1290 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1293
1297
1298 // Handle constrained floating-point operations of fp128
1315 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1316 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1317 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1318 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1319 } else if (Subtarget.hasVSX()) {
1322
1323 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1324 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1325
1326 // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1327 // fp_to_uint and int_to_fp.
1330
1331 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1332 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1333 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1334 setOperationAction(ISD::FABS, MVT::f128, Expand);
1336 setOperationAction(ISD::FMA, MVT::f128, Expand);
1338
1339 // Expand the fp_extend if the target type is fp128.
1342
1343 // Expand the fp_round if the source type is fp128.
1344 for (MVT VT : {MVT::f32, MVT::f64}) {
1347 }
1348
1353
1354 // Lower the following f128 select_cc pattern:
1355 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1357
1358 // We need to handle f128 SELECT_CC with integer result type.
1360 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1361 }
1362
1363 if (Subtarget.hasP9Altivec()) {
1364 if (Subtarget.isISA3_1()) {
1369 } else {
1372 }
1380
1381 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1382 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1383 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1384 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1385 }
1386
1387 if (Subtarget.hasP10Vector()) {
1389 }
1390
1393 Legal);
1395 Legal);
1397 Legal);
1399 Legal);
1400 }
1401
1402 if (Subtarget.pairedVectorMemops()) {
1403 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1404 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1405 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1406 }
1407 if (Subtarget.hasMMA()) {
1408 if (Subtarget.isISAFuture()) {
1409 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1410 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1411 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1412 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1413 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1414 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1415 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1416 } else {
1417 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1418 }
1419 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1420 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1422 }
1423
1424 if (Subtarget.has64BitSupport())
1426
1427 if (Subtarget.isISA3_1())
1428 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1429
1430 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1431
1432 if (!isPPC64) {
1435 }
1436
1441 }
1442
1444
1445 if (Subtarget.hasAltivec()) {
1446 // Altivec instructions set fields to all zeros or all ones.
1448 }
1449
1452 else if (isPPC64)
1454 else
1456
1457 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1458
1459 // We have target-specific dag combine patterns for the following nodes:
1463 if (Subtarget.hasFPCVT())
1466 if (Subtarget.useCRBits())
1470
1472
1474
1475 if (Subtarget.useCRBits()) {
1477 }
1478
1479 if (Subtarget.hasP8Vector())
1481
1482 // With 32 condition bits, we don't need to sink (and duplicate) compares
1483 // aggressively in CodeGenPrep.
1484 if (Subtarget.useCRBits()) {
1486 }
1487
1488 // TODO: The default entry number is set to 64. This stops most jump table
1489 // generation on PPC. But it is good for current PPC HWs because the indirect
1490 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1491 // Re-evaluate this value on future HWs that can do better with mtctr.
1493
1494 // The default minimum of largest number in a BitTest cluster is 3.
1496
1498 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1499
1500 auto CPUDirective = Subtarget.getCPUDirective();
1501 switch (CPUDirective) {
1502 default: break;
1503 case PPC::DIR_970:
1504 case PPC::DIR_A2:
1505 case PPC::DIR_E500:
1506 case PPC::DIR_E500mc:
1507 case PPC::DIR_E5500:
1508 case PPC::DIR_PWR4:
1509 case PPC::DIR_PWR5:
1510 case PPC::DIR_PWR5X:
1511 case PPC::DIR_PWR6:
1512 case PPC::DIR_PWR6X:
1513 case PPC::DIR_PWR7:
1514 case PPC::DIR_PWR8:
1515 case PPC::DIR_PWR9:
1516 case PPC::DIR_PWR10:
1517 case PPC::DIR_PWR11:
1521 break;
1522 }
1523
1524 if (Subtarget.enableMachineScheduler())
1526 else
1528
1530
1531 // The Freescale cores do better with aggressive inlining of memcpy and
1532 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1533 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1534 MaxStoresPerMemset = 32;
1536 MaxStoresPerMemcpy = 32;
1540 } else if (CPUDirective == PPC::DIR_A2) {
1541 // The A2 also benefits from (very) aggressive inlining of memcpy and
1542 // friends. The overhead of the function call, even when warm, can be
1543 // over one hundred cycles.
1544 MaxStoresPerMemset = 128;
1545 MaxStoresPerMemcpy = 128;
1546 MaxStoresPerMemmove = 128;
1547 MaxLoadsPerMemcmp = 128;
1548 } else {
1551 }
1552
1553 // Enable generation of STXVP instructions by default for mcpu=future.
1554 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1555 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1556 DisableAutoPairedVecSt = false;
1557
1558 IsStrictFPEnabled = true;
1559
1560 // Let the subtarget (CPU) decide if a predictable select is more expensive
1561 // than the corresponding branch. This information is used in CGP to decide
1562 // when to convert selects into branches.
1563 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1564
1566}
1567
1568// *********************************** NOTE ************************************
1569// For selecting load and store instructions, the addressing modes are defined
1570// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1571// patterns to match the load and store instructions.
1572//
1573// The TD definitions for the addressing modes correspond to their respective
1574// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1575// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1576// address mode flags of a particular node. Afterwards, the computed address
1577// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1578// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1579// accordingly, based on the preferred addressing mode.
1580//
1581// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1582// MemOpFlags contains all the possible flags that can be used to compute the
1583// optimal addressing mode for load and store instructions.
1584// AddrMode contains all the possible load and store addressing modes available
1585// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1586//
1587// When adding new load and store instructions, it is possible that new address
1588// flags may need to be added into MemOpFlags, and a new addressing mode will
1589// need to be added to AddrMode. An entry of the new addressing mode (consisting
1590// of the minimal and main distinguishing address flags for the new load/store
1591// instructions) will need to be added into initializeAddrModeMap() below.
1592// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1593// need to be updated to account for selecting the optimal addressing mode.
1594// *****************************************************************************
1595/// Initialize the map that relates the different addressing modes of the load
1596/// and store instructions to a set of flags. This ensures the load/store
1597/// instruction is correctly matched during instruction selection.
1598void PPCTargetLowering::initializeAddrModeMap() {
1599 AddrModesMap[PPC::AM_DForm] = {
1600 // LWZ, STW
1605 // LBZ, LHZ, STB, STH
1610 // LHA
1615 // LFS, LFD, STFS, STFD
1620 };
1621 AddrModesMap[PPC::AM_DSForm] = {
1622 // LWA
1626 // LD, STD
1630 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1634 };
1635 AddrModesMap[PPC::AM_DQForm] = {
1636 // LXV, STXV
1640 };
1641 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1643 // TODO: Add mapping for quadword load/store.
1644}
1645
1646/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1647/// the desired ByVal argument alignment.
1648static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1649 if (MaxAlign == MaxMaxAlign)
1650 return;
1651 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1652 if (MaxMaxAlign >= 32 &&
1653 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1654 MaxAlign = Align(32);
1655 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1656 MaxAlign < 16)
1657 MaxAlign = Align(16);
1658 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1659 Align EltAlign;
1660 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1661 if (EltAlign > MaxAlign)
1662 MaxAlign = EltAlign;
1663 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1664 for (auto *EltTy : STy->elements()) {
1665 Align EltAlign;
1666 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1667 if (EltAlign > MaxAlign)
1668 MaxAlign = EltAlign;
1669 if (MaxAlign == MaxMaxAlign)
1670 break;
1671 }
1672 }
1673}
1674
1675/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1676/// function arguments in the caller parameter area.
1678 const DataLayout &DL) const {
1679 // 16byte and wider vectors are passed on 16byte boundary.
1680 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1681 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1682 if (Subtarget.hasAltivec())
1683 getMaxByValAlign(Ty, Alignment, Align(16));
1684 return Alignment;
1685}
1686
1688 return Subtarget.useSoftFloat();
1689}
1690
1692 return Subtarget.hasSPE();
1693}
1694
1696 return VT.isScalarInteger();
1697}
1698
1700 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1701 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1702 return false;
1703
1704 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1705 if (VTy->getScalarType()->isIntegerTy()) {
1706 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1707 if (ElemSizeInBits == 32) {
1708 Index = Subtarget.isLittleEndian() ? 2 : 1;
1709 return true;
1710 }
1711 if (ElemSizeInBits == 64) {
1712 Index = Subtarget.isLittleEndian() ? 1 : 0;
1713 return true;
1714 }
1715 }
1716 }
1717 return false;
1718}
1719
1721 EVT VT) const {
1722 if (!VT.isVector())
1723 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1724
1726}
1727
1729 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1730 return true;
1731}
1732
1733//===----------------------------------------------------------------------===//
1734// Node matching predicates, for use by the tblgen matching code.
1735//===----------------------------------------------------------------------===//
1736
1737/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1740 return CFP->getValueAPF().isZero();
1741 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1742 // Maybe this has already been legalized into the constant pool?
1743 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1744 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1745 return CFP->getValueAPF().isZero();
1746 }
1747 return false;
1748}
1749
/// isConstantOrUndef - \p Op is a shuffle-mask entry as passed by the callers
/// in this file (a negative value stands for undef). Return true if \p Op is
/// undef or if it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef matches anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1755
1756/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1757/// VPKUHUM instruction.
1758/// The ShuffleKind distinguishes between big-endian operations with
1759/// two different inputs (0), either-endian operations with two identical
1760/// inputs (1), and little-endian operations with two different inputs (2).
1761/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1763 SelectionDAG &DAG) {
1764 bool IsLE = DAG.getDataLayout().isLittleEndian();
1765 if (ShuffleKind == 0) {
1766 if (IsLE)
1767 return false;
1768 for (unsigned i = 0; i != 16; ++i)
1769 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1770 return false;
1771 } else if (ShuffleKind == 2) {
1772 if (!IsLE)
1773 return false;
1774 for (unsigned i = 0; i != 16; ++i)
1775 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1776 return false;
1777 } else if (ShuffleKind == 1) {
1778 unsigned j = IsLE ? 0 : 1;
1779 for (unsigned i = 0; i != 8; ++i)
1780 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1781 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1782 return false;
1783 }
1784 return true;
1785}
1786
1787/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1788/// VPKUWUM instruction.
1789/// The ShuffleKind distinguishes between big-endian operations with
1790/// two different inputs (0), either-endian operations with two identical
1791/// inputs (1), and little-endian operations with two different inputs (2).
1792/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1794 SelectionDAG &DAG) {
1795 bool IsLE = DAG.getDataLayout().isLittleEndian();
1796 if (ShuffleKind == 0) {
1797 if (IsLE)
1798 return false;
1799 for (unsigned i = 0; i != 16; i += 2)
1800 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1801 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1802 return false;
1803 } else if (ShuffleKind == 2) {
1804 if (!IsLE)
1805 return false;
1806 for (unsigned i = 0; i != 16; i += 2)
1807 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1808 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1809 return false;
1810 } else if (ShuffleKind == 1) {
1811 unsigned j = IsLE ? 0 : 2;
1812 for (unsigned i = 0; i != 8; i += 2)
1813 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1814 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1815 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1816 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1817 return false;
1818 }
1819 return true;
1820}
1821
1822/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1823/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1824/// current subtarget.
1825///
1826/// The ShuffleKind distinguishes between big-endian operations with
1827/// two different inputs (0), either-endian operations with two identical
1828/// inputs (1), and little-endian operations with two different inputs (2).
1829/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1831 SelectionDAG &DAG) {
1832 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1833 if (!Subtarget.hasP8Vector())
1834 return false;
1835
1836 bool IsLE = DAG.getDataLayout().isLittleEndian();
1837 if (ShuffleKind == 0) {
1838 if (IsLE)
1839 return false;
1840 for (unsigned i = 0; i != 16; i += 4)
1841 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1842 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1843 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1844 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1845 return false;
1846 } else if (ShuffleKind == 2) {
1847 if (!IsLE)
1848 return false;
1849 for (unsigned i = 0; i != 16; i += 4)
1850 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1851 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1852 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1853 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1854 return false;
1855 } else if (ShuffleKind == 1) {
1856 unsigned j = IsLE ? 0 : 4;
1857 for (unsigned i = 0; i != 8; i += 4)
1858 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1859 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1860 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1861 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1862 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1863 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1864 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1865 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1866 return false;
1867 }
1868 return true;
1869}
1870
1871/// isVMerge - Common function, used to match vmrg* shuffles.
1872///
1873static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1874 unsigned LHSStart, unsigned RHSStart) {
1875 if (N->getValueType(0) != MVT::v16i8)
1876 return false;
1877 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1878 "Unsupported merge size!");
1879
1880 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1881 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1882 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1883 LHSStart+j+i*UnitSize) ||
1884 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1885 RHSStart+j+i*UnitSize))
1886 return false;
1887 }
1888 return true;
1889}
1890
1891/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1892/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1893/// The ShuffleKind distinguishes between big-endian merges with two
1894/// different inputs (0), either-endian merges with two identical inputs (1),
1895/// and little-endian merges with two different inputs (2). For the latter,
1896/// the input operands are swapped (see PPCInstrAltivec.td).
1898 unsigned ShuffleKind, SelectionDAG &DAG) {
1899 if (DAG.getDataLayout().isLittleEndian()) {
1900 if (ShuffleKind == 1) // unary
1901 return isVMerge(N, UnitSize, 0, 0);
1902 else if (ShuffleKind == 2) // swapped
1903 return isVMerge(N, UnitSize, 0, 16);
1904 else
1905 return false;
1906 } else {
1907 if (ShuffleKind == 1) // unary
1908 return isVMerge(N, UnitSize, 8, 8);
1909 else if (ShuffleKind == 0) // normal
1910 return isVMerge(N, UnitSize, 8, 24);
1911 else
1912 return false;
1913 }
1914}
1915
1916/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1917/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1918/// The ShuffleKind distinguishes between big-endian merges with two
1919/// different inputs (0), either-endian merges with two identical inputs (1),
1920/// and little-endian merges with two different inputs (2). For the latter,
1921/// the input operands are swapped (see PPCInstrAltivec.td).
1923 unsigned ShuffleKind, SelectionDAG &DAG) {
1924 if (DAG.getDataLayout().isLittleEndian()) {
1925 if (ShuffleKind == 1) // unary
1926 return isVMerge(N, UnitSize, 8, 8);
1927 else if (ShuffleKind == 2) // swapped
1928 return isVMerge(N, UnitSize, 8, 24);
1929 else
1930 return false;
1931 } else {
1932 if (ShuffleKind == 1) // unary
1933 return isVMerge(N, UnitSize, 0, 0);
1934 else if (ShuffleKind == 0) // normal
1935 return isVMerge(N, UnitSize, 0, 16);
1936 else
1937 return false;
1938 }
1939}
1940
1941/**
1942 * Common function used to match vmrgew and vmrgow shuffles
1943 *
1944 * The indexOffset determines whether to look for even or odd words in
1945 * the shuffle mask. This is based on the endianness of the target
1946 * machine.
1947 * - Little Endian:
1948 * - Use offset of 0 to check for odd elements
1949 * - Use offset of 4 to check for even elements
1950 * - Big Endian:
1951 * - Use offset of 0 to check for even elements
1952 * - Use offset of 4 to check for odd elements
1953 * A detailed description of the vector element ordering for little endian and
1954 * big endian can be found at
1955 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1956 * Targeting your applications - what little endian and big endian IBM XL C/C++
1957 * compiler differences mean to you
1958 *
1959 * The mask to the shuffle vector instruction specifies the indices of the
1960 * elements from the two input vectors to place in the result. The elements are
1961 * numbered in array-access order, starting with the first vector. These vectors
1962 * are always of type v16i8, thus each vector will contain 16 elements of size
1963 * 8. More info on the shuffle vector can be found in the
1964 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1965 * Language Reference.
1966 *
1967 * The RHSStartValue indicates whether the same input vectors are used (unary)
1968 * or two different input vectors are used, based on the following:
1969 * - If the instruction uses the same vector for both inputs, the range of the
1970 * indices will be 0 to 15. In this case, the RHSStart value passed should
1971 * be 0.
1972 * - If the instruction has two different vectors then the range of the
1973 * indices will be 0 to 31. In this case, the RHSStart value passed should
1974 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1975 * to 31 specify elements in the second vector).
1976 *
1977 * \param[in] N The shuffle vector SD Node to analyze
1978 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1979 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1980 * vector to the shuffle_vector instruction
1981 * \return true iff this shuffle vector represents an even or odd word merge
1982 */
1983static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1984 unsigned RHSStartValue) {
1985 if (N->getValueType(0) != MVT::v16i8)
1986 return false;
1987
1988 for (unsigned i = 0; i < 2; ++i)
1989 for (unsigned j = 0; j < 4; ++j)
1990 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1991 i*RHSStartValue+j+IndexOffset) ||
1992 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1993 i*RHSStartValue+j+IndexOffset+8))
1994 return false;
1995 return true;
1996}
1997
1998/**
1999 * Determine if the specified shuffle mask is suitable for the vmrgew or
2000 * vmrgow instructions.
2001 *
2002 * \param[in] N The shuffle vector SD Node to analyze
2003 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2004 * \param[in] ShuffleKind Identify the type of merge:
2005 * - 0 = big-endian merge with two different inputs;
2006 * - 1 = either-endian merge with two identical inputs;
2007 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2008 * little-endian merges).
2009 * \param[in] DAG The current SelectionDAG
2010 * \return true iff this shuffle mask is suitable for vmrgew or vmrgow
2011 */
2013 unsigned ShuffleKind, SelectionDAG &DAG) {
2014 if (DAG.getDataLayout().isLittleEndian()) {
2015 unsigned indexOffset = CheckEven ? 4 : 0;
2016 if (ShuffleKind == 1) // Unary
2017 return isVMerge(N, indexOffset, 0);
2018 else if (ShuffleKind == 2) // swapped
2019 return isVMerge(N, indexOffset, 16);
2020 else
2021 return false;
2022 }
2023 else {
2024 unsigned indexOffset = CheckEven ? 0 : 4;
2025 if (ShuffleKind == 1) // Unary
2026 return isVMerge(N, indexOffset, 0);
2027 else if (ShuffleKind == 0) // Normal
2028 return isVMerge(N, indexOffset, 16);
2029 else
2030 return false;
2031 }
2032 return false;
2033}
2034
2035/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2036/// amount, otherwise return -1.
2037/// The ShuffleKind distinguishes between big-endian operations with two
2038/// different inputs (0), either-endian operations with two identical inputs
2039/// (1), and little-endian operations with two different inputs (2). For the
2040/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2041int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2042 SelectionDAG &DAG) {
2043 if (N->getValueType(0) != MVT::v16i8)
2044 return -1;
2045
2047
2048 // Find the first non-undef value in the shuffle mask.
2049 unsigned i;
2050 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2051 /*search*/;
2052
2053 if (i == 16) return -1; // all undef.
2054
2055 // Otherwise, check to see if the rest of the elements are consecutively
2056 // numbered from this value.
2057 unsigned ShiftAmt = SVOp->getMaskElt(i);
2058 if (ShiftAmt < i) return -1;
2059
2060 ShiftAmt -= i;
2061 bool isLE = DAG.getDataLayout().isLittleEndian();
2062
2063 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2064 // Check the rest of the elements to see if they are consecutive.
2065 for (++i; i != 16; ++i)
2066 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2067 return -1;
2068 } else if (ShuffleKind == 1) {
2069 // Check the rest of the elements to see if they are consecutive.
2070 for (++i; i != 16; ++i)
2071 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2072 return -1;
2073 } else
2074 return -1;
2075
2076 if (isLE)
2077 ShiftAmt = 16 - ShiftAmt;
2078
2079 return ShiftAmt;
2080}
2081
2082/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2083/// specifies a splat of a single element that is suitable for input to
2084/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2086 EVT VT = N->getValueType(0);
2087 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2088 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2089
2090 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2091 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2092
2093 // The consecutive indices need to specify an element, not part of two
2094 // different elements. So abandon ship early if this isn't the case.
2095 if (N->getMaskElt(0) % EltSize != 0)
2096 return false;
2097
2098 // This is a splat operation if each element of the permute is the same, and
2099 // if the value doesn't reference the second vector.
2100 unsigned ElementBase = N->getMaskElt(0);
2101
2102 // FIXME: Handle UNDEF elements too!
2103 if (ElementBase >= 16)
2104 return false;
2105
2106 // Check that the indices are consecutive, in the case of a multi-byte element
2107 // splatted with a v16i8 mask.
2108 for (unsigned i = 1; i != EltSize; ++i)
2109 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2110 return false;
2111
2112 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2113 // An UNDEF element is a sequence of UNDEF bytes.
2114 if (N->getMaskElt(i) < 0) {
2115 for (unsigned j = 1; j != EltSize; ++j)
2116 if (N->getMaskElt(i + j) >= 0)
2117 return false;
2118 } else
2119 for (unsigned j = 0; j != EltSize; ++j)
2120 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2121 return false;
2122 }
2123 return true;
2124}
2125
2126/// Check that the mask is shuffling N byte elements. Within each N byte
2127/// element of the mask, the indices could be either in increasing or
2128/// decreasing order as long as they are consecutive.
2129/// \param[in] N the shuffle vector SD Node to analyze
2130/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2131/// Word/DoubleWord/QuadWord).
2132/// \param[in] StepLen the delta indices number among the N byte element, if
2133/// the mask is in increasing/decreasing order then it is 1/-1.
2134/// \return true iff the mask is shuffling N byte elements.
2135static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2136 int StepLen) {
2137 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2138 "Unexpected element width.");
2139 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2140
2141 unsigned NumOfElem = 16 / Width;
2142 unsigned MaskVal[16]; // Width is never greater than 16
2143 for (unsigned i = 0; i < NumOfElem; ++i) {
2144 MaskVal[0] = N->getMaskElt(i * Width);
2145 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2146 return false;
2147 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2148 return false;
2149 }
2150
2151 for (unsigned int j = 1; j < Width; ++j) {
2152 MaskVal[j] = N->getMaskElt(i * Width + j);
2153 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2154 return false;
2155 }
2156 }
2157 }
2158
2159 return true;
2160}
2161
2162bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2163 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2164 if (!isNByteElemShuffleMask(N, 4, 1))
2165 return false;
2166
2167 // Now we look at mask elements 0,4,8,12
2168 unsigned M0 = N->getMaskElt(0) / 4;
2169 unsigned M1 = N->getMaskElt(4) / 4;
2170 unsigned M2 = N->getMaskElt(8) / 4;
2171 unsigned M3 = N->getMaskElt(12) / 4;
2172 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2173 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2174
2175 // Below, let H and L be arbitrary elements of the shuffle mask
2176 // where H is in the range [4,7] and L is in the range [0,3].
2177 // H, 1, 2, 3 or L, 5, 6, 7
2178 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2179 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2180 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2181 InsertAtByte = IsLE ? 12 : 0;
2182 Swap = M0 < 4;
2183 return true;
2184 }
2185 // 0, H, 2, 3 or 4, L, 6, 7
2186 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2187 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2188 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2189 InsertAtByte = IsLE ? 8 : 4;
2190 Swap = M1 < 4;
2191 return true;
2192 }
2193 // 0, 1, H, 3 or 4, 5, L, 7
2194 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2195 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2196 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2197 InsertAtByte = IsLE ? 4 : 8;
2198 Swap = M2 < 4;
2199 return true;
2200 }
2201 // 0, 1, 2, H or 4, 5, 6, L
2202 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2203 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2204 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2205 InsertAtByte = IsLE ? 0 : 12;
2206 Swap = M3 < 4;
2207 return true;
2208 }
2209
2210 // If both vector operands for the shuffle are the same vector, the mask will
2211 // contain only elements from the first one and the second one will be undef.
2212 if (N->getOperand(1).isUndef()) {
2213 ShiftElts = 0;
2214 Swap = true;
2215 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2216 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2217 InsertAtByte = IsLE ? 12 : 0;
2218 return true;
2219 }
2220 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2221 InsertAtByte = IsLE ? 8 : 4;
2222 return true;
2223 }
2224 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2225 InsertAtByte = IsLE ? 4 : 8;
2226 return true;
2227 }
2228 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2229 InsertAtByte = IsLE ? 0 : 12;
2230 return true;
2231 }
2232 }
2233
2234 return false;
2235}
2236
2238 bool &Swap, bool IsLE) {
2239 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2240 // Ensure each byte index of the word is consecutive.
2241 if (!isNByteElemShuffleMask(N, 4, 1))
2242 return false;
2243
2244 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2245 unsigned M0 = N->getMaskElt(0) / 4;
2246 unsigned M1 = N->getMaskElt(4) / 4;
2247 unsigned M2 = N->getMaskElt(8) / 4;
2248 unsigned M3 = N->getMaskElt(12) / 4;
2249
2250 // If both vector operands for the shuffle are the same vector, the mask will
2251 // contain only elements from the first one and the second one will be undef.
2252 if (N->getOperand(1).isUndef()) {
2253 assert(M0 < 4 && "Indexing into an undef vector?");
2254 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2255 return false;
2256
2257 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2258 Swap = false;
2259 return true;
2260 }
2261
2262 // Ensure each word index of the ShuffleVector Mask is consecutive.
2263 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2264 return false;
2265
2266 if (IsLE) {
2267 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2268 // Input vectors don't need to be swapped if the leading element
2269 // of the result is one of the 3 left elements of the second vector
2270 // (or if there is no shift to be done at all).
2271 Swap = false;
2272 ShiftElts = (8 - M0) % 8;
2273 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2274 // Input vectors need to be swapped if the leading element
2275 // of the result is one of the 3 left elements of the first vector
2276 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2277 Swap = true;
2278 ShiftElts = (4 - M0) % 4;
2279 }
2280
2281 return true;
2282 } else { // BE
2283 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2284 // Input vectors don't need to be swapped if the leading element
2285 // of the result is one of the 4 elements of the first vector.
2286 Swap = false;
2287 ShiftElts = M0;
2288 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2289 // Input vectors need to be swapped if the leading element
2290 // of the result is one of the 4 elements of the right vector.
2291 Swap = true;
2292 ShiftElts = M0 - 4;
2293 }
2294
2295 return true;
2296 }
2297}
2298
2300 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2301
2302 if (!isNByteElemShuffleMask(N, Width, -1))
2303 return false;
2304
2305 for (int i = 0; i < 16; i += Width)
2306 if (N->getMaskElt(i) != i + Width - 1)
2307 return false;
2308
2309 return true;
2310}
2311
2315
2319
2323
2327
2328/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2329/// if the inputs to the instruction should be swapped and set \p DM to the
2330/// value for the immediate.
2331/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2332/// AND element 0 of the result comes from the first input (LE) or second input
2333/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2334/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2335/// mask.
2337 bool &Swap, bool IsLE) {
2338 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2339
2340 // Ensure each byte index of the double word is consecutive.
2341 if (!isNByteElemShuffleMask(N, 8, 1))
2342 return false;
2343
2344 unsigned M0 = N->getMaskElt(0) / 8;
2345 unsigned M1 = N->getMaskElt(8) / 8;
2346 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2347
2348 // If both vector operands for the shuffle are the same vector, the mask will
2349 // contain only elements from the first one and the second one will be undef.
2350 if (N->getOperand(1).isUndef()) {
2351 if ((M0 | M1) < 2) {
2352 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2353 Swap = false;
2354 return true;
2355 } else
2356 return false;
2357 }
2358
2359 if (IsLE) {
2360 if (M0 > 1 && M1 < 2) {
2361 Swap = false;
2362 } else if (M0 < 2 && M1 > 1) {
2363 M0 = (M0 + 2) % 4;
2364 M1 = (M1 + 2) % 4;
2365 Swap = true;
2366 } else
2367 return false;
2368
2369 // Note: if control flow comes here that means Swap is already set above
2370 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2371 return true;
2372 } else { // BE
2373 if (M0 < 2 && M1 > 1) {
2374 Swap = false;
2375 } else if (M0 > 1 && M1 < 2) {
2376 M0 = (M0 + 2) % 4;
2377 M1 = (M1 + 2) % 4;
2378 Swap = true;
2379 } else
2380 return false;
2381
2382 // Note: if control flow comes here that means Swap is already set above
2383 DM = (M0 << 1) + (M1 & 1);
2384 return true;
2385 }
2386}
2387
2388
2389/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2390/// appropriate for PPC mnemonics (which have a big endian bias - namely
2391/// elements are counted from the left of the vector register).
2392unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2393 SelectionDAG &DAG) {
2395 assert(isSplatShuffleMask(SVOp, EltSize));
2396 EVT VT = SVOp->getValueType(0);
2397
2398 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2399 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2400 : SVOp->getMaskElt(0);
2401
2402 if (DAG.getDataLayout().isLittleEndian())
2403 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2404 else
2405 return SVOp->getMaskElt(0) / EltSize;
2406}
2407
2408/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2409/// by using a vspltis[bhw] instruction of the specified element size, return
2410/// the constant being splatted. The ByteSize field indicates the number of
2411/// bytes of each element [124] -> [bhw].
2413 SDValue OpVal;
2414
2415 // If ByteSize of the splat is bigger than the element size of the
2416 // build_vector, then we have a case where we are checking for a splat where
2417 // multiple elements of the buildvector are folded together into a single
2418 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2419 unsigned EltSize = 16/N->getNumOperands();
2420 if (EltSize < ByteSize) {
2421 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2422 SDValue UniquedVals[4];
2423 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2424
2425 // See if all of the elements in the buildvector agree across.
2426 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2427 if (N->getOperand(i).isUndef()) continue;
2428 // If the element isn't a constant, bail fully out.
2429 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2430
2431 if (!UniquedVals[i&(Multiple-1)].getNode())
2432 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2433 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2434 return SDValue(); // no match.
2435 }
2436
2437 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2438 // either constant or undef values that are identical for each chunk. See
2439 // if these chunks can form into a larger vspltis*.
2440
2441 // Check to see if all of the leading entries are either 0 or -1. If
2442 // neither, then this won't fit into the immediate field.
2443 bool LeadingZero = true;
2444 bool LeadingOnes = true;
2445 for (unsigned i = 0; i != Multiple-1; ++i) {
2446 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2447
2448 LeadingZero &= isNullConstant(UniquedVals[i]);
2449 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2450 }
2451 // Finally, check the least significant entry.
2452 if (LeadingZero) {
2453 if (!UniquedVals[Multiple-1].getNode())
2454 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2455 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2456 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2457 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2458 }
2459 if (LeadingOnes) {
2460 if (!UniquedVals[Multiple-1].getNode())
2461 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2462 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2463 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2464 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2465 }
2466
2467 return SDValue();
2468 }
2469
2470 // Check to see if this buildvec has a single non-undef value in its elements.
2471 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2472 if (N->getOperand(i).isUndef()) continue;
2473 if (!OpVal.getNode())
2474 OpVal = N->getOperand(i);
2475 else if (OpVal != N->getOperand(i))
2476 return SDValue();
2477 }
2478
2479 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2480
2481 unsigned ValSizeInBytes = EltSize;
2482 uint64_t Value = 0;
2483 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2484 Value = CN->getZExtValue();
2485 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2486 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2487 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2488 }
2489
2490 // If the splat value is larger than the element value, then we can never do
2491 // this splat. The only case that we could fit the replicated bits into our
2492 // immediate field for would be zero, and we prefer to use vxor for it.
2493 if (ValSizeInBytes < ByteSize) return SDValue();
2494
2495 // If the element value is larger than the splat value, check if it consists
2496 // of a repeated bit pattern of size ByteSize.
2497 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2498 return SDValue();
2499
2500 // Properly sign extend the value.
2501 int MaskVal = SignExtend32(Value, ByteSize * 8);
2502
2503 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2504 if (MaskVal == 0) return SDValue();
2505
2506 // Finally, if this value fits in a 5 bit sext field, return it
2507 if (SignExtend32<5>(MaskVal) == MaskVal)
2508 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2509 return SDValue();
2510}
2511
2512//===----------------------------------------------------------------------===//
2513// Addressing Mode Selection
2514//===----------------------------------------------------------------------===//
2515
2516/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2517/// or 64-bit immediate, and if the value can be accurately represented as a
2518/// sign extension from a 16-bit value. If so, this returns true and the
2519/// immediate.
2520bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2521 if (!isa<ConstantSDNode>(N))
2522 return false;
2523
2524 Imm = (int16_t)N->getAsZExtVal();
2525 if (N->getValueType(0) == MVT::i32)
2526 return Imm == (int32_t)N->getAsZExtVal();
2527 else
2528 return Imm == (int64_t)N->getAsZExtVal();
2529}
// NOTE(review): the declarator for this body is not visible in this listing;
// it presumably is the SDValue convenience overload taking (Op, Imm) - confirm.
// The body simply forwards the node wrapped by Op to the SDNode* form.
2531 return isIntS16Immediate(Op.getNode(), Imm);
2532}
2533
2534/// Used when computing address flags for selecting loads and stores.
2535/// If we have an OR, check if the LHS and RHS are provably disjoint.
2536/// An OR of two provably disjoint values is equivalent to an ADD.
2537/// Most PPC load/store instructions compute the effective address as a sum,
2538/// so doing this conversion is useful.
2539static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2540 if (N.getOpcode() != ISD::OR)
2541 return false;
2542 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2543 if (!LHSKnown.Zero.getBoolValue())
2544 return false;
2545 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2546 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2547}
2548
2549/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2550/// be represented as an indexed [r+r] operation.
///
/// Returns true as soon as one user of \p N is a memory node whose memory type
/// is f64; in that case \p Base and \p Index are set to operands 0 and 1 of
/// \p N. Returns false, leaving both outputs untouched, when no such user
/// exists.
// NOTE(review): the first line of the declarator is missing from this listing.
2552 SDValue &Index,
2553 SelectionDAG &DAG) const {
 // Scan every user of the address node.
2554 for (SDNode *U : N->users()) {
 // Only memory-accessing users are of interest.
2555 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2556 if (Memop->getMemoryVT() == MVT::f64) {
2557 Base = N.getOperand(0);
2558 Index = N.getOperand(1);
2559 return true;
2560 }
2561 }
2562 }
2563 return false;
2564}
2565
2566/// isIntS34Immediate - This method tests if value of node given can be
2567/// accurately represented as a sign extension from a 34-bit value. If so,
2568/// this returns true and the immediate.
2569bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2570 if (!isa<ConstantSDNode>(N))
2571 return false;
2572
2573 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2574 return isInt<34>(Imm);
2575}
// NOTE(review): the declarator for this body is not visible in this listing;
// it presumably is the SDValue convenience overload taking (Op, Imm) - confirm.
// The body simply forwards the node wrapped by Op to the SDNode* form.
2577 return isIntS34Immediate(Op.getNode(), Imm);
2578}
2579
2580/// SelectAddressRegReg - Given the specified address, check to see if it
2581/// can be represented as an indexed [r+r] operation. Returns false if it
2582/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2583/// non-zero and N can be represented by a base register plus a signed 16-bit
2584/// displacement, make a more precise judgement by checking (displacement % \p
2585/// EncodingAlignment).
///
/// On success \p Base and \p Index receive operands 0 and 1 of \p N. Only ADD
/// and OR nodes can match in the code below; any other opcode yields false.
// NOTE(review): the first declarator line and the condition guarding the
// early "return false" below are missing from this listing.
2587 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2588 MaybeAlign EncodingAlignment) const {
2589 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2590 // a [pc+imm].
2592 return false;
2593
2594 int16_t Imm = 0;
2595 if (N.getOpcode() == ISD::ADD) {
2596 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2597 // SPE load/store can only handle 8-bit offsets.
2598 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2599 return true;
 // A signed 16-bit constant addend is left for [r+imm], unless an encoding
 // alignment was requested and the constant does not satisfy it.
2600 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2601 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2602 return false; // r+i
2603 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2604 return false; // r+i
2605
2606 Base = N.getOperand(0);
2607 Index = N.getOperand(1);
2608 return true;
2609 } else if (N.getOpcode() == ISD::OR) {
2610 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2611 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2612 return false; // r+i can fold it if we can.
2613
2614 // If this is an or of disjoint bitfields, we can codegen this as an add
2615 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2616 // disjoint.
 // This is the same known-bits test that provablyDisjointOr() performs.
2617 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2618
2619 if (LHSKnown.Zero.getBoolValue()) {
2620 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2621 // If all of the bits are known zero on the LHS or RHS, the add won't
2622 // carry.
2623 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2624 Base = N.getOperand(0);
2625 Index = N.getOperand(1);
2626 return true;
2627 }
2628 }
2629 }
2630
2631 return false;
2632}
2633
2634// If we happen to be doing an i64 load or store into a stack slot that has
2635// less than a 4-byte alignment, then the frame-index elimination may need to
2636// use an indexed load or store instruction (because the offset may not be a
2637// multiple of 4). The extra register needed to hold the offset comes from the
2638// register scavenger, and it is possible that the scavenger will need to use
2639// an emergency spill slot. As a result, we need to make sure that a spill slot
2640// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2641// stack slot.
//
// In code terms: for an i64 access to a non-negative frame index whose object
// alignment is below 4, mark the function via
// PPCFunctionInfo::setHasNonRISpills(); in every other case do nothing.
2642static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2643 // FIXME: This does not handle the LWA case.
2644 if (VT != MVT::i64)
2645 return;
2646
2647 // NOTE: We'll exclude negative FIs here, which come from argument
2648 // lowering, because there are no known test cases triggering this problem
2649 // using packed structures (or similar). We can remove this exclusion if
2650 // we find such a test case. The reason why this is so test-case driven is
2651 // because this entire 'fixup' is only to prevent crashes (from the
2652 // register scavenger) on not-really-valid inputs. For example, if we have:
2653 // %a = alloca i1
2654 // %b = bitcast i1* %a to i64*
2655 // store i64* a, i64 b
2656 // then the store should really be marked as 'align 1', but is not. If it
2657 // were marked as 'align 1' then the indexed form would have been
2658 // instruction-selected initially, and the problem this 'fixup' is preventing
2659 // won't happen regardless.
2660 if (FrameIdx < 0)
2661 return;
2662
 // NOTE(review): the statement that declares MF is missing from this listing;
 // presumably it is the MachineFunction obtained from DAG - confirm.
2664 MachineFrameInfo &MFI = MF.getFrameInfo();
2665
 // Slots aligned to 4 bytes or more need no fixup.
2666 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2667 return;
2668
2669 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2670 FuncInfo->setHasNonRISpills();
2671}
2672
2673/// Returns true if the address N can be represented by a base register plus
2674/// a signed 16-bit displacement [r+imm], and if it is not better
2675/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2676/// displacements that are multiples of that value.
///
/// On success \p Disp and \p Base describe the chosen form. When nothing more
/// specific matches, the final fallback returns true with a zero displacement.
// NOTE(review): the first declarator line, the PC-relative guard condition,
// some lines in the PPCISD::Lo case and the frame-index test of the fallback
// are missing from this listing.
2678 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2679 MaybeAlign EncodingAlignment) const {
2680 // FIXME dl should come from parent load or store, not from address
2681 SDLoc dl(N);
2682
2683 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2684 // a [pc+imm].
2686 return false;
2687
2688 // If this can be more profitably realized as r+r, fail.
 // Disp and Base are passed only as scratch outputs here; they are
 // overwritten on every path below that returns true.
2689 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2690 return false;
2691
2692 if (N.getOpcode() == ISD::ADD) {
2693 int16_t imm = 0;
2694 if (isIntS16Immediate(N.getOperand(1), imm) &&
2695 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2696 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
 // A frame-index base is turned into a target frame index, and the function
 // is given the chance to reserve a scavenger spill slot (fixupFuncForFI).
2697 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2698 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2699 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2700 } else {
2701 Base = N.getOperand(0);
2702 }
2703 return true; // [r+i]
2704 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2705 // Match LOAD (ADD (X, Lo(G))).
2706 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2707 "Cannot handle constant offsets yet!");
2708 Disp = N.getOperand(1).getOperand(0); // The global address.
2713 Base = N.getOperand(0);
2714 return true; // [&g+r]
2715 }
2716 } else if (N.getOpcode() == ISD::OR) {
2717 int16_t imm = 0;
2718 if (isIntS16Immediate(N.getOperand(1), imm) &&
2719 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2720 // If this is an or of disjoint bitfields, we can codegen this as an add
2721 // (for better address arithmetic) if the LHS and RHS of the OR are
2722 // provably disjoint.
2723 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2724
 // Every bit set in imm must be known zero in the LHS.
2725 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2726 // If all of the bits are known zero on the LHS or RHS, the add won't
2727 // carry.
2728 if (FrameIndexSDNode *FI =
2729 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2730 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2731 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2732 } else {
2733 Base = N.getOperand(0);
2734 }
 // NOTE(review): the ADD path builds Disp with getSignedTargetConstant,
 // while this path passes the signed imm to getTargetConstant - confirm the
 // difference is intentional.
2735 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2736 return true;
2737 }
2738 }
2739 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2740 // Loading from a constant address.
2741
2742 // If this address fits entirely in a 16-bit sext immediate field, codegen
2743 // this as "d, 0"
2744 int16_t Imm;
2745 if (isIntS16Immediate(CN, Imm) &&
2746 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2747 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2748 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2749 CN->getValueType(0));
2750 return true;
2751 }
2752
2753 // Handle 32-bit sext immediates with LIS + addr mode.
2754 if ((CN->getValueType(0) == MVT::i32 ||
2755 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2756 (!EncodingAlignment ||
2757 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2758 int Addr = (int)CN->getZExtValue();
2759
2760 // Otherwise, break this down into an LIS + disp.
2761 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2762
 // High part: subtracting the sign-extended low half first compensates for
 // the sign of the 16-bit displacement.
2763 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2764 MVT::i32);
2765 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2766 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2767 return true;
2768 }
2769 }
2770
 // Fallback: use the whole address as the base with a zero displacement.
2771 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2773 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2774 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2775 } else
2776 Base = N;
2777 return true; // [r+0]
2778}
2779
2780/// Similar to the 16-bit case but for instructions that take a 34-bit
2781/// displacement field (prefixed loads/stores).
///
/// Handles three shapes of \p N: an ADD with a 34-bit constant addend, an OR
/// with a 34-bit constant whose set bits are all known zero in the other
/// operand, and a bare 34-bit constant (based on PPC::ZERO8). Returns false
/// for anything else and for any address whose type is not i64.
// NOTE(review): the first line of the declarator is missing from this listing.
2783 SDValue &Base,
2784 SelectionDAG &DAG) const {
2785 // Only on 64-bit targets.
2786 if (N.getValueType() != MVT::i64)
2787 return false;
2788
2789 SDLoc dl(N);
2790 int64_t Imm = 0;
2791
2792 if (N.getOpcode() == ISD::ADD) {
2793 if (!isIntS34Immediate(N.getOperand(1), Imm))
2794 return false;
2795 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
 // A frame-index base becomes a target frame index; anything else is used
 // unchanged.
2796 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2797 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2798 else
2799 Base = N.getOperand(0);
2800 return true;
2801 }
2802
2803 if (N.getOpcode() == ISD::OR) {
2804 if (!isIntS34Immediate(N.getOperand(1), Imm))
2805 return false;
2806 // If this is an or of disjoint bitfields, we can codegen this as an add
2807 // (for better address arithmetic) if the LHS and RHS of the OR are
2808 // provably disjoint.
2809 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
 // Reject unless every bit set in Imm is known zero in the LHS.
2810 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2811 return false;
2812 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2813 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2814 else
2815 Base = N.getOperand(0);
2816 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2817 return true;
2818 }
2819
2820 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2821 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2822 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2823 return true;
2824 }
2825
2826 return false;
2827}
2828
2829/// SelectAddressRegRegOnly - Given the specified address, force it to be
2830/// represented as an indexed [r+r] operation.
///
/// Every path through the visible body returns true: either
/// SelectAddressRegReg matches, or an ADD is split into its operands, or the
/// whole address becomes the index with the zero register as base.
// NOTE(review): the first line of the declarator is missing from this listing.
2832 SDValue &Index,
2833 SelectionDAG &DAG) const {
2834 // Check to see if we can easily represent this as an [r+r] address. This
2835 // will fail if it thinks that the address is more profitably represented as
2836 // reg+imm, e.g. where imm = 0.
2837 if (SelectAddressRegReg(N, Base, Index, DAG))
2838 return true;
2839
2840 // If the address is the result of an add, we will utilize the fact that the
2841 // address calculation includes an implicit add. However, we can reduce
2842 // register pressure if we do not materialize a constant just for use as the
2843 // index register. We only get rid of the add if it is not an add of a
2844 // value and a 16-bit signed constant and both have a single use.
2845 int16_t imm = 0;
2846 if (N.getOpcode() == ISD::ADD &&
2847 (!isIntS16Immediate(N.getOperand(1), imm) ||
2848 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2849 Base = N.getOperand(0);
2850 Index = N.getOperand(1);
2851 return true;
2852 }
2853
2854 // Otherwise, do it the hard way, using R0 as the base register.
2855 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2856 N.getValueType());
2857 Index = N;
2858 return true;
2859}
2860
2861template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2862 Ty *PCRelCand = dyn_cast<Ty>(N);
2863 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2864}
2865
2866/// Returns true if this address is a PC Relative address.
2867/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2868/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
///
/// \p Base is set to \p N unconditionally, including when the result is false.
// NOTE(review): the declarator and the condition guarding the second
// "return true" are missing from this listing.
2870 // This is a materialize PC Relative node. Always select this as PC Relative.
2871 Base = N;
2872 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2873 return true;
2878 return true;
2879 return false;
2880}
2881
2882/// Returns true if we should use a direct load into vector instruction
2883/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
///
/// The visible checks require: a simple memory type of i64, i32 (with
/// P8Vector) or i16/i8 (with P9Vector); exactly one use of the loaded value;
/// and that every use of result 0 is a SCALAR_TO_VECTOR or
/// PPCISD::SCALAR_TO_VECTOR_PERMUTED node.
2884static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2885
2886 // If there are any other uses other than scalar to vector, then we should
2887 // keep it as a scalar load -> direct move pattern to prevent multiple
2888 // loads.
 // NOTE(review): the statement that declares LD is missing from this listing;
 // presumably it is N viewed as a load node - confirm.
2890 if (!LD)
2891 return false;
2892
2893 EVT MemVT = LD->getMemoryVT();
2894 if (!MemVT.isSimple())
2895 return false;
 // Gate each memory width on the subtarget feature it needs.
2896 switch(MemVT.getSimpleVT().SimpleTy) {
2897 case MVT::i64:
2898 break;
2899 case MVT::i32:
2900 if (!ST.hasP8Vector())
2901 return false;
2902 break;
2903 case MVT::i16:
2904 case MVT::i8:
2905 if (!ST.hasP9Vector())
2906 return false;
2907 break;
2908 default:
2909 return false;
2910 }
2911
 // Result 0 of the load is the loaded value.
2912 SDValue LoadedVal(N, 0);
2913 if (!LoadedVal.hasOneUse())
2914 return false;
2915
 // Uses of other results are ignored; only result 0 is constrained.
2916 for (SDUse &Use : LD->uses())
2917 if (Use.getResNo() == 0 &&
2918 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2919 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2920 return false;
2921
2922 return true;
2923}
2924
2925/// getPreIndexedAddressParts - returns true by value, base pointer and
2926/// offset pointer and addressing mode by reference if the node's address
2927/// can be legally represented as pre-indexed load / store address.
///
/// Only load and store nodes are considered; on success the addressing mode
/// is always ISD::PRE_INC.
// NOTE(review): parts of the declarator, the frame-index condition that sets
// Swap, the statement executed when Swap is true, and the last operand of the
// SEXTLOAD condition are missing from this listing.
2929 SDValue &Offset,
2931 SelectionDAG &DAG) const {
2932 if (DisablePPCPreinc) return false;
2933
 // Collect pointer, memory type and alignment from the load or store.
2934 bool isLoad = true;
2935 SDValue Ptr;
2936 EVT VT;
2937 Align Alignment;
2938 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2939 Ptr = LD->getBasePtr();
2940 VT = LD->getMemoryVT();
2941 Alignment = LD->getAlign();
2942 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2943 Ptr = ST->getBasePtr();
2944 VT = ST->getMemoryVT();
2945 Alignment = ST->getAlign();
2946 isLoad = false;
2947 } else
2948 return false;
2949
2950 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2951 // instructions because we can fold these into a more efficient instruction
2952 // instead, (such as LXSD).
2953 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2954 return false;
2955 }
2956
2957 // PowerPC doesn't have preinc load/store instructions for vectors
2958 if (VT.isVector())
2959 return false;
2960
 // First preference: the indexed [r+r] form.
2961 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2962 // Common code will reject creating a pre-inc form if the base pointer
2963 // is a frame index, or if N is a store and the base pointer is either
2964 // the same as or a predecessor of the value being stored. Check for
2965 // those situations here, and try with swapped Base/Offset instead.
2966 bool Swap = false;
2967
2969 Swap = true;
2970 else if (!isLoad) {
2971 SDValue Val = cast<StoreSDNode>(N)->getValue();
2972 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2973 Swap = true;
2974 }
2975
2976 if (Swap)
2978
2979 AM = ISD::PRE_INC;
2980 return true;
2981 }
2982
2983 // LDU/STU can only handle immediates that are a multiple of 4.
 // Hence i64 accesses request an encoding alignment of 4, while all other
 // types pass no alignment constraint.
2984 if (VT != MVT::i64) {
2985 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2986 return false;
2987 } else {
2988 // LDU/STU need an address with at least 4-byte alignment.
2989 if (Alignment < Align(4))
2990 return false;
2991
2992 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2993 return false;
2994 }
2995
2996 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2997 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2998 // sext i32 to i64 when addr mode is r+i.
2999 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3000 LD->getExtensionType() == ISD::SEXTLOAD &&
3002 return false;
3003 }
3004
3005 AM = ISD::PRE_INC;
3006 return true;
3007}
3008
3009//===----------------------------------------------------------------------===//
3010// LowerOperation implementation
3011//===----------------------------------------------------------------------===//
3012
3013/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3014/// and LoOpFlags to the target MO flags.
3015static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3016 unsigned &HiOpFlags, unsigned &LoOpFlags,
3017 const GlobalValue *GV = nullptr) {
3018 HiOpFlags = PPCII::MO_HA;
3019 LoOpFlags = PPCII::MO_LO;
3020
3021 // Don't use the pic base if not in PIC relocation model.
3022 if (IsPIC) {
3023 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3024 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3025 }
3026}
3027
3028static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3029 SelectionDAG &DAG) {
3030 SDLoc DL(HiPart);
3031 EVT PtrVT = HiPart.getValueType();
3032 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3033
3034 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3035 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3036
3037 // With PIC, the first instruction is actually "GR+hi(&G)".
3038 if (isPIC)
3039 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3040 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3041
3042 // Generate non-pic code that has direct accesses to the constant pool.
3043 // The address of the global is just (hi(&g)+lo(&g)).
3044 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3045}
3046
// NOTE(review): the declarator for this body is missing from this listing;
// MF is presumably its MachineFunction parameter - confirm.
// Records on the function's PPCFunctionInfo that the TOC base pointer is used.
3048 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3049 FuncInfo->setUsesTOCBasePtr();
3050}
3051
3055
// Build a PPCISD::TOC_ENTRY memory-intrinsic node for GA. Its second operand
// is the base register: X2 on PPC64, R2 on (32-bit) AIX, and otherwise a
// PPCISD::GlobalBaseReg node.
// NOTE(review): the trailing arguments of the getMemIntrinsicNode call are
// missing from this listing.
3056SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3057 SDValue GA) const {
3058 EVT VT = Subtarget.getScalarIntVT();
3059 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3060 : Subtarget.isAIXABI()
3061 ? DAG.getRegister(PPC::R2, VT)
3062 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3063 SDValue Ops[] = { GA, Reg };
3064 return DAG.getMemIntrinsicNode(
3065 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3068}
3069
// Lower a constant-pool reference. Depending on ABI and relocation model the
// result is a PC-relative materialization, a TOC entry, or a hi/lo label pair.
3070SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3071 SelectionDAG &DAG) const {
3072 EVT PtrVT = Op.getValueType();
3073 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3074 const Constant *C = CP->getConstVal();
3075
3076 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3077 // The actual address of the GlobalValue is stored in the TOC.
3078 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
 // With PC-relative addressing no TOC entry is created; the address is
 // materialized directly from a PCREL-flagged constant-pool node.
3079 if (Subtarget.isUsingPCRelativeCalls()) {
3080 SDLoc DL(CP);
3081 EVT Ty = getPointerTy(DAG.getDataLayout());
3082 SDValue ConstPool = DAG.getTargetConstantPool(
3083 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3084 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3085 }
3086 setUsesTOCBasePtr(DAG);
3087 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3088 return getTOCEntry(DAG, SDLoc(CP), GA);
3089 }
3090
3091 unsigned MOHiFlag, MOLoFlag;
3092 bool IsPIC = isPositionIndependent();
3093 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3094
3095 if (IsPIC && Subtarget.isSVR4ABI()) {
 // NOTE(review): the initializer of GA is missing from this listing.
3096 SDValue GA =
3098 return getTOCEntry(DAG, SDLoc(CP), GA);
3099 }
3100
 // Remaining case: form the address from separately flagged high and low
 // halves.
3101 SDValue CPIHi =
3102 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3103 SDValue CPILo =
3104 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3105 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3106}
3107
3108// For 64-bit PowerPC, prefer the more compact relative encodings.
3109// This trades 32 bits per jump table entry for one or two instructions
3110// on the jump site.
3117
3120 return false;
3121 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3122 return true;
3124}
3125
3127 SelectionDAG &DAG) const {
3128 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3130
3131 switch (getTargetMachine().getCodeModel()) {
3132 case CodeModel::Small:
3133 case CodeModel::Medium:
3135 default:
3136 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3138 }
3139}
3140
3141const MCExpr *
3143 unsigned JTI,
3144 MCContext &Ctx) const {
3145 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3147
3148 switch (getTargetMachine().getCodeModel()) {
3149 case CodeModel::Small:
3150 case CodeModel::Medium:
3152 default:
3153 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3154 }
3155}
3156
// Lower a jump-table reference. Depending on ABI and relocation model the
// result is a PC-relative materialization, a TOC entry, or a hi/lo label pair.
3157SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3158 EVT PtrVT = Op.getValueType();
 // NOTE(review): the statement that declares JT is missing from this listing;
 // presumably it is Op viewed as a jump-table node - confirm.
3160
3161 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3162 if (Subtarget.isUsingPCRelativeCalls()) {
3163 SDLoc DL(JT);
3164 EVT Ty = getPointerTy(DAG.getDataLayout());
 // NOTE(review): the initializer of GA is missing from this listing.
3165 SDValue GA =
3167 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3168 return MatAddr;
3169 }
3170
3171 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3172 // The actual address of the GlobalValue is stored in the TOC.
3173 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3174 setUsesTOCBasePtr(DAG);
3175 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3176 return getTOCEntry(DAG, SDLoc(JT), GA);
3177 }
3178
3179 unsigned MOHiFlag, MOLoFlag;
3180 bool IsPIC = isPositionIndependent();
3181 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3182
3183 if (IsPIC && Subtarget.isSVR4ABI()) {
 // NOTE(review): the final argument of this call is missing from this listing.
3184 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3186 return getTOCEntry(DAG, SDLoc(GA), GA);
3187 }
3188
 // Remaining case: form the address from separately flagged high and low
 // halves.
3189 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3190 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3191 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3192}
3193
// Lower a block-address reference. Depending on ABI and relocation model the
// result is a PC-relative materialization, a TOC/.got entry, or a hi/lo label
// pair.
3194SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3195 SelectionDAG &DAG) const {
3196 EVT PtrVT = Op.getValueType();
3197 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3198 const BlockAddress *BA = BASDN->getBlockAddress();
3199
3200 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3201 if (Subtarget.isUsingPCRelativeCalls()) {
3202 SDLoc DL(BASDN);
3203 EVT Ty = getPointerTy(DAG.getDataLayout());
 // NOTE(review): the final argument of this call is missing from this listing.
3204 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3206 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3207 return MatAddr;
3208 }
3209
3210 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3211 // The actual BlockAddress is stored in the TOC.
3212 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3213 setUsesTOCBasePtr(DAG);
3214 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3215 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3216 }
3217
3218 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3219 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3220 return getTOCEntry(
3221 DAG, SDLoc(BASDN),
3222 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3223
 // Remaining case: form the address from separately flagged high and low
 // halves. Both halves are created with offset 0 rather than
 // BASDN->getOffset().
3224 unsigned MOHiFlag, MOLoFlag;
3225 bool IsPIC = isPositionIndependent();
3226 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3227 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3228 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3229 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3230}
3231
3232SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3233 SelectionDAG &DAG) const {
3234 if (Subtarget.isAIXABI())
3235 return LowerGlobalTLSAddressAIX(Op, DAG);
3236
3237 return LowerGlobalTLSAddressLinux(Op, DAG);
3238}
3239
3240/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3241/// and then apply the update.
///
/// The per-function decision is computed once, guarded by
/// PPCFunctionInfo::isAIXFuncTLSModelOptInitDone(). Afterwards, if the
/// function was marked to use initial-exec for local-dynamic accesses, Model
/// is overwritten with TLSModel::InitialExec.
// NOTE(review): the first declarator line, the initializer of FuncInfo, the
// declaration of TLSGV and the statement at the end of the init block are
// missing from this listing.
3243 SelectionDAG &DAG,
3244 const TargetMachine &TM) {
3245 // Initialize TLS model opt setting lazily:
3246 // (1) Use initial-exec for single TLS var references within current function.
3247 // (2) Use local-dynamic for multiple TLS var references within current
3248 // function.
3249 PPCFunctionInfo *FuncInfo =
3251 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3253 // Iterate over all instructions within current function, collect all TLS
3254 // global variables (global variables taken as the first parameter to
3255 // Intrinsic::threadlocal_address).
3256 const Function &Func = DAG.getMachineFunction().getFunction();
3257 for (const BasicBlock &BB : Func)
3258 for (const Instruction &I : BB)
3259 if (I.getOpcode() == Instruction::Call)
3260 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3261 if (Function *CF = CI->getCalledFunction())
3262 if (CF->isDeclaration() &&
3263 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3264 if (const GlobalValue *GV =
3265 dyn_cast<GlobalValue>(I.getOperand(0))) {
 // Only globals whose model is local-dynamic are collected.
3266 TLSModel::Model GVModel = TM.getTLSModel(GV);
3267 if (GVModel == TLSModel::LocalDynamic)
3268 TLSGV.insert(GV);
3269 }
3270
 // Choose initial-exec when the number of collected globals does not exceed
 // PPCAIXTLSModelOptUseIEForLDLimit.
3271 unsigned TLSGVCnt = TLSGV.size();
3272 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3273 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3274 FuncInfo->setAIXFuncUseTLSIEForLD();
3276 }
3277
3278 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3279 LLVM_DEBUG(
3280 dbgs() << DAG.getMachineFunction().getName()
3281 << " function is using the TLS-IE model for TLS-LD access.\n");
3282 Model = TLSModel::InitialExec;
3283 }
3284}
3285
3286SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3287 SelectionDAG &DAG) const {
3288 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3289
3290 if (DAG.getTarget().useEmulatedTLS())
3291 report_fatal_error("Emulated TLS is not yet supported on AIX");
3292
3293 SDLoc dl(GA);
3294 const GlobalValue *GV = GA->getGlobal();
3295 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3296 bool Is64Bit = Subtarget.isPPC64();
3298
3299 // Apply update to the TLS model.
3300 if (Subtarget.hasAIXShLibTLSModelOpt())
3302
3303 // TLS variables are accessed through TOC entries.
3304 // To support this, set the DAG to use the TOC base pointer.
3305 setUsesTOCBasePtr(DAG);
3306
3307 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3308
3309 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3310 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3311 bool HasAIXSmallTLSGlobalAttr = false;
3312 SDValue VariableOffsetTGA =
3313 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3314 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3315 SDValue TLSReg;
3316
3317 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3318 if (GVar->hasAttribute("aix-small-tls"))
3319 HasAIXSmallTLSGlobalAttr = true;
3320
3321 if (Is64Bit) {
3322 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3323 // involves a load of the variable offset (from the TOC), followed by an
3324 // add of the loaded variable offset to R13 (the thread pointer).
3325 // This code sequence looks like:
3326 // ld reg1,var[TC](2)
3327 // add reg2, reg1, r13 // r13 contains the thread pointer
3328 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3329
3330 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3331 // global variable attribute, produce a faster access sequence for
3332 // local-exec TLS variables where the offset from the TLS base is encoded
3333 // as an immediate operand.
3334 //
3335 // We only utilize the faster local-exec access sequence when the TLS
3336 // variable has a size within the policy limit. We treat types that are
3337 // not sized or are empty as being over the policy size limit.
3338 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3339 IsTLSLocalExecModel) {
3340 Type *GVType = GV->getValueType();
3341 if (GVType->isSized() && !GVType->isEmptyTy() &&
3342 GV->getDataLayout().getTypeAllocSize(GVType) <=
3344 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3345 }
3346 } else {
3347 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3348 // involves loading the variable offset from the TOC, generating a call to
3349 // .__get_tpointer to get the thread pointer (which will be in R3), and
3350 // adding the two together:
3351 // lwz reg1,var[TC](2)
3352 // bla .__get_tpointer
3353 // add reg2, reg1, r3
3354 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3355
3356 // We do not implement the 32-bit version of the faster access sequence
3357 // for local-exec that is controlled by the -maix-small-local-exec-tls
3358 // option, or the "aix-small-tls" global variable attribute.
3359 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3360 report_fatal_error("The small-local-exec TLS access sequence is "
3361 "currently only supported on AIX (64-bit mode).");
3362 }
3363 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3364 }
3365
3366 if (Model == TLSModel::LocalDynamic) {
3367 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3368
3369 // We do not implement the 32-bit version of the faster access sequence
3370 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3371 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3372 report_fatal_error("The small-local-dynamic TLS access sequence is "
3373 "currently only supported on AIX (64-bit mode).");
3374
3375 // For local-dynamic on AIX, we need to generate one TOC entry for each
3376 // variable offset, and a single module-handle TOC entry for the entire
3377 // file.
3378
3379 SDValue VariableOffsetTGA =
3380 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3381 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3382
3384 GlobalVariable *TLSGV =
3385 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3386 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3388 assert(TLSGV && "Not able to create GV for _$TLSML.");
3389 SDValue ModuleHandleTGA =
3390 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3391 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3392 SDValue ModuleHandle =
3393 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3394
3395 // With the -maix-small-local-dynamic-tls option, produce a faster access
3396 // sequence for local-dynamic TLS variables where the offset from the
3397 // module-handle is encoded as an immediate operand.
3398 //
3399 // We only utilize the faster local-dynamic access sequence when the TLS
3400 // variable has a size within the policy limit. We treat types that are
3401 // not sized or are empty as being over the policy size limit.
3402 if (HasAIXSmallLocalDynamicTLS) {
3403 Type *GVType = GV->getValueType();
3404 if (GVType->isSized() && !GVType->isEmptyTy() &&
3405 GV->getDataLayout().getTypeAllocSize(GVType) <=
3407 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3408 ModuleHandle);
3409 }
3410
3411 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3412 }
3413
3414 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3415 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3416 // need to generate two TOC entries, one for the variable offset, one for the
3417 // region handle. The global address for the TOC entry of the region handle is
3418 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3419 // entry of the variable offset is created with MO_TLSGD_FLAG.
3420 SDValue VariableOffsetTGA =
3421 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3422 SDValue RegionHandleTGA =
3423 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3424 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3425 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3426 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3427 RegionHandle);
3428}
3429
/// Lower a GlobalTLSAddress node for Linux.
///
/// Emulated TLS is delegated to LowerToTLSEmulatedModel. Otherwise the TLS
/// model the target machine reports for the global selects one of four code
/// sequences (local-exec, initial-exec, general-dynamic, local-dynamic). Each
/// of them has a separate path that is taken when the subtarget uses
/// PC-relative calls. Any other model value is treated as unreachable.
///
/// NOTE(review): several source lines are absent from this listing (the
/// embedded line numbers skip); the affected statements are marked below.
3430SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3431 SelectionDAG &DAG) const {
3432 // FIXME: TLS addresses currently use medium model code sequences,
3433 // which is the most useful form. Eventually support for small and
3434 // large models could be added if users need it, at the cost of
3435 // additional complexity.
3436 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3437 if (DAG.getTarget().useEmulatedTLS())
3438 return LowerToTLSEmulatedModel(GA, DAG);
3439
3440 SDLoc dl(GA);
3441 const GlobalValue *GV = GA->getGlobal();
3442 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3443 bool is64bit = Subtarget.isPPC64();
3444 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3445 PICLevel::Level picLevel = M->getPICLevel();
3446
3447 const TargetMachine &TM = getTargetMachine();
3448 TLSModel::Model Model = TM.getTLSModel(GV);
3449
// Local-exec: the address is formed relative to the thread-pointer register
// (X13 on 64-bit, R2 on 32-bit).
3450 if (Model == TLSModel::LocalExec) {
3451 if (Subtarget.isUsingPCRelativeCalls()) {
3452 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3453 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
// NOTE(review): the last argument of the call above (presumably the target
// flag) is on a line missing from this listing.
3455 SDValue MatAddr =
3456 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3457 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3458 }
3459
// NOTE(review): the trailing argument of each of the next two calls
// (presumably the hi/lo target flags) is missing from this listing.
3460 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3462 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3464 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3465 : DAG.getRegister(PPC::R2, MVT::i32);
3466
// Hi(TGAHi, TLSReg) feeds Lo(TGALo, ...), producing the final address.
3467 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3468 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3469 }
3470
// Initial-exec: obtain the thread-pointer offset (TPOffset) and combine it
// with the TLS-flagged address through ADD_TLS.
3471 if (Model == TLSModel::InitialExec) {
3472 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
// NOTE(review): the beginning of the next statement (presumably the
// declaration of TGA, which is used below) is missing from this listing.
3474 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3475 SDValue TGATLS = DAG.getTargetGlobalAddress(
3476 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3477 SDValue TPOffset;
3478 if (IsPCRel) {
// PC-relative: materialize the address and load the offset from it.
3479 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3480 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3481 MachinePointerInfo());
3482 } else {
3483 SDValue GOTPtr;
3484 if (is64bit) {
3485 setUsesTOCBasePtr(DAG);
3486 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3487 GOTPtr =
3488 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3489 } else {
// 32-bit: the GOT pointer node depends on whether the code is
// position-independent and, if so, on the PIC level.
3490 if (!TM.isPositionIndependent())
3491 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3492 else if (picLevel == PICLevel::SmallPIC)
3493 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3494 else
3495 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3496 }
3497 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3498 }
3499 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3500 }
3501
// General-dynamic.
3502 if (Model == TLSModel::GeneralDynamic) {
3503 if (Subtarget.isUsingPCRelativeCalls()) {
3504 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
// NOTE(review): the last argument of the call above is missing from this
// listing.
3506 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3507 }
3508
3509 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3510 SDValue GOTPtr;
3511 if (is64bit) {
3512 setUsesTOCBasePtr(DAG);
3513 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3514 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3515 GOTReg, TGA);
3516 } else {
3517 if (picLevel == PICLevel::SmallPIC)
3518 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3519 else
3520 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3521 }
3522 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3523 GOTPtr, TGA, TGA);
3524 }
3525
// Local-dynamic.
3526 if (Model == TLSModel::LocalDynamic) {
3527 if (Subtarget.isUsingPCRelativeCalls()) {
3528 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
// NOTE(review): the last argument of the call above is missing from this
// listing.
3530 SDValue MatPCRel =
3531 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3532 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3533 }
3534
3535 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3536 SDValue GOTPtr;
3537 if (is64bit) {
3538 setUsesTOCBasePtr(DAG);
3539 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3540 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3541 GOTReg, TGA);
3542 } else {
3543 if (picLevel == PICLevel::SmallPIC)
3544 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3545 else
3546 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3547 }
// The module's TLS address is computed first; the variable's DTP-relative
// offset is then applied in a high-adjusted and a low step.
3548 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3549 PtrVT, GOTPtr, TGA, TGA);
3550 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3551 PtrVT, TLSAddr, TGA);
3552 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3553 }
3554
3555 llvm_unreachable("Unknown TLS model!");
3556}
3557
/// Lower a GlobalAddress node.
///
/// For the 64-bit ELF and AIX ABIs the address is either materialized
/// PC-relatively (when the subtarget uses PC-relative calls) or fetched through
/// a TOC entry. For the remaining ABIs, PIC code under SVR4 goes through
/// getTOCEntry, and everything else is built from a hi/lo pair of target
/// global addresses via LowerLabelRef.
///
/// NOTE(review): several source lines are absent from this listing (the
/// embedded line numbers skip), including the condition that opens the
/// if/else inside the PC-relative path; the affected spots are marked below.
3558SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 EVT PtrVT = Op.getValueType();
3561 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3562 SDLoc DL(GSDN);
3563 const GlobalValue *GV = GSDN->getGlobal();
3564
3565 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3566 // The actual address of the GlobalValue is stored in the TOC.
3567 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3568 if (Subtarget.isUsingPCRelativeCalls()) {
3569 EVT Ty = getPointerTy(DAG.getDataLayout());
// NOTE(review): a line is missing here (presumably the `if` matching the
// `} else {` below), and the last argument of the next call is missing too.
3571 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
// This arm loads the final address from the materialized location.
3573 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3574 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3575 MachinePointerInfo());
3576 return Load;
3577 } else {
// This arm uses the materialized PC-relative address directly.
// NOTE(review): the last argument of the next call is missing from this
// listing.
3578 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3580 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3581 }
3582 }
// Not PC-relative: go through the TOC.
3583 setUsesTOCBasePtr(DAG);
3584 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3585 return getTOCEntry(DAG, DL, GA);
3586 }
3587
3588 unsigned MOHiFlag, MOLoFlag;
3589 bool IsPIC = isPositionIndependent();
// Fills in MOHiFlag / MOLoFlag for the accesses built below.
3590 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3591
3592 if (IsPIC && Subtarget.isSVR4ABI()) {
// NOTE(review): the last argument of the next call is missing from this
// listing.
3593 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3594 GSDN->getOffset(),
3596 return getTOCEntry(DAG, DL, GA);
3597 }
3598
3599 SDValue GAHi =
3600 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3601 SDValue GALo =
3602 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3603
3604 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3605}
3606
3607SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3608 bool IsStrict = Op->isStrictFPOpcode();
3609 ISD::CondCode CC =
3610 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3611 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3612 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3613 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3614 EVT LHSVT = LHS.getValueType();
3615 SDLoc dl(Op);
3616
3617 // Soften the setcc with libcall if it is fp128.
3618 if (LHSVT == MVT::f128) {
3619 assert(!Subtarget.hasP9Vector() &&
3620 "SETCC for f128 is already legal under Power9!");
3621 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3622 Op->getOpcode() == ISD::STRICT_FSETCCS);
3623 if (RHS.getNode())
3624 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3625 DAG.getCondCode(CC));
3626 if (IsStrict)
3627 return DAG.getMergeValues({LHS, Chain}, dl);
3628 return LHS;
3629 }
3630
3631 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3632
3633 if (Op.getValueType() == MVT::v2i64) {
3634 // When the operands themselves are v2i64 values, we need to do something
3635 // special because VSX has no underlying comparison operations for these.
3636 if (LHS.getValueType() == MVT::v2i64) {
3637 // Equality can be handled by casting to the legal type for Altivec
3638 // comparisons, everything else needs to be expanded.
3639 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3640 return SDValue();
3641 SDValue SetCC32 = DAG.getSetCC(
3642 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3643 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3644 int ShuffV[] = {1, 0, 3, 2};
3645 SDValue Shuff =
3646 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3647 return DAG.getBitcast(MVT::v2i64,
3648 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3649 dl, MVT::v4i32, Shuff, SetCC32));
3650 }
3651
3652 // We handle most of these in the usual way.
3653 return Op;
3654 }
3655
3656 // If we're comparing for equality to zero, expose the fact that this is
3657 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3658 // fold the new nodes.
3659 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3660 return V;
3661
3662 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3663 // Leave comparisons against 0 and -1 alone for now, since they're usually
3664 // optimized. FIXME: revisit this when we can custom lower all setcc
3665 // optimizations.
3666 if (C->isAllOnes() || C->isZero())
3667 return SDValue();
3668 }
3669
3670 // If we have an integer seteq/setne, turn it into a compare against zero
3671 // by xor'ing the rhs with the lhs, which is faster than setting a
3672 // condition register, reading it back out, and masking the correct bit. The
3673 // normal approach here uses sub to do this instead of xor. Using xor exposes
3674 // the result to other bit-twiddling opportunities.
3675 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3676 EVT VT = Op.getValueType();
3677 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3678 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3679 }
3680 return SDValue();
3681}
3682
3683SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3684 SDNode *Node = Op.getNode();
3685 EVT VT = Node->getValueType(0);
3686 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3687 SDValue InChain = Node->getOperand(0);
3688 SDValue VAListPtr = Node->getOperand(1);
3689 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3690 SDLoc dl(Node);
3691
3692 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3693
3694 // gpr_index
3695 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3696 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3697 InChain = GprIndex.getValue(1);
3698
3699 if (VT == MVT::i64) {
3700 // Check if GprIndex is even
3701 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3702 DAG.getConstant(1, dl, MVT::i32));
3703 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3704 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3705 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3706 DAG.getConstant(1, dl, MVT::i32));
3707 // Align GprIndex to be even if it isn't
3708 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3709 GprIndex);
3710 }
3711
3712 // fpr index is 1 byte after gpr
3713 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3714 DAG.getConstant(1, dl, MVT::i32));
3715
3716 // fpr
3717 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3718 FprPtr, MachinePointerInfo(SV), MVT::i8);
3719 InChain = FprIndex.getValue(1);
3720
3721 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3722 DAG.getConstant(8, dl, MVT::i32));
3723
3724 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3725 DAG.getConstant(4, dl, MVT::i32));
3726
3727 // areas
3728 SDValue OverflowArea =
3729 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3730 InChain = OverflowArea.getValue(1);
3731
3732 SDValue RegSaveArea =
3733 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3734 InChain = RegSaveArea.getValue(1);
3735
3736 // select overflow_area if index > 8
3737 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3738 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3739
3740 // adjustment constant gpr_index * 4/8
3741 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3742 VT.isInteger() ? GprIndex : FprIndex,
3743 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3744 MVT::i32));
3745
3746 // OurReg = RegSaveArea + RegConstant
3747 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3748 RegConstant);
3749
3750 // Floating types are 32 bytes into RegSaveArea
3751 if (VT.isFloatingPoint())
3752 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3753 DAG.getConstant(32, dl, MVT::i32));
3754
3755 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3756 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3757 VT.isInteger() ? GprIndex : FprIndex,
3758 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3759 MVT::i32));
3760
3761 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3762 VT.isInteger() ? VAListPtr : FprPtr,
3763 MachinePointerInfo(SV), MVT::i8);
3764
3765 // determine if we should load from reg_save_area or overflow_area
3766 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3767
3768 // increase overflow_area by 4/8 if gpr/fpr > 8
3769 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3770 DAG.getConstant(VT.isInteger() ? 4 : 8,
3771 dl, MVT::i32));
3772
3773 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3774 OverflowAreaPlusN);
3775
3776 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3777 MachinePointerInfo(), MVT::i32);
3778
3779 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3780}
3781
3782SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3783 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3784
3785 // We have to copy the entire va_list struct:
3786 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3787 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3788 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3789 false, true, /*CI=*/nullptr, std::nullopt,
3790 MachinePointerInfo(), MachinePointerInfo());
3791}
3792
3793SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3794 SelectionDAG &DAG) const {
3795 return Op.getOperand(0);
3796}
3797
/// Inspect an INLINEASM / INLINEASM_BR node and record on the function info
/// that an LR store is required if one of the scanned register operands is LR
/// or LR8. The node itself is always returned unchanged.
///
/// NOTE(review): the case labels of the switch below are absent from this
/// listing (the embedded line numbers skip 3826-3828 and 3831-3833), so which
/// operand kinds are skipped and which are scanned cannot be read from here.
3798SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3799 MachineFunction &MF = DAG.getMachineFunction();
3800 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3801
3802 assert((Op.getOpcode() == ISD::INLINEASM ||
3803 Op.getOpcode() == ISD::INLINEASM_BR) &&
3804 "Expecting Inline ASM node.");
3805
3806 // If an LR store is already known to be required then there is no point in
3807 // checking this ASM as well.
3808 if (MFI.isLRStoreRequired())
3809 return Op;
3810
3811 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3812 // type MVT::Glue. We want to ignore this last operand if that is the case.
3813 unsigned NumOps = Op.getNumOperands();
3814 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3815 --NumOps;
3816
3817 // Check all operands that may contain the LR.
// Each operand group starts with a flag word giving its kind and how many
// register operands follow; `i` is advanced inside the loop body.
3818 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3819 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3820 unsigned NumVals = Flags.getNumOperandRegisters();
3821 ++i; // Skip the ID value.
3822
3823 switch (Flags.getKind()) {
3824 default:
3825 llvm_unreachable("Bad flags!");
// NOTE(review): case labels missing here. This arm skips the group's
// operands without inspecting them.
3829 i += NumVals;
3830 break;
// NOTE(review): case labels missing here. This arm inspects every register
// in the group and returns as soon as LR or LR8 is found.
3834 for (; NumVals; --NumVals, ++i) {
3835 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3836 if (Reg != PPC::LR && Reg != PPC::LR8)
3837 continue;
3838 MFI.setLRStoreRequired();
3839 return Op;
3840 }
3841 break;
3842 }
3843 }
3844 }
3845
3846 return Op;
3847}
3848
/// Lower INIT_TRAMPOLINE.
///
/// On AIX the trampoline buffer is filled in directly: the entry point and
/// TOC pointer are loaded from the function descriptor (FPtr) and stored to
/// the buffer, followed by the nest value as the environment pointer. The
/// three stores are joined by a TokenFactor. On other ABIs the node becomes a
/// call to __trampoline_setup(Trmp, TrampSize, FPtr, Nest).
///
/// NOTE(review): several source lines are absent from this listing (the
/// embedded line numbers skip); the affected statements are marked below.
3849SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3850 SelectionDAG &DAG) const {
3851 SDValue Chain = Op.getOperand(0);
3852 SDValue Trmp = Op.getOperand(1); // trampoline
3853 SDValue FPtr = Op.getOperand(2); // nested function
3854 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3855 SDLoc dl(Op);
3856
3857 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3858
3859 if (Subtarget.isAIXABI()) {
3860 // On AIX we create a trampoline descriptor by combining the
3861 // entry point and TOC from the global descriptor (FPtr) with the
3862 // nest argument as the environment pointer.
3863 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3864 MaybeAlign PointerAlign(PointerSize);
// NOTE(review): the flag value chosen when function descriptors are
// invariant is on lines missing from this listing; only the MONone
// alternative is visible.
3865 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3868 : MachineMemOperand::MONone;
3869
// Descriptor layout used below: entry point at offset 0, TOC pointer at one
// pointer size, environment pointer at two pointer sizes.
3870 uint64_t TOCPointerOffset = 1 * PointerSize;
3871 uint64_t EnvPointerOffset = 2 * PointerSize;
3872 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3873 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3874
3875 const Value *TrampolineAddr =
3876 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3877 const Function *Func =
3878 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3879
3880 SDValue OutChains[3];
3881
3882 // Copy the entry point address from the global descriptor to the
3883 // trampoline buffer.
3884 SDValue LoadEntryPoint =
3885 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3886 PointerAlign, MMOFlags);
3887 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3888 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3889 MachinePointerInfo(TrampolineAddr, 0));
3890
3891 // Copy the TOC pointer from the global descriptor to the trampoline
3892 // buffer.
3893 SDValue TOCFromDescriptorPtr =
3894 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3895 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3896 MachinePointerInfo(Func, TOCPointerOffset),
3897 PointerAlign, MMOFlags);
3898 SDValue TrampolineTOCPointer =
3899 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3900 SDValue TOCLoadChain = TOCReg.getValue(1);
3901 OutChains[1] =
3902 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3903 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3904
3905 // Store the nest argument into the environment pointer in the trampoline
3906 // buffer.
3907 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3908 OutChains[2] =
3909 DAG.getStore(Chain, dl, Nest, EnvPointer,
3910 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3911
// NOTE(review): the beginning of the next statement (presumably the
// declaration of TokenFactor, which is returned below) is missing from this
// listing.
3913 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3914 return TokenFactor;
3915 }
3916
3917 bool isPPC64 = (PtrVT == MVT::i64);
3918 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3919
// NOTE(review): the declaration of Args is on a line missing from this
// listing.
3921 Args.emplace_back(Trmp, IntPtrTy);
3922 // TrampSize == (isPPC64 ? 48 : 40);
3923 Args.emplace_back(
3924 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3925 IntPtrTy);
3926 Args.emplace_back(FPtr, IntPtrTy);
3927 Args.emplace_back(Nest, IntPtrTy);
3928
3929 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3930 TargetLowering::CallLoweringInfo CLI(DAG);
// NOTE(review): the leading arguments of setLibCallee are on a line missing
// from this listing.
3931 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3933 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3934
// Only the second element of the pair returned by LowerCallTo is used
// (presumably the call's output chain — confirm against LowerCallTo).
3935 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3936 return CallResult.second;
3937}
3938
3939SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3940 MachineFunction &MF = DAG.getMachineFunction();
3941 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3942 EVT PtrVT = getPointerTy(MF.getDataLayout());
3943
3944 SDLoc dl(Op);
3945
3946 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3947 // vastart just stores the address of the VarArgsFrameIndex slot into the
3948 // memory location argument.
3949 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3950 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3951 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3952 MachinePointerInfo(SV));
3953 }
3954
3955 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3956 // We suppose the given va_list is already allocated.
3957 //
3958 // typedef struct {
3959 // char gpr; /* index into the array of 8 GPRs
3960 // * stored in the register save area
3961 // * gpr=0 corresponds to r3,
3962 // * gpr=1 to r4, etc.
3963 // */
3964 // char fpr; /* index into the array of 8 FPRs
3965 // * stored in the register save area
3966 // * fpr=0 corresponds to f1,
3967 // * fpr=1 to f2, etc.
3968 // */
3969 // char *overflow_arg_area;
3970 // /* location on stack that holds
3971 // * the next overflow argument
3972 // */
3973 // char *reg_save_area;
3974 // /* where r3:r10 and f1:f8 (if saved)
3975 // * are stored
3976 // */
3977 // } va_list[1];
3978
3979 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3980 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3981 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3982 PtrVT);
3983 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3984 PtrVT);
3985
3986 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3987 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3988
3989 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3990 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3991
3992 uint64_t FPROffset = 1;
3993 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3994
3995 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3996
3997 // Store first byte : number of int regs
3998 SDValue firstStore =
3999 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4000 MachinePointerInfo(SV), MVT::i8);
4001 uint64_t nextOffset = FPROffset;
4002 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4003 ConstFPROffset);
4004
4005 // Store second byte : number of float regs
4006 SDValue secondStore =
4007 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4008 MachinePointerInfo(SV, nextOffset), MVT::i8);
4009 nextOffset += StackOffset;
4010 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4011
4012 // Store second word : arguments given on stack
4013 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4014 MachinePointerInfo(SV, nextOffset));
4015 nextOffset += FrameOffset;
4016 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4017
4018 // Store third word : arguments given in registers
4019 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4020 MachinePointerInfo(SV, nextOffset));
4021}
4022
4023/// FPR - The set of FP registers that should be allocated for arguments
4024/// on Darwin and AIX.
4025static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4026 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4027 PPC::F11, PPC::F12, PPC::F13};
4028
4029/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4030/// the stack.
4031static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4032 unsigned PtrByteSize) {
4033 unsigned ArgSize = ArgVT.getStoreSize();
4034 if (Flags.isByVal())
4035 ArgSize = Flags.getByValSize();
4036
4037 // Round up to multiples of the pointer size, except for array members,
4038 // which are always packed.
4039 if (!Flags.isInConsecutiveRegs())
4040 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4041
4042 return ArgSize;
4043}
4044
4045/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4046/// on the stack.
///
/// The result starts at PtrByteSize, is raised to 16 for the vector types and
/// f128 listed below, is replaced by the requested byval alignment when that
/// exceeds PtrByteSize, and is finally overridden by a store-size based
/// alignment for arguments flagged as being in consecutive registers.
///
/// NOTE(review): the first line of the signature (return type, name and the
/// leading parameters — ArgVT and OrigVT are used in the body) is absent from
/// this listing.
4048 ISD::ArgFlagsTy Flags,
4049 unsigned PtrByteSize) {
4050 Align Alignment(PtrByteSize);
4051
4052 // Altivec parameters are padded to a 16 byte boundary.
4053 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4054 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4055 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4056 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4057 Alignment = Align(16);
4058
4059 // ByVal parameters are aligned as requested.
4060 if (Flags.isByVal()) {
4061 auto BVAlign = Flags.getNonZeroByValAlign();
// Only an alignment stricter than the pointer size changes the result.
4062 if (BVAlign > PtrByteSize) {
4063 if (BVAlign.value() % PtrByteSize != 0)
// NOTE(review): the call that reports the message below is on a line missing
// from this listing.
4065 "ByVal alignment is not a multiple of the pointer size");
4066
4067 Alignment = BVAlign;
4068 }
4069 }
4070
4071 // Array members are always packed to their original alignment.
4072 if (Flags.isInConsecutiveRegs()) {
4073 // If the array member was split into multiple registers, the first
4074 // needs to be aligned to the size of the full type. (Except for
4075 // ppcf128, which is only aligned as its f64 components.)
4076 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4077 Alignment = Align(OrigVT.getStoreSize());
4078 else
4079 Alignment = Align(ArgVT.getStoreSize());
4080 }
4081
4082 return Alignment;
4083}
4084
4085/// CalculateStackSlotUsed - Return whether this argument will use its
4086/// stack slot (instead of being passed in registers). ArgOffset,
4087/// AvailableFPRs, and AvailableVRs must hold the current argument
4088/// position, and will be updated to account for this argument.
4089static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4090 unsigned PtrByteSize, unsigned LinkageSize,
4091 unsigned ParamAreaSize, unsigned &ArgOffset,
4092 unsigned &AvailableFPRs,
4093 unsigned &AvailableVRs) {
4094 bool UseMemory = false;
4095
4096 // Respect alignment of argument on the stack.
4097 Align Alignment =
4098 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4099 ArgOffset = alignTo(ArgOffset, Alignment);
4100 // If there's no space left in the argument save area, we must
4101 // use memory (this check also catches zero-sized arguments).
4102 if (ArgOffset >= LinkageSize + ParamAreaSize)
4103 UseMemory = true;
4104
4105 // Allocate argument on the stack.
4106 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4107 if (Flags.isInConsecutiveRegsLast())
4108 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4109 // If we overran the argument save area, we must use memory
4110 // (this check catches arguments passed partially in memory)
4111 if (ArgOffset > LinkageSize + ParamAreaSize)
4112 UseMemory = true;
4113
4114 // However, if the argument is actually passed in an FPR or a VR,
4115 // we don't use memory after all.
4116 if (!Flags.isByVal()) {
4117 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4118 if (AvailableFPRs > 0) {
4119 --AvailableFPRs;
4120 return false;
4121 }
4122 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4123 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4124 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4125 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4126 if (AvailableVRs > 0) {
4127 --AvailableVRs;
4128 return false;
4129 }
4130 }
4131
4132 return UseMemory;
4133}
4134
4135/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4136/// ensure minimum alignment required for target.
///
/// NOTE(review): the first line of the signature (return type, name and the
/// `Lowering` parameter used in the body) is absent from this listing.
4138 unsigned NumBytes) {
// Rounds NumBytes up to a multiple of the stack alignment reported by
// Lowering.
4139 return alignTo(NumBytes, Lowering->getStackAlign());
4140}
4141
4142SDValue PPCTargetLowering::LowerFormalArguments(
4143 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4144 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4145 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4146 if (Subtarget.isAIXABI())
4147 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4148 InVals);
4149 if (Subtarget.is64BitELFABI())
4150 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4151 InVals);
4152 assert(Subtarget.is32BitELFABI());
4153 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4154 InVals);
4155}
4156
4157SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4158 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4159 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4160 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4161
4162 // 32-bit SVR4 ABI Stack Frame Layout:
4163 // +-----------------------------------+
4164 // +--> | Back chain |
4165 // | +-----------------------------------+
4166 // | | Floating-point register save area |
4167 // | +-----------------------------------+
4168 // | | General register save area |
4169 // | +-----------------------------------+
4170 // | | CR save word |
4171 // | +-----------------------------------+
4172 // | | VRSAVE save word |
4173 // | +-----------------------------------+
4174 // | | Alignment padding |
4175 // | +-----------------------------------+
4176 // | | Vector register save area |
4177 // | +-----------------------------------+
4178 // | | Local variable space |
4179 // | +-----------------------------------+
4180 // | | Parameter list area |
4181 // | +-----------------------------------+
4182 // | | LR save word |
4183 // | +-----------------------------------+
4184 // SP--> +--- | Back chain |
4185 // +-----------------------------------+
4186 //
4187 // Specifications:
4188 // System V Application Binary Interface PowerPC Processor Supplement
4189 // AltiVec Technology Programming Interface Manual
4190
4191 MachineFunction &MF = DAG.getMachineFunction();
4192 MachineFrameInfo &MFI = MF.getFrameInfo();
4193 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4194
4195 EVT PtrVT = getPointerTy(MF.getDataLayout());
4196 // Potential tail calls could cause overwriting of argument stack slots.
4197 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4198 (CallConv == CallingConv::Fast));
4199 const Align PtrAlign(4);
4200
4201 // Assign locations to all of the incoming arguments.
4203 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4204 *DAG.getContext());
4205
4206 // Reserve space for the linkage area on the stack.
4207 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4208 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4209 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4210
4211 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4212 CCValAssign &VA = ArgLocs[i];
4213
4214 // Arguments stored in registers.
4215 if (VA.isRegLoc()) {
4216 const TargetRegisterClass *RC;
4217 EVT ValVT = VA.getValVT();
4218
4219 switch (ValVT.getSimpleVT().SimpleTy) {
4220 default:
4221 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4222 case MVT::i1:
4223 case MVT::i32:
4224 RC = &PPC::GPRCRegClass;
4225 break;
4226 case MVT::f32:
4227 if (Subtarget.hasP8Vector())
4228 RC = &PPC::VSSRCRegClass;
4229 else if (Subtarget.hasSPE())
4230 RC = &PPC::GPRCRegClass;
4231 else
4232 RC = &PPC::F4RCRegClass;
4233 break;
4234 case MVT::f64:
4235 if (Subtarget.hasVSX())
4236 RC = &PPC::VSFRCRegClass;
4237 else if (Subtarget.hasSPE())
4238 // SPE passes doubles in GPR pairs.
4239 RC = &PPC::GPRCRegClass;
4240 else
4241 RC = &PPC::F8RCRegClass;
4242 break;
4243 case MVT::v16i8:
4244 case MVT::v8i16:
4245 case MVT::v4i32:
4246 RC = &PPC::VRRCRegClass;
4247 break;
4248 case MVT::v4f32:
4249 RC = &PPC::VRRCRegClass;
4250 break;
4251 case MVT::v2f64:
4252 case MVT::v2i64:
4253 RC = &PPC::VRRCRegClass;
4254 break;
4255 }
4256
4257 SDValue ArgValue;
4258 // Transform the arguments stored in physical registers into
4259 // virtual ones.
4260 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4261 assert(i + 1 < e && "No second half of double precision argument");
4262 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4263 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4264 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4265 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4266 if (!Subtarget.isLittleEndian())
4267 std::swap (ArgValueLo, ArgValueHi);
4268 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4269 ArgValueHi);
4270 } else {
4271 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4272 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4273 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4274 if (ValVT == MVT::i1)
4275 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4276 }
4277
4278 InVals.push_back(ArgValue);
4279 } else {
4280 // Argument stored in memory.
4281 assert(VA.isMemLoc());
4282
4283 // Get the extended size of the argument type in stack
4284 unsigned ArgSize = VA.getLocVT().getStoreSize();
4285 // Get the actual size of the argument type
4286 unsigned ObjSize = VA.getValVT().getStoreSize();
4287 unsigned ArgOffset = VA.getLocMemOffset();
4288 // Stack objects in PPC32 are right justified.
4289 ArgOffset += ArgSize - ObjSize;
4290 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4291
4292 // Create load nodes to retrieve arguments from the stack.
4293 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4294 InVals.push_back(
4295 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4296 }
4297 }
4298
4299 // Assign locations to all of the incoming aggregate by value arguments.
4300 // Aggregates passed by value are stored in the local variable space of the
4301 // caller's stack frame, right above the parameter list area.
4302 SmallVector<CCValAssign, 16> ByValArgLocs;
4303 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4304 ByValArgLocs, *DAG.getContext());
4305
4306 // Reserve stack space for the allocations in CCInfo.
4307 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4308
4309 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4310
4311 // Area that is at least reserved in the caller of this function.
4312 unsigned MinReservedArea = CCByValInfo.getStackSize();
4313 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4314
4315 // Set the size that is at least reserved in caller of this function. Tail
4316 // call optimized function's reserved stack space needs to be aligned so that
4317 // taking the difference between two stack areas will result in an aligned
4318 // stack.
4319 MinReservedArea =
4320 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4321 FuncInfo->setMinReservedArea(MinReservedArea);
4322
4324
4325 // If the function takes variable number of arguments, make a frame index for
4326 // the start of the first vararg value... for expansion of llvm.va_start.
4327 if (isVarArg) {
4328 static const MCPhysReg GPArgRegs[] = {
4329 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4330 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4331 };
4332 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4333
4334 static const MCPhysReg FPArgRegs[] = {
4335 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4336 PPC::F8
4337 };
4338 unsigned NumFPArgRegs = std::size(FPArgRegs);
4339
4340 if (useSoftFloat() || hasSPE())
4341 NumFPArgRegs = 0;
4342
4343 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4344 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4345
4346 // Make room for NumGPArgRegs and NumFPArgRegs.
4347 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4348 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4349
4351 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4352
4353 FuncInfo->setVarArgsFrameIndex(
4354 MFI.CreateStackObject(Depth, Align(8), false));
4355 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4356
4357 // The fixed integer arguments of a variadic function are stored to the
4358 // VarArgsFrameIndex on the stack so that they may be loaded by
4359 // dereferencing the result of va_next.
4360 for (MCPhysReg GPArgReg : GPArgRegs) {
4361 // Get an existing live-in vreg, or add a new one.
4362 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4363 if (!VReg)
4364 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4365
4366 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4367 SDValue Store =
4368 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4369 MemOps.push_back(Store);
4370 // Increment the address by four for the next argument to store
4371 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4372 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4373 }
4374
4375 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4376 // is set.
4377 // The double arguments are stored to the VarArgsFrameIndex
4378 // on the stack.
4379 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4380 // Get an existing live-in vreg, or add a new one.
4381 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4382 if (!VReg)
4383 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4384
4385 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4386 SDValue Store =
4387 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4388 MemOps.push_back(Store);
4389 // Increment the address by eight for the next argument to store
4390 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4391 PtrVT);
4392 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4393 }
4394 }
4395
4396 if (!MemOps.empty())
4397 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4398
4399 return Chain;
4400}
4401
4402// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4403// value to MVT::i64 and then truncate to the correct register size.
4404SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4405 EVT ObjectVT, SelectionDAG &DAG,
4406 SDValue ArgVal,
4407 const SDLoc &dl) const {
4408 if (Flags.isSExt())
4409 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4410 DAG.getValueType(ObjectVT));
4411 else if (Flags.isZExt())
4412 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4413 DAG.getValueType(ObjectVT));
4414
4415 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4416}
4417
4418SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4419 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4420 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4421 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4422 // TODO: add description of PPC stack frame format, or at least some docs.
4423 //
4424 bool isELFv2ABI = Subtarget.isELFv2ABI();
4425 bool isLittleEndian = Subtarget.isLittleEndian();
4426 MachineFunction &MF = DAG.getMachineFunction();
4427 MachineFrameInfo &MFI = MF.getFrameInfo();
4428 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4429
4430 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4431 "fastcc not supported on varargs functions");
4432
4433 EVT PtrVT = getPointerTy(MF.getDataLayout());
4434 // Potential tail calls could cause overwriting of argument stack slots.
4435 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4436 (CallConv == CallingConv::Fast));
4437 unsigned PtrByteSize = 8;
4438 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4439
4440 static const MCPhysReg GPR[] = {
4441 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4442 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4443 };
4444 static const MCPhysReg VR[] = {
4445 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4446 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4447 };
4448
4449 const unsigned Num_GPR_Regs = std::size(GPR);
4450 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4451 const unsigned Num_VR_Regs = std::size(VR);
4452
4453 // Do a first pass over the arguments to determine whether the ABI
4454 // guarantees that our caller has allocated the parameter save area
4455 // on its stack frame. In the ELFv1 ABI, this is always the case;
4456 // in the ELFv2 ABI, it is true if this is a vararg function or if
4457 // any parameter is located in a stack slot.
4458
4459 bool HasParameterArea = !isELFv2ABI || isVarArg;
4460 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4461 unsigned NumBytes = LinkageSize;
4462 unsigned AvailableFPRs = Num_FPR_Regs;
4463 unsigned AvailableVRs = Num_VR_Regs;
4464 for (const ISD::InputArg &In : Ins) {
4465 if (In.Flags.isNest())
4466 continue;
4467
4468 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4469 LinkageSize, ParamAreaSize, NumBytes,
4470 AvailableFPRs, AvailableVRs))
4471 HasParameterArea = true;
4472 }
4473
4474 // Add DAG nodes to load the arguments or copy them out of registers. On
4475 // entry to a function on PPC, the arguments start after the linkage area,
4476 // although the first ones are often in registers.
4477
4478 unsigned ArgOffset = LinkageSize;
4479 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4482 unsigned CurArgIdx = 0;
4483 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4484 SDValue ArgVal;
4485 bool needsLoad = false;
4486 EVT ObjectVT = Ins[ArgNo].VT;
4487 EVT OrigVT = Ins[ArgNo].ArgVT;
4488 unsigned ObjSize = ObjectVT.getStoreSize();
4489 unsigned ArgSize = ObjSize;
4490 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4491 if (Ins[ArgNo].isOrigArg()) {
4492 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4493 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4494 }
4495 // We re-align the argument offset for each argument, except when using the
4496 // fast calling convention, when we need to make sure we do that only when
4497 // we'll actually use a stack slot.
4498 unsigned CurArgOffset;
4499 Align Alignment;
4500 auto ComputeArgOffset = [&]() {
4501 /* Respect alignment of argument on the stack. */
4502 Alignment =
4503 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4504 ArgOffset = alignTo(ArgOffset, Alignment);
4505 CurArgOffset = ArgOffset;
4506 };
4507
4508 if (CallConv != CallingConv::Fast) {
4509 ComputeArgOffset();
4510
4511 /* Compute GPR index associated with argument offset. */
4512 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4513 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4514 }
4515
4516 // FIXME the codegen can be much improved in some cases.
4517 // We do not have to keep everything in memory.
4518 if (Flags.isByVal()) {
4519 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4520
4521 if (CallConv == CallingConv::Fast)
4522 ComputeArgOffset();
4523
4524 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4525 ObjSize = Flags.getByValSize();
4526 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4527 // Empty aggregate parameters do not take up registers. Examples:
4528 // struct { } a;
4529 // union { } b;
4530 // int c[0];
4531 // etc. However, we have to provide a place-holder in InVals, so
4532 // pretend we have an 8-byte item at the current address for that
4533 // purpose.
4534 if (!ObjSize) {
4535 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4536 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4537 InVals.push_back(FIN);
4538 continue;
4539 }
4540
4541 // Create a stack object covering all stack doublewords occupied
4542 // by the argument. If the argument is (fully or partially) on
4543 // the stack, or if the argument is fully in registers but the
4544 // caller has allocated the parameter save anyway, we can refer
4545 // directly to the caller's stack frame. Otherwise, create a
4546 // local copy in our own frame.
4547 int FI;
4548 if (HasParameterArea ||
4549 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4550 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4551 else
4552 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4553 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4554
4555 // Handle aggregates smaller than 8 bytes.
4556 if (ObjSize < PtrByteSize) {
4557 // The value of the object is its address, which differs from the
4558 // address of the enclosing doubleword on big-endian systems.
4559 SDValue Arg = FIN;
4560 if (!isLittleEndian) {
4561 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4562 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4563 }
4564 InVals.push_back(Arg);
4565
4566 if (GPR_idx != Num_GPR_Regs) {
4567 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4568 FuncInfo->addLiveInAttr(VReg, Flags);
4569 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4570 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4571 SDValue Store =
4572 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4573 MachinePointerInfo(&*FuncArg), ObjType);
4574 MemOps.push_back(Store);
4575 }
4576 // Whether we copied from a register or not, advance the offset
4577 // into the parameter save area by a full doubleword.
4578 ArgOffset += PtrByteSize;
4579 continue;
4580 }
4581
4582 // The value of the object is its address, which is the address of
4583 // its first stack doubleword.
4584 InVals.push_back(FIN);
4585
4586 // Store whatever pieces of the object are in registers to memory.
4587 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4588 if (GPR_idx == Num_GPR_Regs)
4589 break;
4590
4591 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4592 FuncInfo->addLiveInAttr(VReg, Flags);
4593 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4594 SDValue Addr = FIN;
4595 if (j) {
4596 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4597 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4598 }
4599 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4600 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4601 SDValue Store =
4602 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4603 MachinePointerInfo(&*FuncArg, j), ObjType);
4604 MemOps.push_back(Store);
4605 ++GPR_idx;
4606 }
4607 ArgOffset += ArgSize;
4608 continue;
4609 }
4610
4611 switch (ObjectVT.getSimpleVT().SimpleTy) {
4612 default: llvm_unreachable("Unhandled argument type!");
4613 case MVT::i1:
4614 case MVT::i32:
4615 case MVT::i64:
4616 if (Flags.isNest()) {
4617 // The 'nest' parameter, if any, is passed in R11.
4618 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4619 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4620
4621 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4622 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4623
4624 break;
4625 }
4626
4627 // These can be scalar arguments or elements of an integer array type
4628 // passed directly. Clang may use those instead of "byval" aggregate
4629 // types to avoid forcing arguments to memory unnecessarily.
4630 if (GPR_idx != Num_GPR_Regs) {
4631 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4632 FuncInfo->addLiveInAttr(VReg, Flags);
4633 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4634
4635 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4636 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4637 // value to MVT::i64 and then truncate to the correct register size.
4638 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4639 } else {
4640 if (CallConv == CallingConv::Fast)
4641 ComputeArgOffset();
4642
4643 needsLoad = true;
4644 ArgSize = PtrByteSize;
4645 }
4646 if (CallConv != CallingConv::Fast || needsLoad)
4647 ArgOffset += 8;
4648 break;
4649
4650 case MVT::f32:
4651 case MVT::f64:
4652 // These can be scalar arguments or elements of a float array type
4653 // passed directly. The latter are used to implement ELFv2 homogenous
4654 // float aggregates.
4655 if (FPR_idx != Num_FPR_Regs) {
4656 unsigned VReg;
4657
4658 if (ObjectVT == MVT::f32)
4659 VReg = MF.addLiveIn(FPR[FPR_idx],
4660 Subtarget.hasP8Vector()
4661 ? &PPC::VSSRCRegClass
4662 : &PPC::F4RCRegClass);
4663 else
4664 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4665 ? &PPC::VSFRCRegClass
4666 : &PPC::F8RCRegClass);
4667
4668 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4669 ++FPR_idx;
4670 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4671 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4672 // once we support fp <-> gpr moves.
4673
4674 // This can only ever happen in the presence of f32 array types,
4675 // since otherwise we never run out of FPRs before running out
4676 // of GPRs.
4677 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4678 FuncInfo->addLiveInAttr(VReg, Flags);
4679 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4680
4681 if (ObjectVT == MVT::f32) {
4682 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4683 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4684 DAG.getConstant(32, dl, MVT::i32));
4685 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4686 }
4687
4688 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4689 } else {
4690 if (CallConv == CallingConv::Fast)
4691 ComputeArgOffset();
4692
4693 needsLoad = true;
4694 }
4695
4696 // When passing an array of floats, the array occupies consecutive
4697 // space in the argument area; only round up to the next doubleword
4698 // at the end of the array. Otherwise, each float takes 8 bytes.
4699 if (CallConv != CallingConv::Fast || needsLoad) {
4700 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4701 ArgOffset += ArgSize;
4702 if (Flags.isInConsecutiveRegsLast())
4703 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4704 }
4705 break;
4706 case MVT::v4f32:
4707 case MVT::v4i32:
4708 case MVT::v8i16:
4709 case MVT::v16i8:
4710 case MVT::v2f64:
4711 case MVT::v2i64:
4712 case MVT::v1i128:
4713 case MVT::f128:
4714 // These can be scalar arguments or elements of a vector array type
4715 // passed directly. The latter are used to implement ELFv2 homogenous
4716 // vector aggregates.
4717 if (VR_idx != Num_VR_Regs) {
4718 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4719 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4720 ++VR_idx;
4721 } else {
4722 if (CallConv == CallingConv::Fast)
4723 ComputeArgOffset();
4724 needsLoad = true;
4725 }
4726 if (CallConv != CallingConv::Fast || needsLoad)
4727 ArgOffset += 16;
4728 break;
4729 }
4730
4731 // We need to load the argument to a virtual register if we determined
4732 // above that we ran out of physical registers of the appropriate type.
4733 if (needsLoad) {
4734 if (ObjSize < ArgSize && !isLittleEndian)
4735 CurArgOffset += ArgSize - ObjSize;
4736 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4737 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4738 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4739 }
4740
4741 InVals.push_back(ArgVal);
4742 }
4743
4744 // Area that is at least reserved in the caller of this function.
4745 unsigned MinReservedArea;
4746 if (HasParameterArea)
4747 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4748 else
4749 MinReservedArea = LinkageSize;
4750
4751 // Set the size that is at least reserved in caller of this function. Tail
4752 // call optimized functions' reserved stack space needs to be aligned so that
4753 // taking the difference between two stack areas will result in an aligned
4754 // stack.
4755 MinReservedArea =
4756 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4757 FuncInfo->setMinReservedArea(MinReservedArea);
4758
4759 // If the function takes variable number of arguments, make a frame index for
4760 // the start of the first vararg value... for expansion of llvm.va_start.
4761 // On ELFv2ABI spec, it writes:
4762 // C programs that are intended to be *portable* across different compilers
4763 // and architectures must use the header file <stdarg.h> to deal with variable
4764 // argument lists.
4765 if (isVarArg && MFI.hasVAStart()) {
4766 int Depth = ArgOffset;
4767
4768 FuncInfo->setVarArgsFrameIndex(
4769 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4770 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4771
4772 // If this function is vararg, store any remaining integer argument regs
4773 // to their spots on the stack so that they may be loaded by dereferencing
4774 // the result of va_next.
4775 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4776 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4777 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4778 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4779 SDValue Store =
4780 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4781 MemOps.push_back(Store);
4782 // Increment the address by four for the next argument to store
4783 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4784 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4785 }
4786 }
4787
4788 if (!MemOps.empty())
4789 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4790
4791 return Chain;
4792}
4793
4794/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4795/// adjusted to accommodate the arguments for the tailcall.
4796static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4797 unsigned ParamSize) {
4798
4799 if (!isTailCall) return 0;
4800
4802 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4803 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4804 // Remember only if the new adjustment is bigger.
4805 if (SPDiff < FI->getTailCallSPDelta())
4806 FI->setTailCallSPDelta(SPDiff);
4807
4808 return SPDiff;
4809}
4810
4811static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4812
4813static bool callsShareTOCBase(const Function *Caller,
4814 const GlobalValue *CalleeGV,
4815 const TargetMachine &TM) {
4816 // It does not make sense to call callsShareTOCBase() with a caller that
4817 // is PC Relative since PC Relative callers do not have a TOC.
4818#ifndef NDEBUG
4819 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4820 assert(!STICaller->isUsingPCRelativeCalls() &&
4821 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4822#endif
4823
4824 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4825 // don't have enough information to determine if the caller and callee share
4826 // the same TOC base, so we have to pessimistically assume they don't for
4827 // correctness.
4828 if (!CalleeGV)
4829 return false;
4830
4831 // If the callee is preemptable, then the static linker will use a plt-stub
4832 // which saves the toc to the stack, and needs a nop after the call
4833 // instruction to convert to a toc-restore.
4834 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4835 return false;
4836
4837 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4838 // We may need a TOC restore in the situation where the caller requires a
4839 // valid TOC but the callee is PC Relative and does not.
4840 const Function *F = dyn_cast<Function>(CalleeGV);
4841 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4842
4843 // If we have an Alias we can try to get the function from there.
4844 if (Alias) {
4845 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4846 F = dyn_cast<Function>(GlobalObj);
4847 }
4848
4849 // If we still have no valid function pointer we do not have enough
4850 // information to determine if the callee uses PC Relative calls so we must
4851 // assume that it does.
4852 if (!F)
4853 return false;
4854
4855 // If the callee uses PC Relative we cannot guarantee that the callee won't
4856 // clobber the TOC of the caller and so we must assume that the two
4857 // functions do not share a TOC base.
4858 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4859 if (STICallee->isUsingPCRelativeCalls())
4860 return false;
4861
4862 // If the GV is not a strong definition then we need to assume it can be
4863 // replaced by another function at link time. The function that replaces
4864 // it may not share the same TOC as the caller since the callee may be
4865 // replaced by a PC Relative version of the same function.
4866 if (!CalleeGV->isStrongDefinitionForLinker())
4867 return false;
4868
4869 // The medium and large code models are expected to provide a sufficiently
4870 // large TOC to provide all data addressing needs of a module with a
4871 // single TOC.
4872 if (CodeModel::Medium == TM.getCodeModel() ||
4874 return true;
4875
4876 // Any explicitly-specified sections and section prefixes must also match.
4877 // Also, if we're using -ffunction-sections, then each function is always in
4878 // a different section (the same is true for COMDAT functions).
4879 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4880 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4881 return false;
4882 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4883 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4884 return false;
4885 }
4886
4887 return true;
4888}
4889
4890static bool
4892 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4893 assert(Subtarget.is64BitELFABI());
4894
4895 const unsigned PtrByteSize = 8;
4896 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4897
4898 static const MCPhysReg GPR[] = {
4899 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4900 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4901 };
4902 static const MCPhysReg VR[] = {
4903 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4904 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4905 };
4906
4907 const unsigned NumGPRs = std::size(GPR);
4908 const unsigned NumFPRs = 13;
4909 const unsigned NumVRs = std::size(VR);
4910 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4911
4912 unsigned NumBytes = LinkageSize;
4913 unsigned AvailableFPRs = NumFPRs;
4914 unsigned AvailableVRs = NumVRs;
4915
4916 for (const ISD::OutputArg& Param : Outs) {
4917 if (Param.Flags.isNest()) continue;
4918
4919 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4920 LinkageSize, ParamAreaSize, NumBytes,
4921 AvailableFPRs, AvailableVRs))
4922 return true;
4923 }
4924 return false;
4925}
4926
4927static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4928 if (CB.arg_size() != CallerFn->arg_size())
4929 return false;
4930
4931 auto CalleeArgIter = CB.arg_begin();
4932 auto CalleeArgEnd = CB.arg_end();
4933 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4934
4935 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4936 const Value* CalleeArg = *CalleeArgIter;
4937 const Value* CallerArg = &(*CallerArgIter);
4938 if (CalleeArg == CallerArg)
4939 continue;
4940
4941 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4942 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4943 // }
4944 // 1st argument of callee is undef and has the same type as caller.
4945 if (CalleeArg->getType() == CallerArg->getType() &&
4946 isa<UndefValue>(CalleeArg))
4947 continue;
4948
4949 return false;
4950 }
4951
4952 return true;
4953}
4954
4955// Returns true if TCO is possible between the callers and callees
4956// calling conventions.
4957static bool
4959 CallingConv::ID CalleeCC) {
4960 // Tail calls are possible with fastcc and ccc.
4961 auto isTailCallableCC = [] (CallingConv::ID CC){
4962 return CC == CallingConv::C || CC == CallingConv::Fast;
4963 };
4964 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4965 return false;
4966
4967 // We can safely tail call both fastcc and ccc callees from a c calling
4968 // convention caller. If the caller is fastcc, we may have less stack space
4969 // than a non-fastcc caller with the same signature so disable tail-calls in
4970 // that case.
4971 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4972}
4973
4974bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4975 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4976 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4978 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4979 bool isCalleeExternalSymbol) const {
4980 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4981
4982 if (DisableSCO && !TailCallOpt) return false;
4983
4984 // Variadic argument functions are not supported.
4985 if (isVarArg) return false;
4986
4987 // Check that the calling conventions are compatible for tco.
4988 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4989 return false;
4990
4991 // Caller contains any byval parameter is not supported.
4992 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4993 return false;
4994
4995 // Callee contains any byval parameter is not supported, too.
4996 // Note: This is a quick work around, because in some cases, e.g.
4997 // caller's stack size > callee's stack size, we are still able to apply
4998 // sibling call optimization. For example, gcc is able to do SCO for caller1
4999 // in the following example, but not for caller2.
5000 // struct test {
5001 // long int a;
5002 // char ary[56];
5003 // } gTest;
5004 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5005 // b->a = v.a;
5006 // return 0;
5007 // }
5008 // void caller1(struct test a, struct test c, struct test *b) {
5009 // callee(gTest, b); }
5010 // void caller2(struct test *b) { callee(gTest, b); }
5011 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5012 return false;
5013
5014 // If callee and caller use different calling conventions, we cannot pass
5015 // parameters on stack since offsets for the parameter area may be different.
5016 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5017 return false;
5018
5019 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5020 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5021 // callee potentially have different TOC bases then we cannot tail call since
5022 // we need to restore the TOC pointer after the call.
5023 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5024 // We cannot guarantee this for indirect calls or calls to external functions.
5025 // When PC-Relative addressing is used, the concept of the TOC is no longer
5026 // applicable so this check is not required.
5027 // Check first for indirect calls.
5028 if (!Subtarget.isUsingPCRelativeCalls() &&
5029 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5030 return false;
5031
5032 // Check if we share the TOC base.
5033 if (!Subtarget.isUsingPCRelativeCalls() &&
5034 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5035 return false;
5036
5037 // TCO allows altering callee ABI, so we don't have to check further.
5038 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5039 return true;
5040
5041 if (DisableSCO) return false;
5042
5043 // If callee use the same argument list that caller is using, then we can
5044 // apply SCO on this case. If it is not, then we need to check if callee needs
5045 // stack for passing arguments.
5046 // PC Relative tail calls may not have a CallBase.
5047 // If there is no CallBase we cannot verify if we have the same argument
5048 // list so assume that we don't have the same argument list.
5049 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5050 needStackSlotPassParameters(Subtarget, Outs))
5051 return false;
5052 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5053 return false;
5054
5055 return true;
5056}
5057
5058/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5059/// for tail call optimization. Targets which want to do tail call
5060/// optimization should implement this function.
5061bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5062 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5063 CallingConv::ID CallerCC, bool isVarArg,
5064 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5065 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5066 return false;
5067
5068 // Variable argument functions are not supported.
5069 if (isVarArg)
5070 return false;
5071
5072 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5073 // Functions containing by val parameters are not supported.
5074 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5075 return false;
5076
5077 // Non-PIC/GOT tail calls are supported.
5078 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5079 return true;
5080
5081 // At the moment we can only do local tail calls (in same module, hidden
5082 // or protected) if we are generating PIC.
5083 if (CalleeGV)
5084 return CalleeGV->hasHiddenVisibility() ||
5085 CalleeGV->hasProtectedVisibility();
5086 }
5087
5088 return false;
5089}
5090
5091/// isCallCompatibleAddress - Return the immediate to use if the specified
5092/// 32-bit value is representable in the immediate field of a BxA instruction.
5095 if (!C) return nullptr;
5096
5097 int Addr = C->getZExtValue();
5098 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5099 SignExtend32<26>(Addr) != Addr)
5100 return nullptr; // Top 6 bits have to be sext of immediate.
5101
5102 return DAG
5104 (int)C->getZExtValue() >> 2, SDLoc(Op),
5106 .getNode();
5107}
5108
5109namespace {
5110
5111struct TailCallArgumentInfo {
5112 SDValue Arg;
5113 SDValue FrameIdxOp;
5114 int FrameIdx = 0;
5115
5116 TailCallArgumentInfo() = default;
5117};
5118
5119} // end anonymous namespace
5120
5121/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5123 SelectionDAG &DAG, SDValue Chain,
5124 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5125 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5126 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5127 SDValue Arg = TailCallArgs[i].Arg;
5128 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5129 int FI = TailCallArgs[i].FrameIdx;
5130 // Store relative to framepointer.
5131 MemOpChains.push_back(DAG.getStore(
5132 Chain, dl, Arg, FIN,
5134 }
5135}
5136
5137/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5138/// the appropriate stack slot for the tail call optimized function call.
5140 SDValue OldRetAddr, SDValue OldFP,
5141 int SPDiff, const SDLoc &dl) {
5142 if (SPDiff) {
5143 // Calculate the new stack slot for the return address.
5145 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5146 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5147 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5148 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5149 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5150 NewRetAddrLoc, true);
5151 SDValue NewRetAddrFrIdx =
5152 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5153 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5154 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5155 }
5156 return Chain;
5157}
5158
5159/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5160/// the position of the argument.
5162 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5163 int SPDiff, unsigned ArgOffset,
5164 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5165 int Offset = ArgOffset + SPDiff;
5166 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5167 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5168 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5169 SDValue FIN = DAG.getFrameIndex(FI, VT);
5170 TailCallArgumentInfo Info;
5171 Info.Arg = Arg;
5172 Info.FrameIdxOp = FIN;
5173 Info.FrameIdx = FI;
5174 TailCallArguments.push_back(Info);
5175}
5176
5177/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5178/// stack slot. Returns the chain as result and the loaded frame pointers in
5179/// LROpOut/FPOpout. Used when tail calling.
5180SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5181 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5182 SDValue &FPOpOut, const SDLoc &dl) const {
5183 if (SPDiff) {
5184 // Load the LR and FP stack slot for later adjusting.
5185 LROpOut = getReturnAddrFrameIndex(DAG);
5186 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5187 MachinePointerInfo());
5188 Chain = SDValue(LROpOut.getNode(), 1);
5189 }
5190 return Chain;
5191}
5192
5193/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5194/// by "Src" to address "Dst" of size "Size". Alignment information is
5195/// specified by the specific parameter attribute. The copy will be passed as
5196/// a byval function parameter.
5197/// Sometimes what we are copying is the end of a larger object, the part that
5198/// does not fit in registers.
5200 SDValue Chain, ISD::ArgFlagsTy Flags,
5201 SelectionDAG &DAG, const SDLoc &dl) {
5202 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5203 return DAG.getMemcpy(
5204 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5205 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5206}
5207
5208/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5209/// tail calls.
5211 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5212 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5213 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5214 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5216 if (!isTailCall) {
5217 if (isVector) {
5218 SDValue StackPtr;
5219 if (isPPC64)
5220 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5221 else
5222 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5223 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5224 DAG.getConstant(ArgOffset, dl, PtrVT));
5225 }
5226 MemOpChains.push_back(
5227 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5228 // Calculate and remember argument location.
5229 } else
5230 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5231 TailCallArguments);
5232}
5233
5234static void
5236 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5237 SDValue FPOp,
5238 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5239 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5240 // might overwrite each other in case of tail call optimization.
5241 SmallVector<SDValue, 8> MemOpChains2;
5242 // Do not flag preceding copytoreg stuff together with the following stuff.
5243 InGlue = SDValue();
5244 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5245 MemOpChains2, dl);
5246 if (!MemOpChains2.empty())
5247 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5248
5249 // Store the return address to the appropriate stack slot.
5250 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5251
5252 // Emit callseq_end just before tailcall node.
5253 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5254 InGlue = Chain.getValue(1);
5255}
5256
5257// Is this global address that of a function that can be called by name? (as
5258// opposed to something that must hold a descriptor for an indirect call).
5259static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5260 if (GV) {
5261 if (GV->isThreadLocal())
5262 return false;
5263
5264 return GV->getValueType()->isFunctionTy();
5265 }
5266
5267 return false;
5268}
5269
5270SDValue PPCTargetLowering::LowerCallResult(
5271 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5272 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5273 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5275 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5276 *DAG.getContext());
5277
5278 CCRetInfo.AnalyzeCallResult(
5279 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5281 : RetCC_PPC);
5282
5283 // Copy all of the result registers out of their specified physreg.
5284 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5285 CCValAssign &VA = RVLocs[i];
5286 assert(VA.isRegLoc() && "Can only return in registers!");
5287
5288 SDValue Val;
5289
5290 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5291 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5292 InGlue);
5293 Chain = Lo.getValue(1);
5294 InGlue = Lo.getValue(2);
5295 VA = RVLocs[++i]; // skip ahead to next loc
5296 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5297 InGlue);
5298 Chain = Hi.getValue(1);
5299 InGlue = Hi.getValue(2);
5300 if (!Subtarget.isLittleEndian())
5301 std::swap (Lo, Hi);
5302 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5303 } else {
5304 Val = DAG.getCopyFromReg(Chain, dl,
5305 VA.getLocReg(), VA.getLocVT(), InGlue);
5306 Chain = Val.getValue(1);
5307 InGlue = Val.getValue(2);
5308 }
5309
5310 switch (VA.getLocInfo()) {
5311 default: llvm_unreachable("Unknown loc info!");
5312 case CCValAssign::Full: break;
5313 case CCValAssign::AExt:
5314 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5315 break;
5316 case CCValAssign::ZExt:
5317 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5318 DAG.getValueType(VA.getValVT()));
5319 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5320 break;
5321 case CCValAssign::SExt:
5322 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5323 DAG.getValueType(VA.getValVT()));
5324 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5325 break;
5326 }
5327
5328 InVals.push_back(Val);
5329 }
5330
5331 return Chain;
5332}
5333
5334static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5335 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5336 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5337 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5338
5339 // PatchPoint calls are not indirect.
5340 if (isPatchPoint)
5341 return false;
5342
5344 return false;
5345
5346 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5347 // becuase the immediate function pointer points to a descriptor instead of
5348 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5349 // pointer immediate points to the global entry point, while the BLA would
5350 // need to jump to the local entry point (see rL211174).
5351 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5352 isBLACompatibleAddress(Callee, DAG))
5353 return false;
5354
5355 return true;
5356}
5357
5358// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5359static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5360 return Subtarget.isAIXABI() ||
5361 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5362}
5363
5365 const Function &Caller, const SDValue &Callee,
5366 const PPCSubtarget &Subtarget,
5367 const TargetMachine &TM,
5368 bool IsStrictFPCall = false) {
5369 if (CFlags.IsTailCall)
5370 return PPCISD::TC_RETURN;
5371
5372 unsigned RetOpc = 0;
5373 // This is a call through a function pointer.
5374 if (CFlags.IsIndirect) {
5375 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5376 // indirect calls. The save of the caller's TOC pointer to the stack will be
5377 // inserted into the DAG as part of call lowering. The restore of the TOC
5378 // pointer is modeled by using a pseudo instruction for the call opcode that
5379 // represents the 2 instruction sequence of an indirect branch and link,
5380 // immediately followed by a load of the TOC pointer from the stack save
5381 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5382 // as it is not saved or used.
5383 if (Subtarget.usePointerGlueHelper())
5384 RetOpc = PPCISD::BL_LOAD_TOC;
5385 else
5386 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5387 : PPCISD::BCTRL;
5388 } else if (Subtarget.isUsingPCRelativeCalls()) {
5389 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5390 RetOpc = PPCISD::CALL_NOTOC;
5391 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5392 // The ABIs that maintain a TOC pointer accross calls need to have a nop
5393 // immediately following the call instruction if the caller and callee may
5394 // have different TOC bases. At link time if the linker determines the calls
5395 // may not share a TOC base, the call is redirected to a trampoline inserted
5396 // by the linker. The trampoline will (among other things) save the callers
5397 // TOC pointer at an ABI designated offset in the linkage area and the
5398 // linker will rewrite the nop to be a load of the TOC pointer from the
5399 // linkage area into gpr2.
5400 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5401 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5402 RetOpc =
5403 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5404 } else
5405 RetOpc = PPCISD::CALL;
5406 if (IsStrictFPCall) {
5407 switch (RetOpc) {
5408 default:
5409 llvm_unreachable("Unknown call opcode");
5410 case PPCISD::BCTRL_LOAD_TOC:
5411 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5412 break;
5413 case PPCISD::BCTRL:
5414 RetOpc = PPCISD::BCTRL_RM;
5415 break;
5416 case PPCISD::BL_LOAD_TOC:
5417 RetOpc = PPCISD::BL_LOAD_TOC_RM;
5418 break;
5419 case PPCISD::CALL_NOTOC:
5420 RetOpc = PPCISD::CALL_NOTOC_RM;
5421 break;
5422 case PPCISD::CALL:
5423 RetOpc = PPCISD::CALL_RM;
5424 break;
5425 case PPCISD::CALL_NOP:
5426 RetOpc = PPCISD::CALL_NOP_RM;
5427 break;
5428 }
5429 }
5430 return RetOpc;
5431}
5432
5433static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5434 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5435 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5436 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5437 return SDValue(Dest, 0);
5438
5439 // Returns true if the callee is local, and false otherwise.
5440 auto isLocalCallee = [&]() {
5442 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5443
5444 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5446 };
5447
5448 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5449 // a static relocation model causes some versions of GNU LD (2.17.50, at
5450 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5451 // built with secure-PLT.
5452 bool UsePlt =
5453 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5455
5456 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5457 const TargetMachine &TM = Subtarget.getTargetMachine();
5459 auto *S =
5460 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5461
5463 return DAG.getMCSymbol(S, PtrVT);
5464 };
5465
5466 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5467 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5468 if (isFunctionGlobalAddress(GV)) {
5469 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5470
5471 if (Subtarget.isAIXABI()) {
5472 return getAIXFuncEntryPointSymbolSDNode(GV);
5473 }
5474 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5475 UsePlt ? PPCII::MO_PLT : 0);
5476 }
5477
5479 const char *SymName = S->getSymbol();
5480 if (Subtarget.isAIXABI()) {
5481 // If there exists a user-declared function whose name is the same as the
5482 // ExternalSymbol's, then we pick up the user-declared version.
5484 if (const Function *F =
5485 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5486 return getAIXFuncEntryPointSymbolSDNode(F);
5487
5488 // On AIX, direct function calls reference the symbol for the function's
5489 // entry point, which is named by prepending a "." before the function's
5490 // C-linkage name. A Qualname is returned here because an external
5491 // function entry point is a csect with XTY_ER property.
5492 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5493 auto &Context = DAG.getMachineFunction().getContext();
5494 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5495 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5497 return Sec->getQualNameSymbol();
5498 };
5499
5500 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5501 }
5502 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5503 UsePlt ? PPCII::MO_PLT : 0);
5504 }
5505
5506 // No transformation needed.
5507 assert(Callee.getNode() && "What no callee?");
5508 return Callee;
5509}
5510
5512 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5513 "Expected a CALLSEQ_STARTSDNode.");
5514
5515 // The last operand is the chain, except when the node has glue. If the node
5516 // has glue, then the last operand is the glue, and the chain is the second
5517 // last operand.
5518 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5519 if (LastValue.getValueType() != MVT::Glue)
5520 return LastValue;
5521
5522 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5523}
5524
5525// Creates the node that moves a functions address into the count register
5526// to prepare for an indirect call instruction.
5527static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5528 SDValue &Glue, SDValue &Chain,
5529 const SDLoc &dl) {
5530 SDValue MTCTROps[] = {Chain, Callee, Glue};
5531 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5532 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5533 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5534 // The glue is the second value produced.
5535 Glue = Chain.getValue(1);
5536}
5537
5539 SDValue &Glue, SDValue &Chain,
5540 SDValue CallSeqStart,
5541 const CallBase *CB, const SDLoc &dl,
5542 bool hasNest,
5543 const PPCSubtarget &Subtarget) {
5544 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5545 // entry point, but to the function descriptor (the function entry point
5546 // address is part of the function descriptor though).
5547 // The function descriptor is a three doubleword structure with the
5548 // following fields: function entry point, TOC base address and
5549 // environment pointer.
5550 // Thus for a call through a function pointer, the following actions need
5551 // to be performed:
5552 // 1. Save the TOC of the caller in the TOC save area of its stack
5553 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5554 // 2. Load the address of the function entry point from the function
5555 // descriptor.
5556 // 3. Load the TOC of the callee from the function descriptor into r2.
5557 // 4. Load the environment pointer from the function descriptor into
5558 // r11.
5559 // 5. Branch to the function entry point address.
5560 // 6. On return of the callee, the TOC of the caller needs to be
5561 // restored (this is done in FinishCall()).
5562 //
5563 // The loads are scheduled at the beginning of the call sequence, and the
5564 // register copies are flagged together to ensure that no other
5565 // operations can be scheduled in between. E.g. without flagging the
5566 // copies together, a TOC access in the caller could be scheduled between
5567 // the assignment of the callee TOC and the branch to the callee, which leads
5568 // to incorrect code.
5569
5570 // Start by loading the function address from the descriptor.
5571 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5572 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5576
5577 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5578
5579 // Registers used in building the DAG.
5580 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5581 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5582
5583 // Offsets of descriptor members.
5584 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5585 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5586
5587 const MVT RegVT = Subtarget.getScalarIntVT();
5588 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5589
5590 // One load for the functions entry point address.
5591 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5592 Alignment, MMOFlags);
5593
5594 // One for loading the TOC anchor for the module that contains the called
5595 // function.
5596 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5597 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5598 SDValue TOCPtr =
5599 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5600 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5601
5602 // One for loading the environment pointer.
5603 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5604 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5605 SDValue LoadEnvPtr =
5606 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5607 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5608
5609
5610 // Then copy the newly loaded TOC anchor to the TOC pointer.
5611 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5612 Chain = TOCVal.getValue(0);
5613 Glue = TOCVal.getValue(1);
5614
5615 // If the function call has an explicit 'nest' parameter, it takes the
5616 // place of the environment pointer.
5617 assert((!hasNest || !Subtarget.isAIXABI()) &&
5618 "Nest parameter is not supported on AIX.");
5619 if (!hasNest) {
5620 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5621 Chain = EnvVal.getValue(0);
5622 Glue = EnvVal.getValue(1);
5623 }
5624
5625 // The rest of the indirect call sequence is the same as the non-descriptor
5626 // DAG.
5627 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5628}
5629
5631 SDValue &Glue, SDValue &Chain,
5632 SDValue CallSeqStart, const CallBase *CB,
5633 const SDLoc &dl, bool hasNest,
5634 const PPCSubtarget &Subtarget) {
5635 // On AIX there is a feature ("out of line glue code") which uses a special
5636 // trampoline function ._ptrgl to do the indirect call. If this option is
5637 // enabled we instead simply load the address of the descriptor into gpr11,
5638 // with the arguments in the 'normal' registers and branch to the ._ptrgl
5639 // stub.
5640 const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
5641 SDValue MoveToPhysicalReg =
5642 DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);
5643 Chain = MoveToPhysicalReg.getValue(0);
5644 Glue = MoveToPhysicalReg.getValue(1);
5645}
5646
5647static void
5649 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5650 SelectionDAG &DAG,
5651 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5652 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5653 const PPCSubtarget &Subtarget) {
5654 const bool IsPPC64 = Subtarget.isPPC64();
5655 // MVT for a general purpose register.
5656 const MVT RegVT = Subtarget.getScalarIntVT();
5657
5658 // First operand is always the chain.
5659 Ops.push_back(Chain);
5660
5661 // If it's a direct call pass the callee as the second operand.
5662 if (!CFlags.IsIndirect)
5663 Ops.push_back(Callee);
5664 else if (Subtarget.usePointerGlueHelper()) {
5665 Ops.push_back(Callee);
5666 // Add the register used to pass the descriptor address.
5667 Ops.push_back(
5668 DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT));
5669 } else {
5670 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5671
5672 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5673 // on the stack (this would have been done in `LowerCall_64SVR4` or
5674 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5675 // represents both the indirect branch and a load that restores the TOC
5676 // pointer from the linkage area. The operand for the TOC restore is an add
5677 // of the TOC save offset to the stack pointer. This must be the second
5678 // operand: after the chain input but before any other variadic arguments.
5679 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5680 // saved or used.
5681 if (isTOCSaveRestoreRequired(Subtarget)) {
5682 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5683
5684 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5685 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5686 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5687 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5688 Ops.push_back(AddTOC);
5689 }
5690
5691 // Add the register used for the environment pointer.
5692 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5693 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5694 RegVT));
5695
5696
5697 // Add CTR register as callee so a bctr can be emitted later.
5698 if (CFlags.IsTailCall)
5699 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5700 }
5701
5702 // If this is a tail call add stack pointer delta.
5703 if (CFlags.IsTailCall)
5704 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5705
5706 // Add argument registers to the end of the list so that they are known live
5707 // into the call.
5708 for (const auto &[Reg, N] : RegsToPass)
5709 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5710
5711 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5712 // no way to mark dependencies as implicit here.
5713 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5714 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5715 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5716 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5717
5718 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5719 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5720 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5721
5722 // Add a register mask operand representing the call-preserved registers.
5723 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5724 const uint32_t *Mask =
5725 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5726 assert(Mask && "Missing call preserved mask for calling convention");
5727 Ops.push_back(DAG.getRegisterMask(Mask));
5728
5729 // If the glue is valid, it is the last operand.
5730 if (Glue.getNode())
5731 Ops.push_back(Glue);
5732}
5733
5734SDValue PPCTargetLowering::FinishCall(
5735 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5736 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5737 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5738 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5739 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5740
5741 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5742 Subtarget.isAIXABI())
5743 setUsesTOCBasePtr(DAG);
5744
5745 unsigned CallOpc =
5746 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5747 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5748
5749 if (!CFlags.IsIndirect)
5750 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5751 else if (Subtarget.usesFunctionDescriptors()) {
5752 if (Subtarget.usePointerGlueHelper()) {
5753 prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
5754 CFlags.HasNest, Subtarget);
5755 SDValue PtrGlueCallee =
5756 DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout()));
5757 Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget);
5758 } else {
5759 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5760 dl, CFlags.HasNest, Subtarget);
5761 }
5762 } else {
5763 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5764 }
5765
5766 // Build the operand list for the call instruction.
5768 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5769 SPDiff, Subtarget);
5770
5771 // Emit tail call.
5772 if (CFlags.IsTailCall) {
5773 // Indirect tail call when using PC Relative calls do not have the same
5774 // constraints.
5775 assert(((Callee.getOpcode() == ISD::Register &&
5776 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5777 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5778 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5779 isa<ConstantSDNode>(Callee) ||
5780 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5781 "Expecting a global address, external symbol, absolute value, "
5782 "register or an indirect tail call when PC Relative calls are "
5783 "used.");
5784 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5785 assert(CallOpc == PPCISD::TC_RETURN &&
5786 "Unexpected call opcode for a tail call.");
5788 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5789 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5790 return Ret;
5791 }
5792
5793 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5794 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5795 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5796 Glue = Chain.getValue(1);
5797
5798 // When performing tail call optimization the callee pops its arguments off
5799 // the stack. Account for this here so these bytes can be pushed back on in
5800 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5801 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5803 ? NumBytes
5804 : 0;
5805
5806 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5807 Glue = Chain.getValue(1);
5808
5809 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5810 DAG, InVals);
5811}
5812
5814 CallingConv::ID CalleeCC = CB->getCallingConv();
5815 const Function *CallerFunc = CB->getCaller();
5816 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5817 const Function *CalleeFunc = CB->getCalledFunction();
5818 if (!CalleeFunc)
5819 return false;
5820 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5821
5824
5825 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5826 CalleeFunc->getAttributes(), Outs, *this,
5827 CalleeFunc->getDataLayout());
5828
5829 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5830 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5831 false /*isCalleeExternalSymbol*/);
5832}
5833
5834bool PPCTargetLowering::isEligibleForTCO(
5835 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5836 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5838 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5839 bool isCalleeExternalSymbol) const {
5840 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5841 return false;
5842
5843 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5844 return IsEligibleForTailCallOptimization_64SVR4(
5845 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5846 isCalleeExternalSymbol);
5847 else
5848 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5849 isVarArg, Ins);
5850}
5851
5852SDValue
5853PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5854 SmallVectorImpl<SDValue> &InVals) const {
5855 SelectionDAG &DAG = CLI.DAG;
5856 SDLoc &dl = CLI.DL;
5858 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5860 SDValue Chain = CLI.Chain;
5861 SDValue Callee = CLI.Callee;
5862 bool &isTailCall = CLI.IsTailCall;
5863 CallingConv::ID CallConv = CLI.CallConv;
5864 bool isVarArg = CLI.IsVarArg;
5865 bool isPatchPoint = CLI.IsPatchPoint;
5866 const CallBase *CB = CLI.CB;
5867
5868 if (isTailCall) {
5870 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5871 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5872 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5873 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5874
5875 isTailCall =
5876 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5877 &(MF.getFunction()), IsCalleeExternalSymbol);
5878 if (isTailCall) {
5879 ++NumTailCalls;
5880 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5881 ++NumSiblingCalls;
5882
5883 // PC Relative calls no longer guarantee that the callee is a Global
5884 // Address Node. The callee could be an indirect tail call in which
5885 // case the SDValue for the callee could be a load (to load the address
5886 // of a function pointer) or it may be a register copy (to move the
5887 // address of the callee from a function parameter into a virtual
5888 // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5889 assert((Subtarget.isUsingPCRelativeCalls() ||
5890 isa<GlobalAddressSDNode>(Callee)) &&
5891 "Callee should be an llvm::Function object.");
5892
5893 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5894 << "\nTCO callee: ");
5895 LLVM_DEBUG(Callee.dump());
5896 }
5897 }
5898
5899 if (!isTailCall && CB && CB->isMustTailCall())
5900 report_fatal_error("failed to perform tail call elimination on a call "
5901 "site marked musttail");
5902
5903 // When long calls (i.e. indirect calls) are always used, calls are always
5904 // made via function pointer. If we have a function name, first translate it
5905 // into a pointer.
5906 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5907 !isTailCall)
5908 Callee = LowerGlobalAddress(Callee, DAG);
5909
5910 CallFlags CFlags(
5911 CallConv, isTailCall, isVarArg, isPatchPoint,
5912 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5913 // hasNest
5914 Subtarget.is64BitELFABI() &&
5915 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5916 CLI.NoMerge);
5917
5918 if (Subtarget.isAIXABI())
5919 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5920 InVals, CB);
5921
5922 assert(Subtarget.isSVR4ABI());
5923 if (Subtarget.isPPC64())
5924 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5925 InVals, CB);
5926 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5927 InVals, CB);
5928}
5929
5930SDValue PPCTargetLowering::LowerCall_32SVR4(
5931 SDValue Chain, SDValue Callee, CallFlags CFlags,
5933 const SmallVectorImpl<SDValue> &OutVals,
5934 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5936 const CallBase *CB) const {
5937 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5938 // of the 32-bit SVR4 ABI stack frame layout.
5939
5940 const CallingConv::ID CallConv = CFlags.CallConv;
5941 const bool IsVarArg = CFlags.IsVarArg;
5942 const bool IsTailCall = CFlags.IsTailCall;
5943
5944 assert((CallConv == CallingConv::C ||
5945 CallConv == CallingConv::Cold ||
5946 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5947
5948 const Align PtrAlign(4);
5949
5950 MachineFunction &MF = DAG.getMachineFunction();
5951
5952 // Mark this function as potentially containing a function that contains a
5953 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5954 // and restoring the callers stack pointer in this functions epilog. This is
5955 // done because by tail calling the called function might overwrite the value
5956 // in this function's (MF) stack pointer stack slot 0(SP).
5957 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5958 CallConv == CallingConv::Fast)
5959 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5960
5961 // Count how many bytes are to be pushed on the stack, including the linkage
5962 // area, parameter list area and the part of the local variable space which
5963 // contains copies of aggregates which are passed by value.
5964
5965 // Assign locations to all of the outgoing arguments.
5967 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5968
5969 // Reserve space for the linkage area on the stack.
5970 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5971 PtrAlign);
5972
5973 if (IsVarArg) {
5974 // Handle fixed and variable vector arguments differently.
5975 // Fixed vector arguments go into registers as long as registers are
5976 // available. Variable vector arguments always go into memory.
5977 unsigned NumArgs = Outs.size();
5978
5979 for (unsigned i = 0; i != NumArgs; ++i) {
5980 MVT ArgVT = Outs[i].VT;
5981 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5982 bool Result;
5983
5984 if (!ArgFlags.isVarArg()) {
5985 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5986 Outs[i].OrigTy, CCInfo);
5987 } else {
5989 ArgFlags, Outs[i].OrigTy, CCInfo);
5990 }
5991
5992 if (Result) {
5993#ifndef NDEBUG
5994 errs() << "Call operand #" << i << " has unhandled type "
5995 << ArgVT << "\n";
5996#endif
5997 llvm_unreachable(nullptr);
5998 }
5999 }
6000 } else {
6001 // All arguments are treated the same.
6002 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6003 }
6004
6005 // Assign locations to all of the outgoing aggregate by value arguments.
6006 SmallVector<CCValAssign, 16> ByValArgLocs;
6007 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6008
6009 // Reserve stack space for the allocations in CCInfo.
6010 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6011
6012 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6013
6014 // Size of the linkage area, parameter list area and the part of the local
6015 // space variable where copies of aggregates which are passed by value are
6016 // stored.
6017 unsigned NumBytes = CCByValInfo.getStackSize();
6018
6019 // Calculate by how many bytes the stack has to be adjusted in case of tail
6020 // call optimization.
6021 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6022
6023 // Adjust the stack pointer for the new arguments...
6024 // These operations are automatically eliminated by the prolog/epilog pass
6025 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6026 SDValue CallSeqStart = Chain;
6027
6028 // Load the return address and frame pointer so it can be moved somewhere else
6029 // later.
6030 SDValue LROp, FPOp;
6031 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6032
6033 // Set up a copy of the stack pointer for use loading and storing any
6034 // arguments that may not fit in the registers available for argument
6035 // passing.
6036 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6037
6039 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6040 SmallVector<SDValue, 8> MemOpChains;
6041
6042 bool seenFloatArg = false;
6043 // Walk the register/memloc assignments, inserting copies/loads.
6044 // i - Tracks the index into the list of registers allocated for the call
6045 // RealArgIdx - Tracks the index into the list of actual function arguments
6046 // j - Tracks the index into the list of byval arguments
6047 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6048 i != e;
6049 ++i, ++RealArgIdx) {
6050 CCValAssign &VA = ArgLocs[i];
6051 SDValue Arg = OutVals[RealArgIdx];
6052 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6053
6054 if (Flags.isByVal()) {
6055 // Argument is an aggregate which is passed by value, thus we need to
6056 // create a copy of it in the local variable space of the current stack
6057 // frame (which is the stack frame of the caller) and pass the address of
6058 // this copy to the callee.
6059 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6060 CCValAssign &ByValVA = ByValArgLocs[j++];
6061 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6062
6063 // Memory reserved in the local variable space of the callers stack frame.
6064 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6065
6066 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6067 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6068 StackPtr, PtrOff);
6069
6070 // Create a copy of the argument in the local area of the current
6071 // stack frame.
6072 SDValue MemcpyCall =
6073 CreateCopyOfByValArgument(Arg, PtrOff,
6074 CallSeqStart.getNode()->getOperand(0),
6075 Flags, DAG, dl);
6076
6077 // This must go outside the CALLSEQ_START..END.
6078 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6079 SDLoc(MemcpyCall));
6080 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6081 NewCallSeqStart.getNode());
6082 Chain = CallSeqStart = NewCallSeqStart;
6083
6084 // Pass the address of the aggregate copy on the stack either in a
6085 // physical register or in the parameter list area of the current stack
6086 // frame to the callee.
6087 Arg = PtrOff;
6088 }
6089
6090 // When useCRBits() is true, there can be i1 arguments.
6091 // It is because getRegisterType(MVT::i1) => MVT::i1,
6092 // and for other integer types getRegisterType() => MVT::i32.
6093 // Extend i1 and ensure callee will get i32.
6094 if (Arg.getValueType() == MVT::i1)
6095 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6096 dl, MVT::i32, Arg);
6097
6098 if (VA.isRegLoc()) {
6099 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6100 // Put argument in a physical register.
6101 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6102 bool IsLE = Subtarget.isLittleEndian();
6103 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6104 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6105 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6106 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6107 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6108 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6109 SVal.getValue(0)));
6110 } else
6111 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6112 } else {
6113 // Put argument in the parameter list area of the current stack frame.
6114 assert(VA.isMemLoc());
6115 unsigned LocMemOffset = VA.getLocMemOffset();
6116
6117 if (!IsTailCall) {
6118 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6119 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6120 StackPtr, PtrOff);
6121
6122 MemOpChains.push_back(
6123 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6124 } else {
6125 // Calculate and remember argument location.
6126 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6127 TailCallArguments);
6128 }
6129 }
6130 }
6131
6132 if (!MemOpChains.empty())
6133 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6134
6135 // Build a sequence of copy-to-reg nodes chained together with token chain
6136 // and flag operands which copy the outgoing args into the appropriate regs.
6137 SDValue InGlue;
6138 for (const auto &[Reg, N] : RegsToPass) {
6139 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6140 InGlue = Chain.getValue(1);
6141 }
6142
6143 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6144 // registers.
6145 if (IsVarArg) {
6146 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6147 SDValue Ops[] = { Chain, InGlue };
6148
6149 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6150 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6151
6152 InGlue = Chain.getValue(1);
6153 }
6154
6155 if (IsTailCall)
6156 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6157 TailCallArguments);
6158
6159 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6160 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6161}
6162
6163// Copy an argument into memory, being careful to do this outside the
6164// call sequence for the call to which the argument belongs.
6165SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6166 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6167 SelectionDAG &DAG, const SDLoc &dl) const {
6168 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6169 CallSeqStart.getNode()->getOperand(0),
6170 Flags, DAG, dl);
6171 // The MEMCPY must go outside the CALLSEQ_START..END.
6172 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6173 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6174 SDLoc(MemcpyCall));
6175 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6176 NewCallSeqStart.getNode());
6177 return NewCallSeqStart;
6178}
6179
6180SDValue PPCTargetLowering::LowerCall_64SVR4(
6181 SDValue Chain, SDValue Callee, CallFlags CFlags,
6183 const SmallVectorImpl<SDValue> &OutVals,
6184 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6186 const CallBase *CB) const {
6187 bool isELFv2ABI = Subtarget.isELFv2ABI();
6188 bool isLittleEndian = Subtarget.isLittleEndian();
6189 unsigned NumOps = Outs.size();
6190 bool IsSibCall = false;
6191 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6192
6193 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6194 unsigned PtrByteSize = 8;
6195
6196 MachineFunction &MF = DAG.getMachineFunction();
6197
6198 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6199 IsSibCall = true;
6200
6201 // Mark this function as potentially containing a function that contains a
6202 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6203 // and restoring the callers stack pointer in this functions epilog. This is
6204 // done because by tail calling the called function might overwrite the value
6205 // in this function's (MF) stack pointer stack slot 0(SP).
6206 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6207 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6208
6209 assert(!(IsFastCall && CFlags.IsVarArg) &&
6210 "fastcc not supported on varargs functions");
6211
6212 // Count how many bytes are to be pushed on the stack, including the linkage
6213 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6214 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6215 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6216 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6217 unsigned NumBytes = LinkageSize;
6218 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6219
6220 static const MCPhysReg GPR[] = {
6221 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6222 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6223 };
6224 static const MCPhysReg VR[] = {
6225 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6226 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6227 };
6228
6229 const unsigned NumGPRs = std::size(GPR);
6230 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6231 const unsigned NumVRs = std::size(VR);
6232
6233 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6234 // can be passed to the callee in registers.
6235 // For the fast calling convention, there is another check below.
6236 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6237 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6238 if (!HasParameterArea) {
6239 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6240 unsigned AvailableFPRs = NumFPRs;
6241 unsigned AvailableVRs = NumVRs;
6242 unsigned NumBytesTmp = NumBytes;
6243 for (unsigned i = 0; i != NumOps; ++i) {
6244 if (Outs[i].Flags.isNest()) continue;
6245 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6246 PtrByteSize, LinkageSize, ParamAreaSize,
6247 NumBytesTmp, AvailableFPRs, AvailableVRs))
6248 HasParameterArea = true;
6249 }
6250 }
6251
6252 // When using the fast calling convention, we don't provide backing for
6253 // arguments that will be in registers.
6254 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6255
6256 // Avoid allocating parameter area for fastcc functions if all the arguments
6257 // can be passed in the registers.
6258 if (IsFastCall)
6259 HasParameterArea = false;
6260
6261 // Add up all the space actually used.
6262 for (unsigned i = 0; i != NumOps; ++i) {
6263 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6264 EVT ArgVT = Outs[i].VT;
6265 EVT OrigVT = Outs[i].ArgVT;
6266
6267 if (Flags.isNest())
6268 continue;
6269
6270 if (IsFastCall) {
6271 if (Flags.isByVal()) {
6272 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6273 if (NumGPRsUsed > NumGPRs)
6274 HasParameterArea = true;
6275 } else {
6276 switch (ArgVT.getSimpleVT().SimpleTy) {
6277 default: llvm_unreachable("Unexpected ValueType for argument!");
6278 case MVT::i1:
6279 case MVT::i32:
6280 case MVT::i64:
6281 if (++NumGPRsUsed <= NumGPRs)
6282 continue;
6283 break;
6284 case MVT::v4i32:
6285 case MVT::v8i16:
6286 case MVT::v16i8:
6287 case MVT::v2f64:
6288 case MVT::v2i64:
6289 case MVT::v1i128:
6290 case MVT::f128:
6291 if (++NumVRsUsed <= NumVRs)
6292 continue;
6293 break;
6294 case MVT::v4f32:
6295 if (++NumVRsUsed <= NumVRs)
6296 continue;
6297 break;
6298 case MVT::f32:
6299 case MVT::f64:
6300 if (++NumFPRsUsed <= NumFPRs)
6301 continue;
6302 break;
6303 }
6304 HasParameterArea = true;
6305 }
6306 }
6307
6308 /* Respect alignment of argument on the stack. */
6309 auto Alignement =
6310 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6311 NumBytes = alignTo(NumBytes, Alignement);
6312
6313 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6314 if (Flags.isInConsecutiveRegsLast())
6315 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6316 }
6317
6318 unsigned NumBytesActuallyUsed = NumBytes;
6319
6320 // In the old ELFv1 ABI,
6321 // the prolog code of the callee may store up to 8 GPR argument registers to
6322 // the stack, allowing va_start to index over them in memory if its varargs.
6323 // Because we cannot tell if this is needed on the caller side, we have to
6324 // conservatively assume that it is needed. As such, make sure we have at
6325 // least enough stack space for the caller to store the 8 GPRs.
6326 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6327 // really requires memory operands, e.g. a vararg function.
6328 if (HasParameterArea)
6329 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6330 else
6331 NumBytes = LinkageSize;
6332
6333 // Tail call needs the stack to be aligned.
6334 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6335 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6336
6337 int SPDiff = 0;
6338
6339 // Calculate by how many bytes the stack has to be adjusted in case of tail
6340 // call optimization.
6341 if (!IsSibCall)
6342 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6343
6344 // To protect arguments on the stack from being clobbered in a tail call,
6345 // force all the loads to happen before doing any other lowering.
6346 if (CFlags.IsTailCall)
6347 Chain = DAG.getStackArgumentTokenFactor(Chain);
6348
6349 // Adjust the stack pointer for the new arguments...
6350 // These operations are automatically eliminated by the prolog/epilog pass
6351 if (!IsSibCall)
6352 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6353 SDValue CallSeqStart = Chain;
6354
6355 // Load the return address and frame pointer so it can be moved somewhere else
6356 // later.
6357 SDValue LROp, FPOp;
6358 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6359
6360 // Set up a copy of the stack pointer for use loading and storing any
6361 // arguments that may not fit in the registers available for argument
6362 // passing.
6363 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6364
6365 // Figure out which arguments are going to go in registers, and which in
6366 // memory. Also, if this is a vararg function, floating point operations
6367 // must be stored to our stack, and loaded into integer regs as well, if
6368 // any integer regs are available for argument passing.
6369 unsigned ArgOffset = LinkageSize;
6370
6372 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6373
6374 SmallVector<SDValue, 8> MemOpChains;
6375 for (unsigned i = 0; i != NumOps; ++i) {
6376 SDValue Arg = OutVals[i];
6377 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6378 EVT ArgVT = Outs[i].VT;
6379 EVT OrigVT = Outs[i].ArgVT;
6380
6381 // PtrOff will be used to store the current argument to the stack if a
6382 // register cannot be found for it.
6383 SDValue PtrOff;
6384
6385 // We re-align the argument offset for each argument, except when using the
6386 // fast calling convention, when we need to make sure we do that only when
6387 // we'll actually use a stack slot.
6388 auto ComputePtrOff = [&]() {
6389 /* Respect alignment of argument on the stack. */
6390 auto Alignment =
6391 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6392 ArgOffset = alignTo(ArgOffset, Alignment);
6393
6394 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6395
6396 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6397 };
6398
6399 if (!IsFastCall) {
6400 ComputePtrOff();
6401
6402 /* Compute GPR index associated with argument offset. */
6403 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6404 GPR_idx = std::min(GPR_idx, NumGPRs);
6405 }
6406
6407 // Promote integers to 64-bit values.
6408 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6409 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6410 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6411 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6412 }
6413
6414 // FIXME memcpy is used way more than necessary. Correctness first.
6415 // Note: "by value" is code for passing a structure by value, not
6416 // basic types.
6417 if (Flags.isByVal()) {
6418 // Note: Size includes alignment padding, so
6419 // struct x { short a; char b; }
6420 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6421 // These are the proper values we need for right-justifying the
6422 // aggregate in a parameter register.
6423 unsigned Size = Flags.getByValSize();
6424
6425 // An empty aggregate parameter takes up no storage and no
6426 // registers.
6427 if (Size == 0)
6428 continue;
6429
6430 if (IsFastCall)
6431 ComputePtrOff();
6432
6433 // All aggregates smaller than 8 bytes must be passed right-justified.
6434 if (Size==1 || Size==2 || Size==4) {
6435 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6436 if (GPR_idx != NumGPRs) {
6437 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6438 MachinePointerInfo(), VT);
6439 MemOpChains.push_back(Load.getValue(1));
6440 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6441
6442 ArgOffset += PtrByteSize;
6443 continue;
6444 }
6445 }
6446
6447 if (GPR_idx == NumGPRs && Size < 8) {
6448 SDValue AddPtr = PtrOff;
6449 if (!isLittleEndian) {
6450 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6451 PtrOff.getValueType());
6452 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6453 }
6454 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6455 CallSeqStart,
6456 Flags, DAG, dl);
6457 ArgOffset += PtrByteSize;
6458 continue;
6459 }
6460 // Copy the object to parameter save area if it can not be entirely passed
6461 // by registers.
6462 // FIXME: we only need to copy the parts which need to be passed in
6463 // parameter save area. For the parts passed by registers, we don't need
6464 // to copy them to the stack although we need to allocate space for them
6465 // in parameter save area.
6466 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6467 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6468 CallSeqStart,
6469 Flags, DAG, dl);
6470
6471 // When a register is available, pass a small aggregate right-justified.
6472 if (Size < 8 && GPR_idx != NumGPRs) {
6473 // The easiest way to get this right-justified in a register
6474 // is to copy the structure into the rightmost portion of a
6475 // local variable slot, then load the whole slot into the
6476 // register.
6477 // FIXME: The memcpy seems to produce pretty awful code for
6478 // small aggregates, particularly for packed ones.
6479 // FIXME: It would be preferable to use the slot in the
6480 // parameter save area instead of a new local variable.
6481 SDValue AddPtr = PtrOff;
6482 if (!isLittleEndian) {
6483 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6484 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6485 }
6486 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6487 CallSeqStart,
6488 Flags, DAG, dl);
6489
6490 // Load the slot into the register.
6491 SDValue Load =
6492 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6493 MemOpChains.push_back(Load.getValue(1));
6494 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6495
6496 // Done with this argument.
6497 ArgOffset += PtrByteSize;
6498 continue;
6499 }
6500
6501 // For aggregates larger than PtrByteSize, copy the pieces of the
6502 // object that fit into registers from the parameter save area.
6503 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6504 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6505 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6506 if (GPR_idx != NumGPRs) {
6507 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6508 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6509 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6510 MachinePointerInfo(), ObjType);
6511
6512 MemOpChains.push_back(Load.getValue(1));
6513 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6514 ArgOffset += PtrByteSize;
6515 } else {
6516 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6517 break;
6518 }
6519 }
6520 continue;
6521 }
6522
6523 switch (Arg.getSimpleValueType().SimpleTy) {
6524 default: llvm_unreachable("Unexpected ValueType for argument!");
6525 case MVT::i1:
6526 case MVT::i32:
6527 case MVT::i64:
6528 if (Flags.isNest()) {
6529 // The 'nest' parameter, if any, is passed in R11.
6530 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6531 break;
6532 }
6533
6534 // These can be scalar arguments or elements of an integer array type
6535 // passed directly. Clang may use those instead of "byval" aggregate
6536 // types to avoid forcing arguments to memory unnecessarily.
6537 if (GPR_idx != NumGPRs) {
6538 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6539 } else {
6540 if (IsFastCall)
6541 ComputePtrOff();
6542
6543 assert(HasParameterArea &&
6544 "Parameter area must exist to pass an argument in memory.");
6545 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6546 true, CFlags.IsTailCall, false, MemOpChains,
6547 TailCallArguments, dl);
6548 if (IsFastCall)
6549 ArgOffset += PtrByteSize;
6550 }
6551 if (!IsFastCall)
6552 ArgOffset += PtrByteSize;
6553 break;
6554 case MVT::f32:
6555 case MVT::f64: {
6556 // These can be scalar arguments or elements of a float array type
6557 // passed directly. The latter are used to implement ELFv2 homogenous
6558 // float aggregates.
6559
6560 // Named arguments go into FPRs first, and once they overflow, the
6561 // remaining arguments go into GPRs and then the parameter save area.
6562 // Unnamed arguments for vararg functions always go to GPRs and
6563 // then the parameter save area. For now, put all arguments to vararg
6564 // routines always in both locations (FPR *and* GPR or stack slot).
6565 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6566 bool NeededLoad = false;
6567
6568 // First load the argument into the next available FPR.
6569 if (FPR_idx != NumFPRs)
6570 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6571
6572 // Next, load the argument into GPR or stack slot if needed.
6573 if (!NeedGPROrStack)
6574 ;
6575 else if (GPR_idx != NumGPRs && !IsFastCall) {
6576 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6577 // once we support fp <-> gpr moves.
6578
6579 // In the non-vararg case, this can only ever happen in the
6580 // presence of f32 array types, since otherwise we never run
6581 // out of FPRs before running out of GPRs.
6582 SDValue ArgVal;
6583
6584 // Double values are always passed in a single GPR.
6585 if (Arg.getValueType() != MVT::f32) {
6586 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6587
6588 // Non-array float values are extended and passed in a GPR.
6589 } else if (!Flags.isInConsecutiveRegs()) {
6590 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6591 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6592
6593 // If we have an array of floats, we collect every odd element
6594 // together with its predecessor into one GPR.
6595 } else if (ArgOffset % PtrByteSize != 0) {
6596 SDValue Lo, Hi;
6597 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6598 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6599 if (!isLittleEndian)
6600 std::swap(Lo, Hi);
6601 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6602
6603 // The final element, if even, goes into the first half of a GPR.
6604 } else if (Flags.isInConsecutiveRegsLast()) {
6605 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6606 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6607 if (!isLittleEndian)
6608 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6609 DAG.getConstant(32, dl, MVT::i32));
6610
6611 // Non-final even elements are skipped; they will be handled
6612 // together with the subsequent argument on the next go-around.
6613 } else
6614 ArgVal = SDValue();
6615
6616 if (ArgVal.getNode())
6617 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6618 } else {
6619 if (IsFastCall)
6620 ComputePtrOff();
6621
6622 // Single-precision floating-point values are mapped to the
6623 // second (rightmost) word of the stack doubleword.
6624 if (Arg.getValueType() == MVT::f32 &&
6625 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6626 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6627 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6628 }
6629
6630 assert(HasParameterArea &&
6631 "Parameter area must exist to pass an argument in memory.");
6632 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6633 true, CFlags.IsTailCall, false, MemOpChains,
6634 TailCallArguments, dl);
6635
6636 NeededLoad = true;
6637 }
6638 // When passing an array of floats, the array occupies consecutive
6639 // space in the argument area; only round up to the next doubleword
6640 // at the end of the array. Otherwise, each float takes 8 bytes.
6641 if (!IsFastCall || NeededLoad) {
6642 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6643 Flags.isInConsecutiveRegs()) ? 4 : 8;
6644 if (Flags.isInConsecutiveRegsLast())
6645 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6646 }
6647 break;
6648 }
6649 case MVT::v4f32:
6650 case MVT::v4i32:
6651 case MVT::v8i16:
6652 case MVT::v16i8:
6653 case MVT::v2f64:
6654 case MVT::v2i64:
6655 case MVT::v1i128:
6656 case MVT::f128:
6657 // These can be scalar arguments or elements of a vector array type
6658 // passed directly. The latter are used to implement ELFv2 homogenous
6659 // vector aggregates.
6660
6661 // For a varargs call, named arguments go into VRs or on the stack as
6662 // usual; unnamed arguments always go to the stack or the corresponding
6663 // GPRs when within range. For now, we always put the value in both
6664 // locations (or even all three).
6665 if (CFlags.IsVarArg) {
6666 assert(HasParameterArea &&
6667 "Parameter area must exist if we have a varargs call.");
6668 // We could elide this store in the case where the object fits
6669 // entirely in R registers. Maybe later.
6670 SDValue Store =
6671 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6672 MemOpChains.push_back(Store);
6673 if (VR_idx != NumVRs) {
6674 SDValue Load =
6675 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6676 MemOpChains.push_back(Load.getValue(1));
6677 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6678 }
6679 ArgOffset += 16;
6680 for (unsigned i=0; i<16; i+=PtrByteSize) {
6681 if (GPR_idx == NumGPRs)
6682 break;
6683 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6684 DAG.getConstant(i, dl, PtrVT));
6685 SDValue Load =
6686 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6687 MemOpChains.push_back(Load.getValue(1));
6688 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6689 }
6690 break;
6691 }
6692
6693 // Non-varargs Altivec params go into VRs or on the stack.
6694 if (VR_idx != NumVRs) {
6695 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6696 } else {
6697 if (IsFastCall)
6698 ComputePtrOff();
6699
6700 assert(HasParameterArea &&
6701 "Parameter area must exist to pass an argument in memory.");
6702 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6703 true, CFlags.IsTailCall, true, MemOpChains,
6704 TailCallArguments, dl);
6705 if (IsFastCall)
6706 ArgOffset += 16;
6707 }
6708
6709 if (!IsFastCall)
6710 ArgOffset += 16;
6711 break;
6712 }
6713 }
6714
6715 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6716 "mismatch in size of parameter area");
6717 (void)NumBytesActuallyUsed;
6718
6719 if (!MemOpChains.empty())
6720 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6721
6722 // Check if this is an indirect call (MTCTR/BCTRL).
6723 // See prepareDescriptorIndirectCall and buildCallOperands for more
6724 // information about calls through function pointers in the 64-bit SVR4 ABI.
6725 if (CFlags.IsIndirect) {
6726 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6727 // caller in the TOC save area.
6728 if (isTOCSaveRestoreRequired(Subtarget)) {
6729 assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6730 // Load r2 into a virtual register and store it to the TOC save area.
6731 setUsesTOCBasePtr(DAG);
6732 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6733 // TOC save area offset.
6734 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6735 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6736 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6737 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6739 DAG.getMachineFunction(), TOCSaveOffset));
6740 }
6741 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6742 // This does not mean the MTCTR instruction must use R12; it's easier
6743 // to model this as an extra parameter, so do that.
6744 if (isELFv2ABI && !CFlags.IsPatchPoint)
6745 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6746 }
6747
6748 // Build a sequence of copy-to-reg nodes chained together with token chain
6749 // and flag operands which copy the outgoing args into the appropriate regs.
6750 SDValue InGlue;
6751 for (const auto &[Reg, N] : RegsToPass) {
6752 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6753 InGlue = Chain.getValue(1);
6754 }
6755
6756 if (CFlags.IsTailCall && !IsSibCall)
6757 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6758 TailCallArguments);
6759
6760 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6761 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6762}
6763
6764// Returns true when the shadow of a general purpose argument register
6765// in the parameter save area is aligned to at least 'RequiredAlign'.
6766static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6767 assert(RequiredAlign.value() <= 16 &&
6768 "Required alignment greater than stack alignment.");
6769 switch (Reg) {
6770 default:
6771 report_fatal_error("called on invalid register.");
6772 case PPC::R5:
6773 case PPC::R9:
6774 case PPC::X3:
6775 case PPC::X5:
6776 case PPC::X7:
6777 case PPC::X9:
6778 // These registers are 16 byte aligned which is the most strict aligment
6779 // we can support.
6780 return true;
6781 case PPC::R3:
6782 case PPC::R7:
6783 case PPC::X4:
6784 case PPC::X6:
6785 case PPC::X8:
6786 case PPC::X10:
6787 // The shadow of these registers in the PSA is 8 byte aligned.
6788 return RequiredAlign <= 8;
6789 case PPC::R4:
6790 case PPC::R6:
6791 case PPC::R8:
6792 case PPC::R10:
6793 return RequiredAlign <= 4;
6794 }
6795}
6796
// Argument-location assignment routine for AIX; it is the function handed to
// CCState::AnalyzeFormalArguments by LowerFormalArguments_AIX. For the single
// argument described by ValNo/ValVT/LocVT/ArgFlags it allocates registers
// and/or parameter save area (PSA) stack space in 'State' and records the
// resulting locations with State.addLoc.
//
// Handled here, in order: nest arguments (R11/X11), by-value aggregates,
// integers (i1/i32/i64), floats (f32/f64) and 16-byte vectors.
// Fatal errors are reported for f128, for a second nest argument, for by-value
// alignment greater than 16 and for any other value type.
//
// Every handled path returns false. The final 'return true' follows a switch
// whose cases all return or report a fatal error, so it is not expected to be
// reached.
// NOTE(review): 'FPR' used below is not declared in this function; presumably
// a file-level register list - confirm.
6797 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6798 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6799 Type *OrigTy, CCState &State) {
6800 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6801 State.getMachineFunction().getSubtarget());
6802 const bool IsPPC64 = Subtarget.isPPC64();
6803 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6804 const Align PtrAlign(PtrSize);
6805 const Align StackAlign(16);
6806 const MVT RegVT = Subtarget.getScalarIntVT();
6807
6808 if (ValVT == MVT::f128)
6809 report_fatal_error("f128 is unimplemented on AIX.");
6810
6811 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6812 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6813 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6814 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6815 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6816 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6817
6818 static const MCPhysReg VR[] = {// Vector registers.
6819 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6820 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6821 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6822
6823 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6824
// A nest argument goes in R11/X11; failing to allocate it means that
// register was already taken, which is reported as a fatal error.
6825 if (ArgFlags.isNest()) {
6826 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6827 if (!EnvReg)
6828 report_fatal_error("More then one nest argument.");
6829 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6830 return false;
6831 }
6832
6833 if (ArgFlags.isByVal()) {
6834 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6835 if (ByValAlign > StackAlign)
6836 report_fatal_error("Pass-by-value arguments with alignment greater than "
6837 "16 are not supported.");
6838
6839 const unsigned ByValSize = ArgFlags.getByValSize();
// The object is aligned to the larger of its by-value alignment and the
// pointer alignment.
6840 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6841
6842 // An empty aggregate parameter takes up no storage and no registers,
6843 // but needs a MemLoc for a stack slot for the formal arguments side.
6844 if (ByValSize == 0) {
// NOTE(review): listing line 6845 (the start of the statement continued on
// the next line) is absent from this extract.
6846 State.getStackSize(), RegVT, LocInfo));
6847 return false;
6848 }
6849
6850 // Shadow allocate any registers that are not properly aligned.
6851 unsigned NextReg = State.getFirstUnallocated(GPRs);
6852 while (NextReg != GPRs.size() &&
6853 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6854 // Shadow allocate the next register since its alignment is not strict
// enough.
6855 MCRegister Reg = State.AllocateReg(GPRs);
6856 // Allocate the stack space shadowed by said register.
6857 State.AllocateStack(PtrSize, PtrAlign);
6858 assert(Reg && "Alocating register unexpectedly failed.");
6859 (void)Reg;
6860 NextReg = State.getFirstUnallocated(GPRs);
6861 }
6862
// Reserve the whole (alignment-padded) object in the PSA, then hand out one
// GPR per pointer-sized piece until the GPRs run out.
6863 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6864 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6865 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6866 if (MCRegister Reg = State.AllocateReg(GPRs))
6867 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6868 else {
// NOTE(review): listing lines 6869-6870 (the start of the statement
// continued on the next line) are absent from this extract.
6871 LocInfo));
6872 break;
6873 }
6874 }
6875 return false;
6876 }
6877
6878 // Arguments always reserve parameter save area.
6879 switch (ValVT.SimpleTy) {
6880 default:
6881 report_fatal_error("Unhandled value type for argument.");
6882 case MVT::i64:
6883 // i64 arguments should have been split to i32 for PPC32.
6884 assert(IsPPC64 && "PPC32 should have split i64 values.");
6885 [[fallthrough]];
6886 case MVT::i1:
6887 case MVT::i32: {
6888 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6889 // AIX integer arguments are always passed in register width.
6890 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6891 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
// NOTE(review): listing line 6892 (the remainder of the conditional
// expression above) is absent from this extract.
6893 if (MCRegister Reg = State.AllocateReg(GPRs))
6894 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6895 else
6896 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6897
6898 return false;
6899 }
6900 case MVT::f32:
6901 case MVT::f64: {
6902 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6903 const unsigned StoreSize = LocVT.getStoreSize();
6904 // Floats are always 4-byte aligned in the PSA on AIX.
6905 // This includes f64 in 64-bit mode for ABI compatibility.
6906 const unsigned Offset =
6907 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6908 MCRegister FReg = State.AllocateReg(FPR);
6909 if (FReg)
6910 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6911
6912 // Reserve and initialize GPRs or initialize the PSA as required.
6913 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6914 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6915 assert(FReg && "An FPR should be available when a GPR is reserved.");
6916 if (State.isVarArg()) {
6917 // Successfully reserved GPRs are only initialized for vararg calls.
6918 // Custom handling is required for:
6919 // f64 in PPC32 needs to be split into 2 GPRs.
6920 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6921 State.addLoc(
6922 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6923 }
6924 } else {
6925 // If there are insufficient GPRs, the PSA needs to be initialized.
6926 // Initialization occurs even if an FPR was initialized for
6927 // compatibility with the AIX XL compiler. The full memory for the
6928 // argument will be initialized even if a prior word is saved in GPR.
6929 // A custom memLoc is used when the argument also passes in FPR so
6930 // that the callee handling can skip over it easily.
6931 State.addLoc(
6932 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6933 LocInfo)
6934 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6935 break;
6936 }
6937 }
6938
6939 return false;
6940 }
6941 case MVT::v4f32:
6942 case MVT::v4i32:
6943 case MVT::v8i16:
6944 case MVT::v16i8:
6945 case MVT::v2i64:
6946 case MVT::v2f64:
6947 case MVT::v1i128: {
6948 const unsigned VecSize = 16;
6949 const Align VecAlign(VecSize);
6950
6951 if (!State.isVarArg()) {
6952 // If there are vector registers remaining we don't consume any stack
6953 // space.
6954 if (MCRegister VReg = State.AllocateReg(VR)) {
6955 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6956 return false;
6957 }
6958 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6959 // might be allocated in the portion of the PSA that is shadowed by the
6960 // GPRs.
6961 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6962 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6963 return false;
6964 }
6965
// Everything below applies only to calls of vararg functions.
6966 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6967 // Burn any underaligned registers and their shadowed stack space until
6968 // we reach the required alignment.
6969 while (NextRegIndex != GPRs.size() &&
6970 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6971 // Shadow allocate register and its stack shadow.
6972 MCRegister Reg = State.AllocateReg(GPRs);
6973 State.AllocateStack(PtrSize, PtrAlign);
6974 assert(Reg && "Allocating register unexpectedly failed.");
6975 (void)Reg;
6976 NextRegIndex = State.getFirstUnallocated(GPRs);
6977 }
6978
6979 // Vectors that are passed as fixed arguments are handled differently.
6980 // They are passed in VRs if any are available (unlike arguments passed
6981 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6982 // functions)
6983 if (!ArgFlags.isVarArg()) {
6984 if (MCRegister VReg = State.AllocateReg(VR)) {
6985 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6986 // Shadow allocate GPRs and stack space even though we pass in a VR.
6987 for (unsigned I = 0; I != VecSize; I += PtrSize)
6988 State.AllocateReg(GPRs);
6989 State.AllocateStack(VecSize, VecAlign);
6990 return false;
6991 }
6992 // No vector registers remain so pass on the stack.
6993 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6994 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6995 return false;
6996 }
6997
6998 // If all GPRs are consumed then we pass the argument fully on the stack.
6999 if (NextRegIndex == GPRs.size()) {
7000 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7001 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7002 return false;
7003 }
7004
7005 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7006 // half of the argument, and then need to pass the remaining half on the
7007 // stack.
7008 if (GPRs[NextRegIndex] == PPC::R9) {
7009 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7010 State.addLoc(
7011 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7012
7013 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7014 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7015 assert(FirstReg && SecondReg &&
7016 "Allocating R9 or R10 unexpectedly failed.");
7017 State.addLoc(
7018 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7019 State.addLoc(
7020 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7021 return false;
7022 }
7023
7024 // We have enough GPRs to fully pass the vector argument, and we have
7025 // already consumed any underaligned registers. Start with the custom
7026 // MemLoc and then the custom RegLocs.
7027 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7028 State.addLoc(
7029 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7030 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7031 const MCRegister Reg = State.AllocateReg(GPRs);
7032 assert(Reg && "Failed to allocated register for vararg vector argument");
7033 State.addLoc(
7034 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7035 }
7036 return false;
7037 }
7038 }
7039 return true;
7040 }
7041
// Selects the register class used for a formal argument of simple value type
// SVT:
//   i1 / i32 / i64 -> G8RC when IsPPC64 is set, otherwise GPRC
//   f32            -> VSSRC when HasP8Vector is set, otherwise F4RC
//   f64            -> VSFRC when HasVSX is set, otherwise F8RC
//   v4f32, v4i32, v8i16, v16i8, v2i64, v2f64, v1i128 -> VRRC
// Any other value type is reported as a fatal error.
// NOTE(review): the declaration line carrying the function name and its first
// parameter (used as SVT in the body) is absent from this extract (listing
// line 7043); call sites later in this file spell the name getRegClassForSVT
// and pass four arguments - confirm against the upstream source.
7042 // So far, this function is only used by LowerFormalArguments_AIX()
7044 bool IsPPC64,
7045 bool HasP8Vector,
7046 bool HasVSX) {
// An i64 value type is only accepted when targeting 64-bit.
7047 assert((IsPPC64 || SVT != MVT::i64) &&
7048 "i64 should have been split for 32-bit codegen.");
7049
7050 switch (SVT) {
7051 default:
7052 report_fatal_error("Unexpected value type for formal argument");
7053 case MVT::i1:
7054 case MVT::i32:
7055 case MVT::i64:
// All integer types share one general purpose register class per target
// width.
7056 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7057 case MVT::f32:
7058 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7059 case MVT::f64:
7060 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7061 case MVT::v4f32:
7062 case MVT::v4i32:
7063 case MVT::v8i16:
7064 case MVT::v16i8:
7065 case MVT::v2i64:
7066 case MVT::v2f64:
7067 case MVT::v1i128:
7068 return &PPC::VRRCRegClass;
7069 }
7070 }
7071
// Narrows an integer argument that was received in the wider type LocVT back
// to its value type ValVT. When Flags marks the argument as sign- or
// zero-extended, ArgValue is first wrapped in an AssertSext / AssertZext node
// of type LocVT carrying ValVT; the result of a TRUNCATE to ValVT is returned.
// Both types must be scalar integers and ValVT must be strictly narrower than
// LocVT (checked by assertions).
// NOTE(review): the declaration line carrying the function name and the
// leading parameters (used as Flags and ValVT in the body) is absent from this
// extract (listing line 7072); the call site later in this file spells the
// name truncateScalarIntegerArg - confirm against the upstream source.
7073 SelectionDAG &DAG, SDValue ArgValue,
7074 MVT LocVT, const SDLoc &dl) {
7075 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7076 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7077
// Record the extension the flags promise for the bits above ValVT. With
// neither flag set the value is truncated without any assertion node.
7078 if (Flags.isSExt())
7079 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7080 DAG.getValueType(ValVT));
7081 else if (Flags.isZExt())
7082 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7083 DAG.getValueType(ValVT));
7084
7085 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7086 }
7087
7088static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7089 const unsigned LASize = FL->getLinkageSize();
7090
7091 if (PPC::GPRCRegClass.contains(Reg)) {
7092 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7093 "Reg must be a valid argument register!");
7094 return LASize + 4 * (Reg - PPC::R3);
7095 }
7096
7097 if (PPC::G8RCRegClass.contains(Reg)) {
7098 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7099 "Reg must be a valid argument register!");
7100 return LASize + 8 * (Reg - PPC::X3);
7101 }
7102
7103 llvm_unreachable("Only general purpose registers expected.");
7104}
7105
7106// AIX ABI Stack Frame Layout:
7107//
7108// Low Memory +--------------------------------------------+
7109// SP +---> | Back chain | ---+
7110// | +--------------------------------------------+ |
7111// | | Saved Condition Register | |
7112// | +--------------------------------------------+ |
7113// | | Saved Linkage Register | |
7114// | +--------------------------------------------+ | Linkage Area
7115// | | Reserved for compilers | |
7116// | +--------------------------------------------+ |
7117// | | Reserved for binders | |
7118// | +--------------------------------------------+ |
7119// | | Saved TOC pointer | ---+
7120// | +--------------------------------------------+
7121// | | Parameter save area |
7122// | +--------------------------------------------+
7123// | | Alloca space |
7124// | +--------------------------------------------+
7125// | | Local variable space |
7126// | +--------------------------------------------+
7127// | | Float/int conversion temporary |
7128// | +--------------------------------------------+
7129// | | Save area for AltiVec registers |
7130// | +--------------------------------------------+
7131// | | AltiVec alignment padding |
7132// | +--------------------------------------------+
7133// | | Save area for VRSAVE register |
7134// | +--------------------------------------------+
7135// | | Save area for General Purpose registers |
7136// | +--------------------------------------------+
7137// | | Save area for Floating Point registers |
7138// | +--------------------------------------------+
7139// +---- | Back chain |
7140// High Memory +--------------------------------------------+
7141//
7142// Specifications:
7143// AIX 7.2 Assembler Language Reference
7144// Subroutine linkage convention
7145
7146SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7147 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7148 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7149 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7150
7151 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7152 CallConv == CallingConv::Fast) &&
7153 "Unexpected calling convention!");
7154
7155 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7156 report_fatal_error("Tail call support is unimplemented on AIX.");
7157
7158 if (useSoftFloat())
7159 report_fatal_error("Soft float support is unimplemented on AIX.");
7160
7161 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7162
7163 const bool IsPPC64 = Subtarget.isPPC64();
7164 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7165
7166 // Assign locations to all of the incoming arguments.
7168 MachineFunction &MF = DAG.getMachineFunction();
7169 MachineFrameInfo &MFI = MF.getFrameInfo();
7170 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7171 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7172
7173 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7174 // Reserve space for the linkage area on the stack.
7175 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7176 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7177 uint64_t SaveStackPos = CCInfo.getStackSize();
7178 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7179 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7180
7182
7183 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7184 CCValAssign &VA = ArgLocs[I++];
7185 MVT LocVT = VA.getLocVT();
7186 MVT ValVT = VA.getValVT();
7187 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7188
7189 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7190 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7191 // For compatibility with the AIX XL compiler, the float args in the
7192 // parameter save area are initialized even if the argument is available
7193 // in register. The caller is required to initialize both the register
7194 // and memory, however, the callee can choose to expect it in either.
7195 // The memloc is dismissed here because the argument is retrieved from
7196 // the register.
7197 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7198 continue;
7199
7200 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7201 const TargetRegisterClass *RegClass = getRegClassForSVT(
7202 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7203 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7204 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7205 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7206 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7207 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7208 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7209 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7210 MachinePointerInfo(), Align(PtrByteSize));
7211 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7212 MemOps.push_back(StoreReg);
7213 }
7214
7215 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7216 unsigned StoreSize =
7217 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7218 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7219 }
7220
7221 auto HandleMemLoc = [&]() {
7222 const unsigned LocSize = LocVT.getStoreSize();
7223 const unsigned ValSize = ValVT.getStoreSize();
7224 assert((ValSize <= LocSize) &&
7225 "Object size is larger than size of MemLoc");
7226 int CurArgOffset = VA.getLocMemOffset();
7227 // Objects are right-justified because AIX is big-endian.
7228 if (LocSize > ValSize)
7229 CurArgOffset += LocSize - ValSize;
7230 // Potential tail calls could cause overwriting of argument stack slots.
7231 const bool IsImmutable =
7233 (CallConv == CallingConv::Fast));
7234 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7235 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7236 SDValue ArgValue =
7237 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7238
7239 // While the ABI specifies the argument type is (sign or zero) extended
7240 // out to register width, not all code is compliant. We truncate and
7241 // re-extend to be more forgiving of these callers when the argument type
7242 // is smaller than register width.
7243 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7244 ValVT.isInteger() &&
7245 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7246 // It is possible to have either real integer values
7247 // or integers that were not originally integers.
7248 // In the latter case, these could have come from structs,
7249 // and these integers would not have an extend on the parameter.
7250 // Since these types of integers do not have an extend specified
7251 // in the first place, the type of extend that we do should not matter.
7252 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7253 ? MVT::i8
7254 : ArgVT;
7255 SDValue ArgValueTrunc =
7256 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7257 SDValue ArgValueExt =
7258 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7259 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7260 InVals.push_back(ArgValueExt);
7261 } else {
7262 InVals.push_back(ArgValue);
7263 }
7264 };
7265
7266 // Vector arguments to VaArg functions are passed both on the stack, and
7267 // in any available GPRs. Load the value from the stack and add the GPRs
7268 // as live ins.
7269 if (VA.isMemLoc() && VA.needsCustom()) {
7270 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7271 assert(isVarArg && "Only use custom memloc for vararg.");
7272 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7273 // matching custom RegLocs.
7274 const unsigned OriginalValNo = VA.getValNo();
7275 (void)OriginalValNo;
7276
7277 auto HandleCustomVecRegLoc = [&]() {
7278 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7279 "Missing custom RegLoc.");
7280 VA = ArgLocs[I++];
7281 assert(VA.getValVT().isVector() &&
7282 "Unexpected Val type for custom RegLoc.");
7283 assert(VA.getValNo() == OriginalValNo &&
7284 "ValNo mismatch between custom MemLoc and RegLoc.");
7286 MF.addLiveIn(VA.getLocReg(),
7287 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7288 Subtarget.hasVSX()));
7289 };
7290
7291 HandleMemLoc();
7292 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7293 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7294 // R10.
7295 HandleCustomVecRegLoc();
7296 HandleCustomVecRegLoc();
7297
7298 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7299 // we passed the vector in R5, R6, R7 and R8.
7300 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7301 assert(!IsPPC64 &&
7302 "Only 2 custom RegLocs expected for 64-bit codegen.");
7303 HandleCustomVecRegLoc();
7304 HandleCustomVecRegLoc();
7305 }
7306
7307 continue;
7308 }
7309
7310 if (VA.isRegLoc()) {
7311 if (VA.getValVT().isScalarInteger())
7313 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7314 switch (VA.getValVT().SimpleTy) {
7315 default:
7316 report_fatal_error("Unhandled value type for argument.");
7317 case MVT::f32:
7319 break;
7320 case MVT::f64:
7322 break;
7323 }
7324 } else if (VA.getValVT().isVector()) {
7325 switch (VA.getValVT().SimpleTy) {
7326 default:
7327 report_fatal_error("Unhandled value type for argument.");
7328 case MVT::v16i8:
7330 break;
7331 case MVT::v8i16:
7333 break;
7334 case MVT::v4i32:
7335 case MVT::v2i64:
7336 case MVT::v1i128:
7338 break;
7339 case MVT::v4f32:
7340 case MVT::v2f64:
7342 break;
7343 }
7344 }
7345 }
7346
7347 if (Flags.isByVal() && VA.isMemLoc()) {
7348 const unsigned Size =
7349 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7350 PtrByteSize);
7351 const int FI = MF.getFrameInfo().CreateFixedObject(
7352 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7353 /* IsAliased */ true);
7354 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7355 InVals.push_back(FIN);
7356
7357 continue;
7358 }
7359
7360 if (Flags.isByVal()) {
7361 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7362
7363 const MCPhysReg ArgReg = VA.getLocReg();
7364 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7365
7366 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7367 const int FI = MF.getFrameInfo().CreateFixedObject(
7368 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7369 /* IsAliased */ true);
7370 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7371 InVals.push_back(FIN);
7372
7373 // Add live ins for all the RegLocs for the same ByVal.
7374 const TargetRegisterClass *RegClass =
7375 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7376
7377 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7378 unsigned Offset) {
7379 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7380 // Since the callers side has left justified the aggregate in the
7381 // register, we can simply store the entire register into the stack
7382 // slot.
7383 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7384 // The store to the fixedstack object is needed because accessing a
7385 // field of the ByVal will use a gep and load. Ideally we will optimize
7386 // to extracting the value from the register directly, and elide the
7387 // stores when the arguments address is not taken, but that will need to
7388 // be future work.
7389 SDValue Store = DAG.getStore(
7390 CopyFrom.getValue(1), dl, CopyFrom,
7393
7394 MemOps.push_back(Store);
7395 };
7396
7397 unsigned Offset = 0;
7398 HandleRegLoc(VA.getLocReg(), Offset);
7399 Offset += PtrByteSize;
7400 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7401 Offset += PtrByteSize) {
7402 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7403 "RegLocs should be for ByVal argument.");
7404
7405 const CCValAssign RL = ArgLocs[I++];
7406 HandleRegLoc(RL.getLocReg(), Offset);
7408 }
7409
7410 if (Offset != StackSize) {
7411 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7412 "Expected MemLoc for remaining bytes.");
7413 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7414 // Consume the MemLoc. The InVal has already been emitted, so nothing
7415 // more needs to be done.
7416 ++I;
7417 }
7418
7419 continue;
7420 }
7421
7422 if (VA.isRegLoc() && !VA.needsCustom()) {
7423 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7424 Register VReg =
7425 MF.addLiveIn(VA.getLocReg(),
7426 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7427 Subtarget.hasVSX()));
7428 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7429 if (ValVT.isScalarInteger() &&
7430 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7431 ArgValue =
7432 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7433 }
7434 InVals.push_back(ArgValue);
7435 continue;
7436 }
7437 if (VA.isMemLoc()) {
7438 HandleMemLoc();
7439 continue;
7440 }
7441 }
7442
7443 // On AIX a minimum of 8 words is saved to the parameter save area.
7444 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7445 // Area that is at least reserved in the caller of this function.
7446 unsigned CallerReservedArea = std::max<unsigned>(
7447 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7448
7449 // Set the size that is at least reserved in caller of this function. Tail
7450 // call optimized function's reserved stack space needs to be aligned so
7451 // that taking the difference between two stack areas will result in an
7452 // aligned stack.
7453 CallerReservedArea =
7454 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7455 FuncInfo->setMinReservedArea(CallerReservedArea);
7456
7457 if (isVarArg) {
7458 int VAListIndex = 0;
7459 // If any of the optional arguments are passed in register then the fixed
7460 // stack object we spill into is not immutable. Create a fixed stack object
7461 // that overlaps the remainder of the parameter save area.
7462 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7463 unsigned FixedStackSize =
7464 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7465 VAListIndex =
7466 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7467 /* IsImmutable */ false, /* IsAliased */ true);
7468 } else {
7469 // All the arguments passed through ellipses are on the stack. Create a
7470 // dummy fixed stack object the same size as a pointer since we don't
7471 // know the actual size.
7472 VAListIndex =
7473 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7474 /* IsImmutable */ true, /* IsAliased */ true);
7475 }
7476
7477 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7478 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7479
7480 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7481 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7482
7483 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7484 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7485 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7486
7487 // The fixed integer arguments of a variadic function are stored to the
7488 // VarArgsFrameIndex on the stack so that they may be loaded by
7489 // dereferencing the result of va_next.
7490 for (unsigned
7491 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7492 Offset = 0;
7493 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7494
7495 const Register VReg =
7496 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7497 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7498
7499 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7500 MachinePointerInfo MPI =
7501 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7502 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7503 MemOps.push_back(Store);
7504 // Increment the address for the next argument to store.
7505 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7506 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7507 }
7508 }
7509
7510 if (!MemOps.empty())
7511 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7512
7513 return Chain;
7514}
7515
7516SDValue PPCTargetLowering::LowerCall_AIX(
7517 SDValue Chain, SDValue Callee, CallFlags CFlags,
7519 const SmallVectorImpl<SDValue> &OutVals,
7520 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7522 const CallBase *CB) const {
7523 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7524 // AIX ABI stack frame layout.
7525
7526 assert((CFlags.CallConv == CallingConv::C ||
7527 CFlags.CallConv == CallingConv::Cold ||
7528 CFlags.CallConv == CallingConv::Fast) &&
7529 "Unexpected calling convention!");
7530
7531 if (CFlags.IsPatchPoint)
7532 report_fatal_error("This call type is unimplemented on AIX.");
7533
7534 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7535
7536 MachineFunction &MF = DAG.getMachineFunction();
7538 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7539 *DAG.getContext());
7540
7541 // Reserve space for the linkage save area (LSA) on the stack.
7542 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7543 // [SP][CR][LR][2 x reserved][TOC].
7544 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7545 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7546 const bool IsPPC64 = Subtarget.isPPC64();
7547 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7548 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7549 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7550 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7551
7552 // The prolog code of the callee may store up to 8 GPR argument registers to
7553 // the stack, allowing va_start to index over them in memory if the callee
7554 // is variadic.
7555 // Because we cannot tell if this is needed on the caller side, we have to
7556 // conservatively assume that it is needed. As such, make sure we have at
7557 // least enough stack space for the caller to store the 8 GPRs.
7558 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7559 const unsigned NumBytes = std::max<unsigned>(
7560 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7561
7562 // Adjust the stack pointer for the new arguments...
7563 // These operations are automatically eliminated by the prolog/epilog pass.
7564 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7565 SDValue CallSeqStart = Chain;
7566
7568 SmallVector<SDValue, 8> MemOpChains;
7569
7570 // Set up a copy of the stack pointer for loading and storing any
7571 // arguments that may not fit in the registers available for argument
7572 // passing.
7573 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7574 : DAG.getRegister(PPC::R1, MVT::i32);
7575
7576 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7577 const unsigned ValNo = ArgLocs[I].getValNo();
7578 SDValue Arg = OutVals[ValNo];
7579 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7580
7581 if (Flags.isByVal()) {
7582 const unsigned ByValSize = Flags.getByValSize();
7583
7584 // Nothing to do for zero-sized ByVals on the caller side.
7585 if (!ByValSize) {
7586 ++I;
7587 continue;
7588 }
7589
7590 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7591 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7592 (LoadOffset != 0)
7593 ? DAG.getObjectPtrOffset(
7594 dl, Arg, TypeSize::getFixed(LoadOffset))
7595 : Arg,
7596 MachinePointerInfo(), VT);
7597 };
7598
7599 unsigned LoadOffset = 0;
7600
7601 // Initialize registers, which are fully occupied by the by-val argument.
7602 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7603 SDValue Load = GetLoad(PtrVT, LoadOffset);
7604 MemOpChains.push_back(Load.getValue(1));
7605 LoadOffset += PtrByteSize;
7606 const CCValAssign &ByValVA = ArgLocs[I++];
7607 assert(ByValVA.getValNo() == ValNo &&
7608 "Unexpected location for pass-by-value argument.");
7609 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7610 }
7611
7612 if (LoadOffset == ByValSize)
7613 continue;
7614
7615 // There must be one more loc to handle the remainder.
7616 assert(ArgLocs[I].getValNo() == ValNo &&
7617 "Expected additional location for by-value argument.");
7618
7619 if (ArgLocs[I].isMemLoc()) {
7620 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7621 const CCValAssign &ByValVA = ArgLocs[I++];
7622 ISD::ArgFlagsTy MemcpyFlags = Flags;
7623 // Only memcpy the bytes that don't pass in register.
7624 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7625 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7626 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7627 dl, Arg, TypeSize::getFixed(LoadOffset))
7628 : Arg,
7630 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7631 CallSeqStart, MemcpyFlags, DAG, dl);
7632 continue;
7633 }
7634
7635 // Initialize the final register residue.
7636 // Any residue that occupies the final by-val arg register must be
7637 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7638 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7639 // 2 and 1 byte loads.
7640 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7641 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7642 "Unexpected register residue for by-value argument.");
7643 SDValue ResidueVal;
7644 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7645 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7646 const MVT VT =
7647 N == 1 ? MVT::i8
7648 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7649 SDValue Load = GetLoad(VT, LoadOffset);
7650 MemOpChains.push_back(Load.getValue(1));
7651 LoadOffset += N;
7652 Bytes += N;
7653
7654 // By-val arguments are passed left-justfied in register.
7655 // Every load here needs to be shifted, otherwise a full register load
7656 // should have been used.
7657 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7658 "Unexpected load emitted during handling of pass-by-value "
7659 "argument.");
7660 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7661 EVT ShiftAmountTy =
7662 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7663 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7664 SDValue ShiftedLoad =
7665 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7666 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7667 ShiftedLoad)
7668 : ShiftedLoad;
7669 }
7670
7671 const CCValAssign &ByValVA = ArgLocs[I++];
7672 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7673 continue;
7674 }
7675
7676 CCValAssign &VA = ArgLocs[I++];
7677 const MVT LocVT = VA.getLocVT();
7678 const MVT ValVT = VA.getValVT();
7679
7680 switch (VA.getLocInfo()) {
7681 default:
7682 report_fatal_error("Unexpected argument extension type.");
7683 case CCValAssign::Full:
7684 break;
7685 case CCValAssign::ZExt:
7686 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7687 break;
7688 case CCValAssign::SExt:
7689 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7690 break;
7691 }
7692
7693 if (VA.isRegLoc() && !VA.needsCustom()) {
7694 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7695 continue;
7696 }
7697
7698 // Vector arguments passed to VarArg functions need custom handling when
7699 // they are passed (at least partially) in GPRs.
7700 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7701 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7702 // Store value to its stack slot.
7703 SDValue PtrOff =
7704 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7705 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7706 SDValue Store =
7707 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7708 MemOpChains.push_back(Store);
7709 const unsigned OriginalValNo = VA.getValNo();
7710 // Then load the GPRs from the stack
7711 unsigned LoadOffset = 0;
7712 auto HandleCustomVecRegLoc = [&]() {
7713 assert(I != E && "Unexpected end of CCvalAssigns.");
7714 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7715 "Expected custom RegLoc.");
7716 CCValAssign RegVA = ArgLocs[I++];
7717 assert(RegVA.getValNo() == OriginalValNo &&
7718 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7719 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7720 DAG.getConstant(LoadOffset, dl, PtrVT));
7721 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7722 MemOpChains.push_back(Load.getValue(1));
7723 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7724 LoadOffset += PtrByteSize;
7725 };
7726
7727 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7728 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7729 // R10.
7730 HandleCustomVecRegLoc();
7731 HandleCustomVecRegLoc();
7732
7733 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7734 ArgLocs[I].getValNo() == OriginalValNo) {
7735 assert(!IsPPC64 &&
7736 "Only 2 custom RegLocs expected for 64-bit codegen.");
7737 HandleCustomVecRegLoc();
7738 HandleCustomVecRegLoc();
7739 }
7740
7741 continue;
7742 }
7743
7744 if (VA.isMemLoc()) {
7745 SDValue PtrOff =
7746 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7747 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7748 MemOpChains.push_back(
7749 DAG.getStore(Chain, dl, Arg, PtrOff,
7751 Subtarget.getFrameLowering()->getStackAlign()));
7752
7753 continue;
7754 }
7755
7756 if (!ValVT.isFloatingPoint())
7758 "Unexpected register handling for calling convention.");
7759
7760 // Custom handling is used for GPR initializations for vararg float
7761 // arguments.
7762 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7763 LocVT.isInteger() &&
7764 "Custom register handling only expected for VarArg.");
7765
7766 SDValue ArgAsInt =
7767 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7768
7769 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7770 // f32 in 32-bit GPR
7771 // f64 in 64-bit GPR
7772 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7773 else if (Arg.getValueType().getFixedSizeInBits() <
7774 LocVT.getFixedSizeInBits())
7775 // f32 in 64-bit GPR.
7776 RegsToPass.push_back(std::make_pair(
7777 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7778 else {
7779 // f64 in two 32-bit GPRs
7780 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7781 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7782 "Unexpected custom register for argument!");
7783 CCValAssign &GPR1 = VA;
7784 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7785 DAG.getConstant(32, dl, MVT::i8));
7786 RegsToPass.push_back(std::make_pair(
7787 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7788
7789 if (I != E) {
7790 // If only 1 GPR was available, there will only be one custom GPR and
7791 // the argument will also pass in memory.
7792 CCValAssign &PeekArg = ArgLocs[I];
7793 if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7794 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7795 CCValAssign &GPR2 = ArgLocs[I++];
7796 RegsToPass.push_back(std::make_pair(
7797 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7798 }
7799 }
7800 }
7801 }
7802
7803 if (!MemOpChains.empty())
7804 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7805
7806 // For indirect calls, we need to save the TOC base to the stack for
7807 // restoration after the call.
7808 if (CFlags.IsIndirect && !Subtarget.usePointerGlueHelper()) {
7809 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7810 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7811 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7812 const MVT PtrVT = Subtarget.getScalarIntVT();
7813 const unsigned TOCSaveOffset =
7814 Subtarget.getFrameLowering()->getTOCSaveOffset();
7815
7816 setUsesTOCBasePtr(DAG);
7817 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7818 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7819 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7820 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7821 Chain = DAG.getStore(
7822 Val.getValue(1), dl, Val, AddPtr,
7823 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7824 }
7825
7826 // Build a sequence of copy-to-reg nodes chained together with token chain
7827 // and flag operands which copy the outgoing args into the appropriate regs.
7828 SDValue InGlue;
7829 for (auto Reg : RegsToPass) {
7830 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7831 InGlue = Chain.getValue(1);
7832 }
7833
7834 const int SPDiff = 0;
7835 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7836 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7837}
7838
7839bool
7840PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7841 MachineFunction &MF, bool isVarArg,
7844 const Type *RetTy) const {
7846 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7847 return CCInfo.CheckReturn(
7848 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7850 : RetCC_PPC);
7851}
7852
7853SDValue
7854PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7855 bool isVarArg,
7857 const SmallVectorImpl<SDValue> &OutVals,
7858 const SDLoc &dl, SelectionDAG &DAG) const {
7860 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7861 *DAG.getContext());
7862 CCInfo.AnalyzeReturn(Outs,
7863 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7865 : RetCC_PPC);
7866
7867 SDValue Glue;
7868 SmallVector<SDValue, 4> RetOps(1, Chain);
7869
7870 // Copy the result values into the output registers.
7871 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7872 CCValAssign &VA = RVLocs[i];
7873 assert(VA.isRegLoc() && "Can only return in registers!");
7874
7875 SDValue Arg = OutVals[RealResIdx];
7876
7877 switch (VA.getLocInfo()) {
7878 default: llvm_unreachable("Unknown loc info!");
7879 case CCValAssign::Full: break;
7880 case CCValAssign::AExt:
7881 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7882 break;
7883 case CCValAssign::ZExt:
7884 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7885 break;
7886 case CCValAssign::SExt:
7887 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7888 break;
7889 }
7890 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7891 bool isLittleEndian = Subtarget.isLittleEndian();
7892 // Legalize ret f64 -> ret 2 x i32.
7893 SDValue SVal =
7894 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7895 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7896 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7897 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7898 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7899 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7900 Glue = Chain.getValue(1);
7901 VA = RVLocs[++i]; // skip ahead to next loc
7902 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7903 } else
7904 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7905 Glue = Chain.getValue(1);
7906 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7907 }
7908
7909 RetOps[0] = Chain; // Update chain.
7910
7911 // Add the glue if we have it.
7912 if (Glue.getNode())
7913 RetOps.push_back(Glue);
7914
7915 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7916}
7917
7918SDValue
7919PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7920 SelectionDAG &DAG) const {
7921 SDLoc dl(Op);
7922
7923 // Get the correct type for integers.
7924 EVT IntVT = Op.getValueType();
7925
7926 // Get the inputs.
7927 SDValue Chain = Op.getOperand(0);
7928 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7929 // Build a DYNAREAOFFSET node.
7930 SDValue Ops[2] = {Chain, FPSIdx};
7931 SDVTList VTs = DAG.getVTList(IntVT);
7932 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7933}
7934
7935SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7936 SelectionDAG &DAG) const {
7937 // When we pop the dynamic allocation we need to restore the SP link.
7938 SDLoc dl(Op);
7939
7940 // Get the correct type for pointers.
7941 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7942
7943 // Construct the stack pointer operand.
7944 bool isPPC64 = Subtarget.isPPC64();
7945 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7946 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7947
7948 // Get the operands for the STACKRESTORE.
7949 SDValue Chain = Op.getOperand(0);
7950 SDValue SaveSP = Op.getOperand(1);
7951
7952 // Load the old link SP.
7953 SDValue LoadLinkSP =
7954 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7955
7956 // Restore the stack pointer.
7957 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7958
7959 // Store the old link SP.
7960 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7961}
7962
7963SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7964 MachineFunction &MF = DAG.getMachineFunction();
7965 bool isPPC64 = Subtarget.isPPC64();
7966 EVT PtrVT = getPointerTy(MF.getDataLayout());
7967
7968 // Get current frame pointer save index. The users of this index will be
7969 // primarily DYNALLOC instructions.
7970 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7971 int RASI = FI->getReturnAddrSaveIndex();
7972
7973 // If the frame pointer save index hasn't been defined yet.
7974 if (!RASI) {
7975 // Find out what the fix offset of the frame pointer save area.
7976 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7977 // Allocate the frame index for frame pointer save area.
7978 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7979 // Save the result.
7980 FI->setReturnAddrSaveIndex(RASI);
7981 }
7982 return DAG.getFrameIndex(RASI, PtrVT);
7983}
7984
7985SDValue
7986PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7987 MachineFunction &MF = DAG.getMachineFunction();
7988 bool isPPC64 = Subtarget.isPPC64();
7989 EVT PtrVT = getPointerTy(MF.getDataLayout());
7990
7991 // Get current frame pointer save index. The users of this index will be
7992 // primarily DYNALLOC instructions.
7993 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7994 int FPSI = FI->getFramePointerSaveIndex();
7995
7996 // If the frame pointer save index hasn't been defined yet.
7997 if (!FPSI) {
7998 // Find out what the fix offset of the frame pointer save area.
7999 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8000 // Allocate the frame index for frame pointer save area.
8001 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8002 // Save the result.
8003 FI->setFramePointerSaveIndex(FPSI);
8004 }
8005 return DAG.getFrameIndex(FPSI, PtrVT);
8006}
8007
8008SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8009 SelectionDAG &DAG) const {
8010 MachineFunction &MF = DAG.getMachineFunction();
8011 // Get the inputs.
8012 SDValue Chain = Op.getOperand(0);
8013 SDValue Size = Op.getOperand(1);
8014 SDLoc dl(Op);
8015
8016 // Get the correct type for pointers.
8017 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8018 // Negate the size.
8019 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8020 DAG.getConstant(0, dl, PtrVT), Size);
8021 // Construct a node for the frame pointer save index.
8022 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8023 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8024 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8025 if (hasInlineStackProbe(MF))
8026 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8027 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8028}
8029
8030SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8031 SelectionDAG &DAG) const {
8032 MachineFunction &MF = DAG.getMachineFunction();
8033
8034 bool isPPC64 = Subtarget.isPPC64();
8035 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8036
8037 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8038 return DAG.getFrameIndex(FI, PtrVT);
8039}
8040
8041SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8042 SelectionDAG &DAG) const {
8043 SDLoc DL(Op);
8044 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8045 DAG.getVTList(MVT::i32, MVT::Other),
8046 Op.getOperand(0), Op.getOperand(1));
8047}
8048
8049SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8050 SelectionDAG &DAG) const {
8051 SDLoc DL(Op);
8052 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8053 Op.getOperand(0), Op.getOperand(1));
8054}
8055
8056SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8057 if (Op.getValueType().isVector())
8058 return LowerVectorLoad(Op, DAG);
8059
8060 assert(Op.getValueType() == MVT::i1 &&
8061 "Custom lowering only for i1 loads");
8062
8063 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8064
8065 SDLoc dl(Op);
8066 LoadSDNode *LD = cast<LoadSDNode>(Op);
8067
8068 SDValue Chain = LD->getChain();
8069 SDValue BasePtr = LD->getBasePtr();
8070 MachineMemOperand *MMO = LD->getMemOperand();
8071
8072 SDValue NewLD =
8073 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8074 BasePtr, MVT::i8, MMO);
8075 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8076
8077 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8078 return DAG.getMergeValues(Ops, dl);
8079}
8080
8081SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8082 if (Op.getOperand(1).getValueType().isVector())
8083 return LowerVectorStore(Op, DAG);
8084
8085 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8086 "Custom lowering only for i1 stores");
8087
8088 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8089
8090 SDLoc dl(Op);
8091 StoreSDNode *ST = cast<StoreSDNode>(Op);
8092
8093 SDValue Chain = ST->getChain();
8094 SDValue BasePtr = ST->getBasePtr();
8095 SDValue Value = ST->getValue();
8096 MachineMemOperand *MMO = ST->getMemOperand();
8097
8099 Value);
8100 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8101}
8102
8103// FIXME: Remove this once the ANDI glue bug is fixed:
8104SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8105 assert(Op.getValueType() == MVT::i1 &&
8106 "Custom lowering only for i1 results");
8107
8108 SDLoc DL(Op);
8109 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8110}
8111
8112SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8113 SelectionDAG &DAG) const {
8114
8115 // Implements a vector truncate that fits in a vector register as a shuffle.
8116 // We want to legalize vector truncates down to where the source fits in
8117 // a vector register (and target is therefore smaller than vector register
8118 // size). At that point legalization will try to custom lower the sub-legal
8119 // result and get here - where we can contain the truncate as a single target
8120 // operation.
8121
8122 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8123 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8124 //
8125 // We will implement it for big-endian ordering as this (where x denotes
8126 // undefined):
8127 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8128 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8129 //
8130 // The same operation in little-endian ordering will be:
8131 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8132 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8133
8134 EVT TrgVT = Op.getValueType();
8135 assert(TrgVT.isVector() && "Vector type expected.");
8136 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8137 EVT EltVT = TrgVT.getVectorElementType();
8138 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8139 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8141 return SDValue();
8142
8143 SDValue N1 = Op.getOperand(0);
8144 EVT SrcVT = N1.getValueType();
8145 unsigned SrcSize = SrcVT.getSizeInBits();
8146 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8149 return SDValue();
8150 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8151 return SDValue();
8152
8153 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8154 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8155
8156 SDLoc DL(Op);
8157 SDValue Op1, Op2;
8158 if (SrcSize == 256) {
8159 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8160 EVT SplitVT =
8162 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8163 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8164 DAG.getConstant(0, DL, VecIdxTy));
8165 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8166 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8167 }
8168 else {
8169 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8170 Op2 = DAG.getUNDEF(WideVT);
8171 }
8172
8173 // First list the elements we want to keep.
8174 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8175 SmallVector<int, 16> ShuffV;
8176 if (Subtarget.isLittleEndian())
8177 for (unsigned i = 0; i < TrgNumElts; ++i)
8178 ShuffV.push_back(i * SizeMult);
8179 else
8180 for (unsigned i = 1; i <= TrgNumElts; ++i)
8181 ShuffV.push_back(i * SizeMult - 1);
8182
8183 // Populate the remaining elements with undefs.
8184 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8185 // ShuffV.push_back(i + WideNumElts);
8186 ShuffV.push_back(WideNumElts + 1);
8187
8188 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8189 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8190 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8191}
8192
8193/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8194/// possible.
/// Custom lowering for SELECT_CC (operands: LHS, RHS, TV, FV, CC).
///
/// - f128 comparisons without power9-vector are rewritten as a SETCC whose
///   result is compared against zero with SETNE in a new SELECT_CC.
/// - Non-FP comparisons or results, and SPE subtargets, return Op unchanged.
/// - With power9-vector, `select_cc a, b, a, b, cc` becomes XSMAXC for
///   SETOGT/SETGT and XSMINC for SETOLT/SETLT.
/// - Everything else is lowered to PPCISD::FSEL sequences only when the node
///   carries both the no-infs and no-NaNs flags and the result is not f128;
///   otherwise Op is returned unchanged.
///
/// NOTE(review): this listing skips source line 8246, so whatever guards the
/// first FSEL switch below (presumably a test that RHS is 0.0, per the
/// comment preceding it) is not visible here -- confirm against the original
/// file before relying on the control flow as shown.
8195SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8196 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8197 EVT ResVT = Op.getValueType();
8198 EVT CmpVT = Op.getOperand(0).getValueType();
8199 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8200 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8201 SDLoc dl(Op);
8202
8203 // Without power9-vector, we don't have native instruction for f128 comparison.
8204 // Following transformation to libcall is needed for setcc:
8205 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8206 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8207 SDValue Z = DAG.getSetCC(
8208 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8209 LHS, RHS, CC);
8210 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8211 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8212 }
8213
8214 // Not FP, or using SPE? Not a fsel.
8215 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8216 Subtarget.hasSPE())
8217 return Op;
8218
8219 SDNodeFlags Flags = Op.getNode()->getFlags();
8220
8221 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8222 // presence of infinities.
 // Only applies when the selected values are the compared values themselves.
8223 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8224 switch (CC) {
8225 default:
8226 break;
8227 case ISD::SETOGT:
8228 case ISD::SETGT:
8229 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8230 case ISD::SETOLT:
8231 case ISD::SETLT:
8232 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8233 }
8234 }
8235
8236 // We might be able to do better than this under some circumstances, but in
8237 // general, fsel-based lowering of select is a finite-math-only optimization.
8238 // For more information, see section F.3 of the 2.06 ISA specification.
8239 // With ISA 3.0 (NOTE(review): this sentence appears truncated in the source.)
8240 if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
8241 return Op;
8242
8243 // If the RHS of the comparison is a 0.0, we don't need to do the
8244 // subtraction at all.
 // In this switch LHS itself (extended to f64 when it is f32) is the FSEL
 // condition operand.
8245 SDValue Sel1;
8247 switch (CC) {
8248 default: break; // SETUO etc aren't handled by fsel.
8249 case ISD::SETNE:
8250 std::swap(TV, FV);
8251 [[fallthrough]];
8252 case ISD::SETEQ:
 // Equality is built from two FSELs: one on LHS and one on -LHS.
8253 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8254 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8255 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8256 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8257 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8258 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8259 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8260 case ISD::SETULT:
8261 case ISD::SETLT:
8262 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8263 [[fallthrough]];
8264 case ISD::SETOGE:
8265 case ISD::SETGE:
8266 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8267 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8268 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8269 case ISD::SETUGT:
8270 case ISD::SETGT:
8271 std::swap(TV, FV); // fsel on -LHS acts as setle, swap operands for setgt
8272 [[fallthrough]];
8273 case ISD::SETOLE:
8274 case ISD::SETLE:
8275 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8276 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8277 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8278 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8279 }
8280
 // General case: feed FSEL with a difference of the operands (LHS - RHS, or
 // RHS - LHS for the GT/LE forms), carrying the original node's flags on the
 // FSUB.
8281 SDValue Cmp;
8282 switch (CC) {
8283 default: break; // SETUO etc aren't handled by fsel.
8284 case ISD::SETNE:
8285 std::swap(TV, FV);
8286 [[fallthrough]];
8287 case ISD::SETEQ:
8288 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8289 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8290 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8291 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8292 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8293 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8294 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8295 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8296 case ISD::SETULT:
8297 case ISD::SETLT:
 // LHS - RHS >= 0 selects FV, so TV/FV are passed in swapped order.
8298 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8299 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8300 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8301 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8302 case ISD::SETOGE:
8303 case ISD::SETGE:
8304 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8305 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8306 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8307 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8308 case ISD::SETUGT:
8309 case ISD::SETGT:
 // RHS - LHS >= 0 selects FV, so TV/FV are passed in swapped order.
8310 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8311 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8312 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8313 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8314 case ISD::SETOLE:
8315 case ISD::SETLE:
8316 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8317 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8318 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8319 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8320 }
 // Condition codes not handled above are left untouched.
8321 return Op;
8322}
8323
8324static unsigned getPPCStrictOpcode(unsigned Opc) {
8325 switch (Opc) {
8326 default:
8327 llvm_unreachable("No strict version of this opcode!");
8328 case PPCISD::FCTIDZ:
8329 return PPCISD::STRICT_FCTIDZ;
8330 case PPCISD::FCTIWZ:
8331 return PPCISD::STRICT_FCTIWZ;
8332 case PPCISD::FCTIDUZ:
8333 return PPCISD::STRICT_FCTIDUZ;
8334 case PPCISD::FCTIWUZ:
8335 return PPCISD::STRICT_FCTIWUZ;
8336 case PPCISD::FCFID:
8337 return PPCISD::STRICT_FCFID;
8338 case PPCISD::FCFIDU:
8339 return PPCISD::STRICT_FCFIDU;
8340 case PPCISD::FCFIDS:
8341 return PPCISD::STRICT_FCFIDS;
8342 case PPCISD::FCFIDUS:
8343 return PPCISD::STRICT_FCFIDUS;
8344 }
8345}
8346
8348 const PPCSubtarget &Subtarget) {
 // NOTE(review): the line carrying this function's name and leading
 // parameters (source line 8347) is missing from this listing, as are source
 // lines 8369 and 8392. Judging by the call sites in this file this is
 // presumably convertFPToInt(Op, DAG, Subtarget) -- confirm against the
 // original file.
 //
 // Builds the PPC conversion node for a [STRICT_]FP_TO_{S,U}INT node Op and
 // returns it. The returned value is FP-typed (f128 when the source is f128,
 // f64 otherwise); for strict nodes it additionally has a chain result.
8349 SDLoc dl(Op);
8350 bool IsStrict = Op->isStrictFPOpcode();
8351 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8352 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8353
8354 // TODO: Any other flags to propagate?
8355 SDNodeFlags Flags;
8356 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8357
8358 // For strict nodes, source is the second operand.
8359 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8360 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8361 MVT DestTy = Op.getSimpleValueType();
8362 assert(Src.getValueType().isFloatingPoint() &&
8363 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8364 DestTy == MVT::i64) &&
8365 "Invalid FP_TO_INT types");
 // f32 sources are first widened to f64; the strict form also updates Chain.
8366 if (Src.getValueType() == MVT::f32) {
8367 if (IsStrict) {
8368 Src =
8370 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8371 Chain = Src.getValue(1);
8372 } else
8373 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8374 }
 // With P9 vector, i8/i16 destinations are converted as the subtarget's
 // scalar integer type instead.
8375 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8376 DestTy = Subtarget.getScalarIntVT();
8377 unsigned Opc = ISD::DELETED_NODE;
8378 switch (DestTy.SimpleTy) {
8379 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8380 case MVT::i32:
 // Unsigned i32 without FPCVT falls back to the signed 64-bit conversion.
8381 Opc = IsSigned ? PPCISD::FCTIWZ
8382 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8383 break;
8384 case MVT::i64:
8385 assert((IsSigned || Subtarget.hasFPCVT()) &&
8386 "i64 FP_TO_UINT is supported only with FPCVT");
8387 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8388 }
8389 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8390 SDValue Conv;
8391 if (IsStrict) {
 // NOTE(review): source line 8392 is not shown; as listed, Opc is used here
 // without being mapped to a strict opcode -- confirm against the original.
8393 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8394 Flags);
8395 } else {
8396 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8397 }
8398 return Conv;
8399}
8400
/// Convert the FP operand of \p Op to an integer and spill the result to a
/// fresh stack temporary, without emitting the reload.
///
/// On return \p RLI describes the stored value: RLI.Chain is the store's
/// chain, RLI.Ptr and RLI.MPI address the bytes to load (biased by 4 on
/// big-endian targets when an i32 result was stored as a full 8-byte value),
/// and RLI.Alignment is the slot alignment used for the store.
///
/// NOTE(review): this listing skips source line 8415, so the initializer of
/// MPI is not visible here -- confirm against the original file.
8401void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8402 SelectionDAG &DAG,
8403 const SDLoc &dl) const {
8404 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8405 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8406 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8407 bool IsStrict = Op->isStrictFPOpcode();
8408
8409 // Convert the FP value to an int value through memory.
 // A 4-byte slot written with STFIWX is used only for i32 results when the
 // subtarget has STFIWX and the conversion is signed or FPCVT is available;
 // otherwise the whole f64-typed conversion result is stored.
8410 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8411 (IsSigned || Subtarget.hasFPCVT());
8412 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8413 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8414 MachinePointerInfo MPI =
8416
8417 // Emit a store to the stack slot.
 // Strict conversions continue from the conversion's own chain result.
8418 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8419 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8420 if (i32Stack) {
8421 MachineFunction &MF = DAG.getMachineFunction();
8422 Alignment = Align(4);
8423 MachineMemOperand *MMO =
8424 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8425 SDValue Ops[] = { Chain, Tmp, FIPtr };
8426 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8427 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8428 } else
8429 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8430
8431 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8432 // add in a bias on big endian.
8433 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8434 !Subtarget.isLittleEndian()) {
8435 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8436 DAG.getConstant(4, dl, FIPtr.getValueType()));
8437 MPI = MPI.getWithOffset(4);
8438 }
8439
 // Hand the store chain and load address back to the caller.
8440 RLI.Chain = Chain;
8441 RLI.Ptr = FIPtr;
8442 RLI.MPI = MPI;
8443 RLI.Alignment = Alignment;
8444}
8445
8446/// Custom lowers floating point to integer conversions to use
8447/// the direct move instructions available in ISA 2.07 to avoid the
8448/// need for load/store combinations.
8449SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8450 SelectionDAG &DAG,
8451 const SDLoc &dl) const {
8452 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8453 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8454 if (Op->isStrictFPOpcode())
8455 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8456 else
8457 return Mov;
8458}
8459
8460SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8461 const SDLoc &dl) const {
8462 bool IsStrict = Op->isStrictFPOpcode();
8463 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8464 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8465 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8466 EVT SrcVT = Src.getValueType();
8467 EVT DstVT = Op.getValueType();
8468
8469 // FP to INT conversions are legal for f128.
8470 if (SrcVT == MVT::f128)
8471 return Subtarget.hasP9Vector() ? Op : SDValue();
8472
8473 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8474 // PPC (the libcall is not available).
8475 if (SrcVT == MVT::ppcf128) {
8476 if (DstVT == MVT::i32) {
8477 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8478 // set other fast-math flags to FP operations in both strict and
8479 // non-strict cases. (FP_TO_SINT, FSUB)
8480 SDNodeFlags Flags;
8481 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8482
8483 if (IsSigned) {
8484 SDValue Lo, Hi;
8485 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8486
8487 // Add the two halves of the long double in round-to-zero mode, and use
8488 // a smaller FP_TO_SINT.
8489 if (IsStrict) {
8490 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8491 DAG.getVTList(MVT::f64, MVT::Other),
8492 {Op.getOperand(0), Lo, Hi}, Flags);
8493 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8494 DAG.getVTList(MVT::i32, MVT::Other),
8495 {Res.getValue(1), Res}, Flags);
8496 } else {
8497 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8498 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8499 }
8500 } else {
8501 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8502 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8503 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8504 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8505 if (IsStrict) {
8506 // Sel = Src < 0x80000000
8507 // FltOfs = select Sel, 0.0, 0x80000000
8508 // IntOfs = select Sel, 0, 0x80000000
8509 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8510 SDValue Chain = Op.getOperand(0);
8511 EVT SetCCVT =
8512 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8513 EVT DstSetCCVT =
8514 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8515 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8516 Chain, true);
8517 Chain = Sel.getValue(1);
8518
8519 SDValue FltOfs = DAG.getSelect(
8520 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8521 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8522
8523 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8524 DAG.getVTList(SrcVT, MVT::Other),
8525 {Chain, Src, FltOfs}, Flags);
8526 Chain = Val.getValue(1);
8527 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8528 DAG.getVTList(DstVT, MVT::Other),
8529 {Chain, Val}, Flags);
8530 Chain = SInt.getValue(1);
8531 SDValue IntOfs = DAG.getSelect(
8532 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8533 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8534 return DAG.getMergeValues({Result, Chain}, dl);
8535 } else {
8536 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8537 // FIXME: generated code sucks.
8538 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8539 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8540 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8541 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8542 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8543 }
8544 }
8545 }
8546
8547 return SDValue();
8548 }
8549
8550 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8551 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8552
8553 ReuseLoadInfo RLI;
8554 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8555
8556 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8557 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8558}
8559
8560// We're trying to insert a regular store, S, and then a load, L. If the
8561// incoming value, O, is a load, we might just be able to have our load use the
8562// address used by O. However, we don't know if anything else will store to
8563// that address before we can load from it. To prevent this situation, we need
8564// to insert our load, L, into the chain as a peer of O. To do this, we give L
8565// the same chain operand as O, we create a token factor from the chain results
8566// of O and L, and we replace all uses of O's chain result with that token
8567// factor (this last part is handled by makeEquivalentMemoryOrdering).
8568bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8569 ReuseLoadInfo &RLI,
8570 SelectionDAG &DAG,
8571 ISD::LoadExtType ET) const {
8572 // Conservatively skip reusing for constrained FP nodes.
8573 if (Op->isStrictFPOpcode())
8574 return false;
8575
8576 SDLoc dl(Op);
8577 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8578 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8579 if (ET == ISD::NON_EXTLOAD &&
8580 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8581 isOperationLegalOrCustom(Op.getOpcode(),
8582 Op.getOperand(0).getValueType())) {
8583
8584 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8585 return true;
8586 }
8587
8588 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8589 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8590 LD->isNonTemporal())
8591 return false;
8592 if (LD->getMemoryVT() != MemVT)
8593 return false;
8594
8595 // If the result of the load is an illegal type, then we can't build a
8596 // valid chain for reuse since the legalised loads and token factor node that
8597 // ties the legalised loads together uses a different output chain then the
8598 // illegal load.
8599 if (!isTypeLegal(LD->getValueType(0)))
8600 return false;
8601
8602 RLI.Ptr = LD->getBasePtr();
8603 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8604 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8605 "Non-pre-inc AM on PPC?");
8606 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8607 LD->getOffset());
8608 }
8609
8610 RLI.Chain = LD->getChain();
8611 RLI.MPI = LD->getPointerInfo();
8612 RLI.IsDereferenceable = LD->isDereferenceable();
8613 RLI.IsInvariant = LD->isInvariant();
8614 RLI.Alignment = LD->getAlign();
8615 RLI.AAInfo = LD->getAAInfo();
8616 RLI.Ranges = LD->getRanges();
8617
8618 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8619 return true;
8620}
8621
8622/// Analyze profitability of direct move
8623/// prefer float load to int load plus direct move
8624/// when there is no integer use of int load
8625bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8626 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8627 if (Origin->getOpcode() != ISD::LOAD)
8628 return true;
8629
8630 // If there is no LXSIBZX/LXSIHZX, like Power8,
8631 // prefer direct move if the memory size is 1 or 2 bytes.
8632 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8633 if (!Subtarget.hasP9Vector() &&
8634 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8635 return true;
8636
8637 for (SDUse &Use : Origin->uses()) {
8638
8639 // Only look at the users of the loaded value.
8640 if (Use.getResNo() != 0)
8641 continue;
8642
8643 SDNode *User = Use.getUser();
8644 if (User->getOpcode() != ISD::SINT_TO_FP &&
8645 User->getOpcode() != ISD::UINT_TO_FP &&
8646 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8647 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8648 return true;
8649 }
8650
8651 return false;
8652}
8653
8655 const PPCSubtarget &Subtarget,
8656 SDValue Chain = SDValue()) {
 // NOTE(review): the line carrying this function's name and leading
 // parameters (source line 8654) is missing from this listing. Judging by the
 // call sites in this file this is presumably
 // convertIntToFP(Op, Src, DAG, Subtarget, Chain) -- confirm against the
 // original file.
 //
 // Builds the PPC FCFID-family node that converts Src for the
 // [STRICT_]{S,U}INT_TO_FP node Op. For strict nodes the result also has a
 // chain; when no Chain is supplied, Op's own chain operand is used.
8657 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8658 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8659 SDLoc dl(Op);
8660
8661 // TODO: Any other flags to propagate?
8662 SDNodeFlags Flags;
8663 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8664
8665 // If we have FCFIDS, then use it when converting to single-precision.
8666 // Otherwise, convert to double-precision and then round.
 // (The rounding itself is not done here; this only picks opcode and type.)
8667 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8668 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8669 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8670 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8671 if (Op->isStrictFPOpcode()) {
8672 if (!Chain)
8673 Chain = Op.getOperand(0);
8674 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8675 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8676 } else
8677 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8678}
8679
8680/// Custom lowers integer to floating point conversions to use
8681/// the direct move instructions available in ISA 2.07 to avoid the
8682/// need for load/store combinations.
8683SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8684 SelectionDAG &DAG,
8685 const SDLoc &dl) const {
8686 assert((Op.getValueType() == MVT::f32 ||
8687 Op.getValueType() == MVT::f64) &&
8688 "Invalid floating point type as target of conversion");
8689 assert(Subtarget.hasFPCVT() &&
8690 "Int to FP conversions with direct moves require FPCVT");
8691 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8692 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8693 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8694 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8695 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8696 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8697 return convertIntToFP(Op, Mov, DAG, Subtarget);
8698}
8699
8700static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8701
8702 EVT VecVT = Vec.getValueType();
8703 assert(VecVT.isVector() && "Expected a vector type.");
8704 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8705
8706 EVT EltVT = VecVT.getVectorElementType();
8707 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8708 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8709
8710 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8711 SmallVector<SDValue, 16> Ops(NumConcat);
8712 Ops[0] = Vec;
8713 SDValue UndefVec = DAG.getUNDEF(VecVT);
8714 for (unsigned i = 1; i < NumConcat; ++i)
8715 Ops[i] = UndefVec;
8716
8717 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8718}
8719
/// Lower a vector [STRICT_]{S,U}INT_TO_FP whose result is v2f64 or v4f32.
///
/// The source is widened to 128 bits with widenVec, its elements are shuffled
/// into the lanes of a v2i64/v4i32 value, the remaining bits are filled by
/// sign extension (signed) or with zeros (unsigned), and the original opcode
/// is then re-emitted on that intermediate vector.
///
/// NOTE(review): this listing skips source lines 8725-8726, so the start of
/// the first assert below is not visible -- confirm against the original
/// file.
8720SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8721 const SDLoc &dl) const {
8722 bool IsStrict = Op->isStrictFPOpcode();
8723 unsigned Opc = Op.getOpcode();
8724 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8727 "Unexpected conversion type");
8728 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8729 "Supports conversions to v2f64/v4f32 only.");
8730
8731 // TODO: Any other flags to propagate?
8732 SDNodeFlags Flags;
8733 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8734
8735 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8736 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8737
8738 SDValue Wide = widenVec(DAG, Src, dl);
8739 EVT WideVT = Wide.getValueType();
8740 unsigned WideNumElts = WideVT.getVectorNumElements();
8741 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8742
 // Start with every lane taken from the second shuffle operand (mask values
 // WideNumElts and above).
8743 SmallVector<int, 16> ShuffV;
8744 for (unsigned i = 0; i < WideNumElts; ++i)
8745 ShuffV.push_back(i + WideNumElts);
8746
 // Then place source element i at one end of each Stride-sized group: the
 // first lane of the group on little endian, the last lane on big endian.
8747 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8748 int SaveElts = FourEltRes ? 4 : 2;
8749 if (Subtarget.isLittleEndian())
8750 for (int i = 0; i < SaveElts; i++)
8751 ShuffV[i * Stride] = i;
8752 else
8753 for (int i = 1; i <= SaveElts; i++)
8754 ShuffV[i * Stride - 1] = i - 1;
8755
 // Unsigned conversions fill the other lanes with zeros; for signed
 // conversions they are undef and are overwritten by the in-register sign
 // extension below.
8756 SDValue ShuffleSrc2 =
8757 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8758 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8759
8760 SDValue Extend;
8761 if (SignedConv) {
8762 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8763 EVT ExtVT = Src.getValueType();
 // With P9 Altivec the extension type keeps the source element type but
 // uses the intermediate vector's element count.
8764 if (Subtarget.hasP9Altivec())
8765 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8766 IntermediateVT.getVectorNumElements());
8767
8768 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8769 DAG.getValueType(ExtVT));
8770 } else
8771 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8772
 // Re-emit the same conversion opcode on the extended intermediate vector.
8773 if (IsStrict)
8774 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8775 {Op.getOperand(0), Extend}, Flags);
8776
8777 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8778}
8779
/// Custom lowering for [STRICT_]{S,U}INT_TO_FP.
///
/// In order, the code below:
///  - hands vector FP results whose opcode is custom for the input type to
///    LowerINT_TO_FPVector;
///  - for f128 results, returns Op unchanged with power9-vector and an empty
///    SDValue otherwise;
///  - returns an empty SDValue for any remaining result type other than
///    f32/f64;
///  - turns an i1 source into a select between 1.0 and 0.0;
///  - uses LowerINT_TO_FPDirectMove when direct moves and FPCVT are available
///    on PPC64 and directMoveIsProfitable(Op) holds;
///  - otherwise gets the integer bits into an f64-typed value (by reusing an
///    existing load address, through a stack slot, or with a bitcast),
///    converts with convertIntToFP, and rounds to f32 with [STRICT_]FP_ROUND
///    when FPCVT is unavailable.
///
/// NOTE(review): this listing skips source lines 8892, 8903, 8923, 8933,
/// 8937, 8940, 8984, 8994, 8999, 9021 and 9027, so several initializers and
/// call heads below appear truncated -- consult the original file for them.
8780SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8781 SelectionDAG &DAG) const {
8782 SDLoc dl(Op);
8783 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8784 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8785 bool IsStrict = Op->isStrictFPOpcode();
8786 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8787 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8788
8789 // TODO: Any other flags to propagate?
8790 SDNodeFlags Flags;
8791 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8792
8793 EVT InVT = Src.getValueType();
8794 EVT OutVT = Op.getValueType();
8795 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8796 isOperationCustom(Op.getOpcode(), InVT))
8797 return LowerINT_TO_FPVector(Op, DAG, dl);
8798
8799 // Conversions to f128 are legal.
8800 if (Op.getValueType() == MVT::f128)
8801 return Subtarget.hasP9Vector() ? Op : SDValue();
8802
8803 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8804 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8805 return SDValue();
8806
 // An i1 source becomes a select between 1.0 and 0.0; strict nodes return
 // the incoming chain unchanged alongside it.
8807 if (Src.getValueType() == MVT::i1) {
8808 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8809 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8810 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8811 if (IsStrict)
8812 return DAG.getMergeValues({Sel, Chain}, dl);
8813 else
8814 return Sel;
8815 }
8816
8817 // If we have direct moves, we can do all the conversion, skip the store/load
8818 // however, without FPCVT we can't do most conversions.
8819 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8820 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8821 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8822
8823 assert((IsSigned || Subtarget.hasFPCVT()) &&
8824 "UINT_TO_FP is supported only with FPCVT");
8825
8826 if (Src.getValueType() == MVT::i64) {
8827 SDValue SINT = Src;
8828 // When converting to single-precision, we actually need to convert
8829 // to double-precision first and then round to single-precision.
8830 // To avoid double-rounding effects during that operation, we have
8831 // to prepare the input operand. Bits that might be truncated when
8832 // converting to double-precision are replaced by a bit that won't
8833 // be lost at this stage, but is below the single-precision rounding
8834 // position.
8835 //
8836 // However, if afn is in effect, accept double
8837 // rounding to avoid the extra overhead.
8838 // FIXME: Currently INT_TO_FP can't support fast math flags because
8839 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8840 // false.
8841 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8842 !Op->getFlags().hasApproximateFuncs()) {
8843
8844 // Twiddle input to make sure the low 11 bits are zero. (If this
8845 // is the case, we are guaranteed the value will fit into the 53 bit
8846 // mantissa of an IEEE double-precision value without rounding.)
8847 // If any of those low 11 bits were not zero originally, make sure
8848 // bit 12 (value 2048) is set instead, so that the final rounding
8849 // to single-precision gets the correct result.
 // Round = ((SINT & 2047) + 2047 | SINT) & -2048
8850 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8851 SINT, DAG.getConstant(2047, dl, MVT::i64));
8852 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8853 Round, DAG.getConstant(2047, dl, MVT::i64));
8854 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8855 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8856 DAG.getSignedConstant(-2048, dl, MVT::i64));
8857
8858 // However, we cannot use that value unconditionally: if the magnitude
8859 // of the input value is small, the bit-twiddling we did above might
8860 // end up visibly changing the output. Fortunately, in that case, we
8861 // don't need to twiddle bits since the original input will convert
8862 // exactly to double-precision floating-point already. Therefore,
8863 // construct a conditional to use the original value if the top 11
8864 // bits are all sign-bit copies, and use the rounded value computed
8865 // above otherwise.
 // (SINT >> 53) + 1 is 0 or 1 exactly when those top bits are all equal, so
 // an unsigned "> 1" selects the rounded value.
8866 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8867 SINT, DAG.getConstant(53, dl, MVT::i32));
8868 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8869 Cond, DAG.getConstant(1, dl, MVT::i64));
8870 Cond = DAG.getSetCC(
8871 dl,
8872 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8873 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8874
8875 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8876 }
8877
8878 ReuseLoadInfo RLI;
8879 SDValue Bits;
8880
 // Get the 64 integer bits into an f64-typed value, trying in order: reuse
 // of an i64 load address, reuse of a sign-/zero-extending i32 load address
 // (LFIWAX/LFIWZX), a 4-byte stack slot for an explicit extend of an i32, and
 // finally a plain bitcast.
8881 MachineFunction &MF = DAG.getMachineFunction();
8882 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8883 // Drop range metadata, as this metadata becomes invalid for f64 bit
8884 // reinterpretation of i64 values.
8885 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8886 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, nullptr);
8887 if (RLI.ResChain)
8888 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8889 } else if (Subtarget.hasLFIWAX() &&
8890 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8891 MachineMemOperand *MMO =
8893 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8894 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8895 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8896 DAG.getVTList(MVT::f64, MVT::Other),
8897 Ops, MVT::i32, MMO);
8898 if (RLI.ResChain)
8899 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8900 } else if (Subtarget.hasFPCVT() &&
8901 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8902 MachineMemOperand *MMO =
8904 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8905 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8906 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8907 DAG.getVTList(MVT::f64, MVT::Other),
8908 Ops, MVT::i32, MMO);
8909 if (RLI.ResChain)
8910 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8911 } else if (((Subtarget.hasLFIWAX() &&
8912 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8913 (Subtarget.hasFPCVT() &&
8914 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8915 SINT.getOperand(0).getValueType() == MVT::i32) {
 // Store the un-extended i32 to a 4-byte slot and let the load perform the
 // extension.
8916 MachineFrameInfo &MFI = MF.getFrameInfo();
8917 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8918
8919 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8920 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8921
8922 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8924 DAG.getMachineFunction(), FrameIdx));
8925 Chain = Store;
8926
8927 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8928 "Expected an i32 store");
8929
8930 RLI.Ptr = FIdx;
8931 RLI.Chain = Chain;
8932 RLI.MPI =
8934 RLI.Alignment = Align(4);
8935
8936 MachineMemOperand *MMO =
8938 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8939 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8941 PPCISD::LFIWZX : PPCISD::LFIWAX,
8942 dl, DAG.getVTList(MVT::f64, MVT::Other),
8943 Ops, MVT::i32, MMO);
8944 Chain = Bits.getValue(1);
8945 } else
8946 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8947
8948 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8949 if (IsStrict)
8950 Chain = FP.getValue(1);
8951
 // Without FPCVT the f64 result still has to be rounded down to f32.
8952 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8953 if (IsStrict)
8954 FP = DAG.getNode(
8955 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8956 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8957 Flags);
8958 else
8959 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8960 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8961 }
8962 return FP;
8963 }
8964
8965 assert(Src.getValueType() == MVT::i32 &&
8966 "Unhandled INT_TO_FP type in custom expander!");
8967 // Since we only generate this in 64-bit mode, we can take advantage of
8968 // 64-bit registers. In particular, sign extend the input value into the
8969 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8970 // then lfd it and fcfid it.
 // (That describes the fallback branch below; with LFIWAX or FPCVT the i32 is
 // instead loaded from a 4-byte location with LFIWAX/LFIWZX.)
8971 MachineFunction &MF = DAG.getMachineFunction();
8972 MachineFrameInfo &MFI = MF.getFrameInfo();
8973 EVT PtrVT = getPointerTy(MF.getDataLayout());
8974
8975 SDValue Ld;
8976 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8977 ReuseLoadInfo RLI;
8978 bool ReusingLoad;
 // If no existing load address can be reused, spill Src to a new 4-byte slot.
8979 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8980 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8981 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8982
8983 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8985 DAG.getMachineFunction(), FrameIdx));
8986 Chain = Store;
8987
8988 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8989 "Expected an i32 store");
8990
8991 RLI.Ptr = FIdx;
8992 RLI.Chain = Chain;
8993 RLI.MPI =
8995 RLI.Alignment = Align(4);
8996 }
8997
8998 MachineMemOperand *MMO =
9000 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9001 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9002 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9003 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9004 MVT::i32, MMO);
9005 Chain = Ld.getValue(1);
9006 if (ReusingLoad && RLI.ResChain) {
9007 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9008 }
9009 } else {
9010 assert(Subtarget.isPPC64() &&
9011 "i32->FP without LFIWAX supported only on PPC64");
9012
9013 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9014 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9015
9016 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9017
9018 // STD the extended value into the stack slot.
9019 SDValue Store = DAG.getStore(
9020 Chain, dl, Ext64, FIdx,
9022 Chain = Store;
9023
9024 // Load the value as a double.
9025 Ld = DAG.getLoad(
9026 MVT::f64, dl, Chain, FIdx,
9028 Chain = Ld.getValue(1);
9029 }
9030
9031 // FCFID it and return it.
9032 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9033 if (IsStrict)
9034 Chain = FP.getValue(1);
 // Without FPCVT the f64 result still has to be rounded down to f32.
9035 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9036 if (IsStrict)
9037 FP = DAG.getNode(
9038 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9039 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9040 else
9041 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9042 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9043 }
9044 return FP;
9045}
9046
9047SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9048 SelectionDAG &DAG) const {
9049 SDLoc Dl(Op);
9050 MachineFunction &MF = DAG.getMachineFunction();
9051 EVT PtrVT = getPointerTy(MF.getDataLayout());
9052 SDValue Chain = Op.getOperand(0);
9053
9054 // If requested mode is constant, just use simpler mtfsb/mffscrni
9055 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9056 uint64_t Mode = CVal->getZExtValue();
9057 assert(Mode < 4 && "Unsupported rounding mode!");
9058 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9059 if (Subtarget.isISA3_0())
9060 return SDValue(
9061 DAG.getMachineNode(
9062 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9063 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9064 1);
9065 SDNode *SetHi = DAG.getMachineNode(
9066 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9067 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9068 SDNode *SetLo = DAG.getMachineNode(
9069 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9070 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9071 return SDValue(SetLo, 0);
9072 }
9073
9074 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9075 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9076 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9077 DAG.getConstant(3, Dl, MVT::i32));
9078 SDValue DstFlag = DAG.getNode(
9079 ISD::XOR, Dl, MVT::i32, SrcFlag,
9080 DAG.getNode(ISD::AND, Dl, MVT::i32,
9081 DAG.getNOT(Dl,
9082 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9083 MVT::i32),
9084 One));
9085 // For Power9, there's faster mffscrn, and we don't need to read FPSCR
9086 SDValue MFFS;
9087 if (!Subtarget.isISA3_0()) {
9088 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9089 Chain = MFFS.getValue(1);
9090 }
9091 SDValue NewFPSCR;
9092 if (Subtarget.isPPC64()) {
9093 if (Subtarget.isISA3_0()) {
9094 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9095 } else {
9096 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9097 SDNode *InsertRN = DAG.getMachineNode(
9098 PPC::RLDIMI, Dl, MVT::i64,
9099 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9100 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9101 DAG.getTargetConstant(0, Dl, MVT::i32),
9102 DAG.getTargetConstant(62, Dl, MVT::i32)});
9103 NewFPSCR = SDValue(InsertRN, 0);
9104 }
9105 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9106 } else {
9107 // In 32-bit mode, store f64, load and update the lower half.
9108 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9109 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9110 SDValue Addr = Subtarget.isLittleEndian()
9111 ? StackSlot
9112 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9113 DAG.getConstant(4, Dl, PtrVT));
9114 if (Subtarget.isISA3_0()) {
9115 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9116 } else {
9117 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9118 SDValue Tmp =
9119 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9120 Chain = Tmp.getValue(1);
9121 Tmp = SDValue(DAG.getMachineNode(
9122 PPC::RLWIMI, Dl, MVT::i32,
9123 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9124 DAG.getTargetConstant(30, Dl, MVT::i32),
9125 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9126 0);
9127 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9128 }
9129 NewFPSCR =
9130 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9131 Chain = NewFPSCR.getValue(1);
9132 }
9133 if (Subtarget.isISA3_0())
9134 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9135 {NewFPSCR, Chain}),
9136 1);
9137 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9138 SDNode *MTFSF = DAG.getMachineNode(
9139 PPC::MTFSF, Dl, MVT::Other,
9140 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9141 return SDValue(MTFSF, 0);
9142}
9143
/// Lower a GET_ROUNDING node: read the FPSCR with PPCISD::MFFS, obtain its
/// low 32 bits, and convert the two rounding-mode bits to the GET_ROUNDING
/// encoding. Operand 0 of \p Op is the incoming chain. The result is a merged
/// {value, chain} pair.
9144SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9145 SelectionDAG &DAG) const {
9146 SDLoc dl(Op);
9147 /*
9148 The rounding mode is in bits 30:31 of FPSCR, and has the following
9149 settings:
9150 00 Round to nearest
9151 01 Round to 0
9152 10 Round to +inf
9153 11 Round to -inf
9154
9155 GET_ROUNDING, on the other hand, expects the following:
9156 -1 Undefined
9157 0 Round to 0
9158 1 Round to nearest
9159 2 Round to +inf
9160 3 Round to -inf
9161
9162 To perform the conversion, we do:
9163 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9164 */
9165
9166 MachineFunction &MF = DAG.getMachineFunction();
9167 EVT VT = Op.getValueType();
9168 EVT PtrVT = getPointerTy(MF.getDataLayout());
9169
9170 // Save FP Control Word to register
9171 SDValue Chain = Op.getOperand(0);
9172 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9173 Chain = MFFS.getValue(1);
9174
9175 SDValue CWD;
// When i64 is a legal type the low word is taken by bitcasting the f64 to
// i64 and truncating; otherwise the value is spilled to an 8-byte stack slot
// and the word at offset 4 is reloaded as i32.
9176 if (isTypeLegal(MVT::i64)) {
9177 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9178 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9179 } else {
9180 // Save FP register to stack slot
9181 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9182 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9183 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9184
9185 // Load FP Control Word from low 32 bits of stack slot.
// NOTE(review): the opening line of this assert (its condition) appears to
// be missing from this listing -- TODO restore it from the upstream source.
9187 "Stack slot adjustment is valid only on big endian subtargets!");
9188 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9189 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9190 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9191 Chain = CWD.getValue(1);
9192 }
9193
9194 // Transform as necessary
// CWD1 = CWD & 3 keeps the two rounding-mode bits.
9195 SDValue CWD1 =
9196 DAG.getNode(ISD::AND, dl, MVT::i32,
9197 CWD, DAG.getConstant(3, dl, MVT::i32));
// CWD2 = ((CWD ^ 3) & 3) >> 1, i.e. the inverted high mode bit moved to bit 0.
9198 SDValue CWD2 =
9199 DAG.getNode(ISD::SRL, dl, MVT::i32,
9200 DAG.getNode(ISD::AND, dl, MVT::i32,
9201 DAG.getNode(ISD::XOR, dl, MVT::i32,
9202 CWD, DAG.getConstant(3, dl, MVT::i32)),
9203 DAG.getConstant(3, dl, MVT::i32)),
9204 DAG.getConstant(1, dl, MVT::i32));
9205
9206 SDValue RetVal =
9207 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9208
// NOTE(review): the line that begins this call (the opcode used to convert
// the i32 result to VT) appears to be missing from this listing -- TODO
// restore it from the upstream source.
9209 RetVal =
9211 dl, VT, RetVal);
9212
9213 return DAG.getMergeValues({RetVal, Chain}, dl);
9214}
9215
9216SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9217 EVT VT = Op.getValueType();
9218 uint64_t BitWidth = VT.getSizeInBits();
9219 SDLoc dl(Op);
9220 assert(Op.getNumOperands() == 3 &&
9221 VT == Op.getOperand(1).getValueType() &&
9222 "Unexpected SHL!");
9223
9224 // Expand into a bunch of logical ops. Note that these ops
9225 // depend on the PPC behavior for oversized shift amounts.
9226 SDValue Lo = Op.getOperand(0);
9227 SDValue Hi = Op.getOperand(1);
9228 SDValue Amt = Op.getOperand(2);
9229 EVT AmtVT = Amt.getValueType();
9230
9231 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9232 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9233 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9234 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9235 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9236 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9237 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9238 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9239 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9240 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9241 SDValue OutOps[] = { OutLo, OutHi };
9242 return DAG.getMergeValues(OutOps, dl);
9243}
9244
9245SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9246 EVT VT = Op.getValueType();
9247 SDLoc dl(Op);
9248 uint64_t BitWidth = VT.getSizeInBits();
9249 assert(Op.getNumOperands() == 3 &&
9250 VT == Op.getOperand(1).getValueType() &&
9251 "Unexpected SRL!");
9252
9253 // Expand into a bunch of logical ops. Note that these ops
9254 // depend on the PPC behavior for oversized shift amounts.
9255 SDValue Lo = Op.getOperand(0);
9256 SDValue Hi = Op.getOperand(1);
9257 SDValue Amt = Op.getOperand(2);
9258 EVT AmtVT = Amt.getValueType();
9259
9260 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9261 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9262 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9263 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9264 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9265 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9266 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9267 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9268 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9269 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9270 SDValue OutOps[] = { OutLo, OutHi };
9271 return DAG.getMergeValues(OutOps, dl);
9272}
9273
9274SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9275 SDLoc dl(Op);
9276 EVT VT = Op.getValueType();
9277 uint64_t BitWidth = VT.getSizeInBits();
9278 assert(Op.getNumOperands() == 3 &&
9279 VT == Op.getOperand(1).getValueType() &&
9280 "Unexpected SRA!");
9281
9282 // Expand into a bunch of logical ops, followed by a select_cc.
9283 SDValue Lo = Op.getOperand(0);
9284 SDValue Hi = Op.getOperand(1);
9285 SDValue Amt = Op.getOperand(2);
9286 EVT AmtVT = Amt.getValueType();
9287
9288 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9289 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9290 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9291 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9292 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9293 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9294 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9295 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9296 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9297 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9298 Tmp4, Tmp6, ISD::SETLE);
9299 SDValue OutOps[] = { OutLo, OutHi };
9300 return DAG.getMergeValues(OutOps, dl);
9301}
9302
9303SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9304 SelectionDAG &DAG) const {
9305 SDLoc dl(Op);
9306 EVT VT = Op.getValueType();
9307 unsigned BitWidth = VT.getSizeInBits();
9308
9309 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9310 SDValue X = Op.getOperand(0);
9311 SDValue Y = Op.getOperand(1);
9312 SDValue Z = Op.getOperand(2);
9313 EVT AmtVT = Z.getValueType();
9314
9315 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9316 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9317 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9318 // on PowerPC shift by BW being well defined.
9319 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9320 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9321 SDValue SubZ =
9322 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9323 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9324 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9325 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9326}
9327
9328//===----------------------------------------------------------------------===//
9329// Vector related lowering.
9330//
9331
9332/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9333/// element size of SplatSize. Cast the result to VT.
9334static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9335 SelectionDAG &DAG, const SDLoc &dl) {
9336 static const MVT VTys[] = { // canonical VT to use for each size.
9337 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9338 };
9339
9340 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9341
9342 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9343 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9344 SplatSize = 1;
9345 Val = 0xFF;
9346 }
9347
9348 EVT CanonicalVT = VTys[SplatSize-1];
9349
9350 // Build a canonical splat for this value.
9351 // Explicitly truncate APInt here, as this API is used with a mix of
9352 // signed and unsigned values.
9353 return DAG.getBitcast(
9354 ReqVT,
9355 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9356}
9357
9358/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9359/// specified intrinsic ID.
/// The node is an ISD::INTRINSIC_WO_CHAIN whose first operand is IID as an
/// i32 constant, followed by Op. When DestVT is MVT::Other the result type
/// is Op's value type.
// NOTE(review): the first line of the signature (return type, name and
// leading parameters) appears to be missing from this listing -- TODO restore
// it from the upstream source.
9361 const SDLoc &dl, EVT DestVT = MVT::Other) {
9362 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9363 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9364 DAG.getConstant(IID, dl, MVT::i32), Op);
9365}
9366
9367/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9368/// specified intrinsic ID.
/// The node is an ISD::INTRINSIC_WO_CHAIN whose first operand is IID as an
/// i32 constant, followed by LHS and RHS. When DestVT is MVT::Other the
/// result type is LHS's value type.
// NOTE(review): the first line of the signature (return type, name and
// leading parameters) appears to be missing from this listing -- TODO restore
// it from the upstream source.
9370 SelectionDAG &DAG, const SDLoc &dl,
9371 EVT DestVT = MVT::Other) {
9372 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9373 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9374 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9375}
9376
9377/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9378/// specified intrinsic ID.
9379static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9380 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9381 EVT DestVT = MVT::Other) {
9382 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9383 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9384 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9385}
9386
9387/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9388/// amount. The result has the specified value type.
9389static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9390 SelectionDAG &DAG, const SDLoc &dl) {
9391 // Force LHS/RHS to be the right type.
9392 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9393 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9394
9395 int Ops[16];
9396 for (unsigned i = 0; i != 16; ++i)
9397 Ops[i] = i + Amt;
9398 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9399 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9400}
9401
9402/// Do we have an efficient pattern in a .td file for this node?
9403///
9404/// \param V - pointer to the BuildVectorSDNode being matched
9405/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
/// \param HasP8Vector - allows v4f32 as an accepted vector type (see the
///        RightType computation below)
9406///
9407/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9408/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9409/// the opposite is true (expansion is beneficial) are:
9410/// - The node builds a vector out of integers that are not 32 or 64-bits
9411/// - The node builds a vector out of constants
9412/// - The node is a "load-and-splat"
9413/// In all other cases, we will choose to keep the BUILD_VECTOR.
// NOTE(review): the first line of the signature (return type, name and the
// V parameter) appears to be missing from this listing -- TODO restore it
// from the upstream source.
9415 bool HasDirectMove,
9416 bool HasP8Vector) {
9417 EVT VecVT = V->getValueType(0);
// Accepted types: v2f64 always, v4f32 with HasP8Vector, v2i64/v4i32 with
// HasDirectMove.
9418 bool RightType = VecVT == MVT::v2f64 ||
9419 (HasP8Vector && VecVT == MVT::v4f32) ||
9420 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9421 if (!RightType)
9422 return false;
9423
9424 bool IsSplat = true;
9425 bool IsLoad = false;
9426 SDValue Op0 = V->getOperand(0);
9427
9428 // This function is called in a block that confirms the node is not a constant
9429 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9430 // different constants.
9431 if (V->isConstant())
9432 return false;
9433 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
// Any undef operand makes this function return false.
9434 if (V->getOperand(i).isUndef())
9435 return false;
9436 // We want to expand nodes that represent load-and-splat even if the
9437 // loaded value is a floating point truncation or conversion to int.
9438 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9439 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9440 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9441 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9442 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9443 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9444 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9445 IsLoad = true;
9446 // If the operands are different or the input is not a load and has more
9447 // uses than just this BV node, then it isn't a splat.
9448 if (V->getOperand(i) != Op0 ||
9449 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9450 IsSplat = false;
9451 }
// Only the load-and-splat shape is rejected at this point.
9452 return !(IsSplat && IsLoad);
9453}
9454
9455// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9456SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9457
9458 SDLoc dl(Op);
9459 SDValue Op0 = Op->getOperand(0);
9460
9461 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9462 (Op.getValueType() != MVT::f128))
9463 return SDValue();
9464
9465 SDValue Lo = Op0.getOperand(0);
9466 SDValue Hi = Op0.getOperand(1);
9467 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9468 return SDValue();
9469
9470 if (!Subtarget.isLittleEndian())
9471 std::swap(Lo, Hi);
9472
9473 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9474}
9475
9476static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9477 const SDValue *InputLoad = &Op;
9478 while (InputLoad->getOpcode() == ISD::BITCAST)
9479 InputLoad = &InputLoad->getOperand(0);
9480 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9481 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9482 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9483 InputLoad = &InputLoad->getOperand(0);
9484 }
9485 if (InputLoad->getOpcode() != ISD::LOAD)
9486 return nullptr;
9487 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9488 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9489}
9490
9491// Convert the argument APFloat to a single precision APFloat if there is no
9492// loss in information during the conversion to single precision APFloat and the
9493// resulting number is not a denormal number. Return true if successful.
// The argument is left unchanged when the function returns false.
// NOTE(review): the signature line appears to be missing from this listing;
// from the body the parameter is a mutable APFloat named ArgAPFloat -- TODO
// restore it from the upstream source.
9495 APFloat APFloatToConvert = ArgAPFloat;
9496 bool LosesInfo = true;
// NOTE(review): the line that begins the conversion call writing LosesInfo
// appears to be missing from this listing -- TODO restore it.
9498 &LosesInfo);
9499 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9500 if (Success)
9501 ArgAPFloat = APFloatToConvert;
9502 return Success;
9503}
9504
9505// Bitcast the argument APInt to a double and convert it to a single precision
9506// APFloat, bitcast the APFloat to an APInt and assign it to the original
9507// argument if there is no loss in information during the conversion from
9508// double to single precision APFloat and the resulting number is not a denormal
9509// number. Return true if successful.
// The argument is left unchanged when the function returns false.
// NOTE(review): the signature line appears to be missing from this listing;
// from the body the parameter is a mutable APInt named ArgAPInt -- TODO
// restore it from the upstream source.
9511 double DpValue = ArgAPInt.bitsToDouble();
9512 APFloat APFloatDp(DpValue);
// Delegates to the APFloat overload, which rewrites APFloatDp only on success.
9513 bool Success = convertToNonDenormSingle(APFloatDp);
9514 if (Success)
9515 ArgAPInt = APFloatDp.bitcastToAPInt();
9516 return Success;
9517}
9518
9519// Nondestructive check for convertToNonDenormSingle.
// Works on a copy of the argument, so the argument itself is never modified.
// Returns true when the conversion loses no information and the converted
// value is not a denormal.
// NOTE(review): the signature line appears to be missing from this listing;
// from the body the parameter is an APFloat named ArgAPFloat -- TODO restore
// it from the upstream source.
9521 // Only convert if it loses info, since XXSPLTIDP should
9522 // handle the other case.
9523 APFloat APFloatToConvert = ArgAPFloat;
9524 bool LosesInfo = true;
// NOTE(review): the line that begins the conversion call writing LosesInfo
// appears to be missing from this listing -- TODO restore it.
9526 &LosesInfo);
9527
9528 return (!LosesInfo && !APFloatToConvert.isDenormal());
9529}
9530
9531static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9532 unsigned &Opcode) {
9533 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9534 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9535 return false;
9536
9537 EVT Ty = Op->getValueType(0);
9538 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9539 // as we cannot handle extending loads for these types.
9540 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9541 ISD::isNON_EXTLoad(InputNode))
9542 return true;
9543
9544 EVT MemVT = InputNode->getMemoryVT();
9545 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9546 // memory VT is the same vector element VT type.
9547 // The loads feeding into the v8i16 and v16i8 types will be extending because
9548 // scalar i8/i16 are not legal types.
9549 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9550 (MemVT == Ty.getVectorElementType()))
9551 return true;
9552
9553 if (Ty == MVT::v2i64) {
9554 // Check the extend type, when the input type is i32, and the output vector
9555 // type is v2i64.
9556 if (MemVT == MVT::i32) {
9557 if (ISD::isZEXTLoad(InputNode))
9558 Opcode = PPCISD::ZEXT_LD_SPLAT;
9559 if (ISD::isSEXTLoad(InputNode))
9560 Opcode = PPCISD::SEXT_LD_SPLAT;
9561 }
9562 return true;
9563 }
9564 return false;
9565}
9566
// Decide whether the build vector BVN is a constant whose every byte is
// either 0x00 or 0xFF. On success, bit J of BitMask is set for each byte J
// that is 0xFF and the function returns true. It returns false if any
// operand is not a ConstantSDNode or any byte has another value; BitMask is
// cleared on entry in every case.
// NOTE(review): the first line of the signature (return type, name, and the
// BitMask and BVN parameters) appears to be missing from this listing -- TODO
// restore it from the upstream source.
9568 bool IsLittleEndian) {
9569 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9570
9571 BitMask.clearAllBits();
9572 EVT VT = BVN.getValueType(0);
9573 unsigned VTSize = VT.getSizeInBits();
9574 APInt ConstValue(VTSize, 0);
9575
9576 unsigned EltWidth = VT.getScalarSizeInBits();
9577
9578 unsigned BitPos = 0;
// Pack every constant element into one VTSize-bit integer.
9579 for (auto OpVal : BVN.op_values()) {
9580 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9581
9582 if (!CN)
9583 return false;
9584 // The elements in a vector register are ordered in reverse byte order
9585 // between little-endian and big-endian modes.
9586 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9587 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9588 BitPos += EltWidth;
9589 }
9590
// Inspect 16 bytes; presumably the vector is always 128 bits wide here --
// TODO confirm against the caller.
9591 for (unsigned J = 0; J < 16; ++J) {
9592 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9593 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9594 return false;
9595 if (ExtractValue == 0xFF)
9596 BitMask.setBit(J);
9597 }
9598 return true;
9599}
9600
9601// If this is a case we can't handle, return null and let the default
9602// expansion code take care of it. If we CAN select this case, and if it
9603// selects to a single instruction, return Op. Otherwise, if we can codegen
9604// this case more efficiently than a constant pool load, lower it to the
9605// sequence of ops that should be used.
9606SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9607 SelectionDAG &DAG) const {
9608 SDLoc dl(Op);
9609 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9610 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9611
9612 if (Subtarget.hasP10Vector()) {
9613 APInt BitMask(32, 0);
9614 // If the value of the vector is all zeros or all ones,
9615 // we do not convert it to MTVSRBMI.
9616 // The xxleqv instruction sets a vector with all ones.
9617 // The xxlxor instruction sets a vector with all zeros.
9618 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9619 BitMask != 0 && BitMask != 0xffff) {
9620 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9621 MachineSDNode *MSDNode =
9622 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9623 SDValue SDV = SDValue(MSDNode, 0);
9624 EVT DVT = BVN->getValueType(0);
9625 EVT SVT = SDV.getValueType();
9626 if (SVT != DVT) {
9627 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9628 }
9629 return SDV;
9630 }
9631 // Recognize build vector patterns to emit VSX vector instructions
9632 // instead of loading value from memory.
9633 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9634 return VecPat;
9635 }
9636 // Check if this is a splat of a constant value.
9637 APInt APSplatBits, APSplatUndef;
9638 unsigned SplatBitSize = 0;
9639 bool HasAnyUndefs;
9640 bool BVNIsConstantSplat =
9641 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9642 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9643
9644 // If it is a splat of a double, check if we can shrink it to a 32 bit
9645 // non-denormal float which when converted back to double gives us the same
9646 // double. This is to exploit the XXSPLTIDP instruction.
9647 // If we lose precision, we use XXSPLTI32DX.
9648 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9649 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9650 // Check the type first to short-circuit so we don't modify APSplatBits if
9651 // this block isn't executed.
9652 if ((Op->getValueType(0) == MVT::v2f64) &&
9653 convertToNonDenormSingle(APSplatBits)) {
9654 SDValue SplatNode = DAG.getNode(
9655 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9656 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9657 return DAG.getBitcast(Op.getValueType(), SplatNode);
9658 } else {
9659 // We may lose precision, so we have to use XXSPLTI32DX.
9660
9661 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9662 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9663 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9664
9665 if (!Hi || !Lo)
9666 // If either load is 0, then we should generate XXLXOR to set to 0.
9667 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9668
9669 if (Hi)
9670 SplatNode = DAG.getNode(
9671 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9672 DAG.getTargetConstant(0, dl, MVT::i32),
9673 DAG.getTargetConstant(Hi, dl, MVT::i32));
9674
9675 if (Lo)
9676 SplatNode =
9677 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9678 DAG.getTargetConstant(1, dl, MVT::i32),
9679 DAG.getTargetConstant(Lo, dl, MVT::i32));
9680
9681 return DAG.getBitcast(Op.getValueType(), SplatNode);
9682 }
9683 }
9684
9685 if (SDValue V =
9686 LowerVecSplatSmallFP(Op, DAG, BVNIsConstantSplat, SplatBitSize))
9687 return V;
9688
9689 bool IsSplat64 = false;
9690 uint64_t SplatBits = 0;
9691 int32_t SextVal = 0;
9692 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9693 SplatBits = APSplatBits.getZExtValue();
9694 if (SplatBitSize <= 32) {
9695 SextVal = SignExtend32(SplatBits, SplatBitSize);
9696 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9697 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9698 bool P9Vector = Subtarget.hasP9Vector();
9699 int32_t Hi = P9Vector ? 127 : 15;
9700 int32_t Lo = P9Vector ? -128 : -16;
9701 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9702 SextVal = static_cast<int32_t>(SplatBits);
9703 }
9704 }
9705
9706 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9707 unsigned NewOpcode = PPCISD::LD_SPLAT;
9708
9709 // Handle load-and-splat patterns as we have instructions that will do this
9710 // in one go.
9711 if (DAG.isSplatValue(Op, true) &&
9712 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9713 const SDValue *InputLoad = &Op.getOperand(0);
9714 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9715
9716 // If the input load is an extending load, it will be an i32 -> i64
9717 // extending load and isValidSplatLoad() will update NewOpcode.
9718 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9719 unsigned ElementSize =
9720 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9721
9722 assert(((ElementSize == 2 * MemorySize)
9723 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9724 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9725 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9726 "Unmatched element size and opcode!\n");
9727
9728 // Checking for a single use of this load, we have to check for vector
9729 // width (128 bits) / ElementSize uses (since each operand of the
9730 // BUILD_VECTOR is a separate use of the value.
9731 unsigned NumUsesOfInputLD = 128 / ElementSize;
9732 for (SDValue BVInOp : Op->ops())
9733 if (BVInOp.isUndef())
9734 NumUsesOfInputLD--;
9735
9736 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9737 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9738 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9739 // 15", but function IsValidSplatLoad() now will only return true when
9740 // the data at index 0 is not nullptr. So we will not get into trouble for
9741 // these cases.
9742 //
9743 // case 1 - lfiwzx/lfiwax
9744 // 1.1: load result is i32 and is sign/zero extend to i64;
9745 // 1.2: build a v2i64 vector type with above loaded value;
9746 // 1.3: the vector has only one value at index 0, others are all undef;
9747 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9748 if (NumUsesOfInputLD == 1 &&
9749 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9750 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9751 Subtarget.hasLFIWAX()))
9752 return SDValue();
9753
9754 // case 2 - lxvr[hb]x
9755 // 2.1: load result is at most i16;
9756 // 2.2: build a vector with above loaded value;
9757 // 2.3: the vector has only one value at index 0, others are all undef;
9758 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9759 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9760 Subtarget.isISA3_1() && ElementSize <= 16)
9761 return SDValue();
9762
9763 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9764 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9765 Subtarget.hasVSX()) {
9766 SDValue Ops[] = {
9767 LD->getChain(), // Chain
9768 LD->getBasePtr(), // Ptr
9769 DAG.getValueType(Op.getValueType()) // VT
9770 };
9771 SDValue LdSplt = DAG.getMemIntrinsicNode(
9772 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9773 LD->getMemoryVT(), LD->getMemOperand());
9774 // Replace all uses of the output chain of the original load with the
9775 // output chain of the new load.
9776 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9777 LdSplt.getValue(1));
9778 return LdSplt;
9779 }
9780 }
9781
9782 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9783 // 32-bits can be lowered to VSX instructions under certain conditions.
9784 // Without VSX, there is no pattern more efficient than expanding the node.
9785 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9786 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9787 Subtarget.hasP8Vector()))
9788 return Op;
9789 return SDValue();
9790 }
9791
9792 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9793 unsigned SplatSize = SplatBitSize / 8;
9794
9795 // First, handle single instruction cases.
9796
9797 // All zeros?
9798 if (SplatBits == 0) {
9799 // Canonicalize all zero vectors to be v4i32.
9800 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9801 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9802 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9803 }
9804 return Op;
9805 }
9806
9807 // We have XXSPLTIW for constant splats four bytes wide.
9808 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9809 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9810 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9811 // turned into a 4-byte splat of 0xABABABAB.
9812 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9813 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9814 Op.getValueType(), DAG, dl);
9815
9816 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9817 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9818 dl);
9819
9820 // We have XXSPLTIB for constant splats one byte wide.
9821 if (Subtarget.hasP9Vector() && SplatSize == 1)
9822 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9823 dl);
9824
9825 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9826 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9827 if (SextVal >= -16 && SextVal <= 15) {
9828 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9829 // generate a splat word with extend for size 8.
9830 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9831 SDValue Res =
9832 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9833 if (SplatSize != 8)
9834 return Res;
9835 SDValue IntrinsicOp =
9836 BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw,
9837 DAG.getBitcast(MVT::v4i32, Res), DAG, dl, MVT::v2i64);
9838 return DAG.getBitcast(Op.getValueType(), IntrinsicOp);
9839 }
9840
9841 // Two instruction sequences.
9842
9843 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9844 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9846 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9847 unsigned IID;
9848 EVT VT;
9849 switch (SplatSize) {
9850 default:
9851 llvm_unreachable("Unexpected type for vector constant.");
9852 case 2:
9853 IID = Intrinsic::ppc_altivec_vupklsb;
9854 VT = MVT::v8i16;
9855 break;
9856 case 4:
9857 IID = Intrinsic::ppc_altivec_vextsb2w;
9858 VT = MVT::v4i32;
9859 break;
9860 case 8:
9861 IID = Intrinsic::ppc_altivec_vextsb2d;
9862 VT = MVT::v2i64;
9863 break;
9864 }
9865 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9866 return DAG.getBitcast(Op->getValueType(0), Extend);
9867 }
9868 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9869
9870 // If this value is in the range [-32,30] and is even, use:
9871 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9872 // If this value is in the range [17,31] and is odd, use:
9873 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9874 // If this value is in the range [-31,-17] and is odd, use:
9875 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9876 // Note the last two are three-instruction sequences.
9877 if (SextVal >= -32 && SextVal <= 31) {
9878 // To avoid having these optimizations undone by constant folding,
9879 // we convert to a pseudo that will be expanded later into one of
9880 // the above forms.
9881 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9882 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9883 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9884 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9885 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9886 if (VT == Op.getValueType())
9887 return RetVal;
9888 else
9889 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9890 }
9891
9892 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9893 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9894 // for fneg/fabs.
9895 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9896 // Make -1 and vspltisw -1:
9897 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9898
9899 // Make the VSLW intrinsic, computing 0x8000_0000.
9900 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9901 OnesV, DAG, dl);
9902
9903 // xor by OnesV to invert it.
9904 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9905 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9906 }
9907
9908 // Check to see if this is a wide variety of vsplti*, binop self cases.
9909 static const signed char SplatCsts[] = {
9910 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9911 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9912 };
9913
9914 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9915 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9916 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9917 int i = SplatCsts[idx];
9918
9919 // Figure out what shift amount will be used by altivec if shifted by i in
9920 // this splat size.
9921 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9922
9923 // vsplti + shl self.
9924 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9925 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9926 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9927 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9928 Intrinsic::ppc_altivec_vslw
9929 };
9930 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9931 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9932 }
9933
9934 // vsplti + srl self.
9935 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9936 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9937 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9938 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9939 Intrinsic::ppc_altivec_vsrw
9940 };
9941 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9942 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9943 }
9944
9945 // vsplti + rol self.
9946 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9947 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9948 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9949 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9950 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9951 Intrinsic::ppc_altivec_vrlw
9952 };
9953 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9954 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9955 }
9956
9957 // t = vsplti c, result = vsldoi t, t, 1
9958 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9959 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9960 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9961 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9962 }
9963 // t = vsplti c, result = vsldoi t, t, 2
9964 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9965 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9966 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9967 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9968 }
9969 // t = vsplti c, result = vsldoi t, t, 3
9970 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9971 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9972 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9973 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9974 }
9975 }
9976
9977 return SDValue();
9978}
9979
9980/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9981/// the specified operations to build the shuffle.
9983 SDValue RHS, SelectionDAG &DAG,
9984 const SDLoc &dl) {
9985 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9986 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9987 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9988
9989 enum {
9990 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9991 OP_VMRGHW,
9992 OP_VMRGLW,
9993 OP_VSPLTISW0,
9994 OP_VSPLTISW1,
9995 OP_VSPLTISW2,
9996 OP_VSPLTISW3,
9997 OP_VSLDOI4,
9998 OP_VSLDOI8,
9999 OP_VSLDOI12
10000 };
10001
10002 if (OpNum == OP_COPY) {
10003 if (LHSID == (1*9+2)*9+3) return LHS;
10004 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10005 return RHS;
10006 }
10007
10008 SDValue OpLHS, OpRHS;
10009 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10010 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10011
10012 int ShufIdxs[16];
10013 switch (OpNum) {
10014 default: llvm_unreachable("Unknown i32 permute!");
10015 case OP_VMRGHW:
10016 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10017 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10018 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10019 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10020 break;
10021 case OP_VMRGLW:
10022 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10023 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10024 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10025 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10026 break;
10027 case OP_VSPLTISW0:
10028 for (unsigned i = 0; i != 16; ++i)
10029 ShufIdxs[i] = (i&3)+0;
10030 break;
10031 case OP_VSPLTISW1:
10032 for (unsigned i = 0; i != 16; ++i)
10033 ShufIdxs[i] = (i&3)+4;
10034 break;
10035 case OP_VSPLTISW2:
10036 for (unsigned i = 0; i != 16; ++i)
10037 ShufIdxs[i] = (i&3)+8;
10038 break;
10039 case OP_VSPLTISW3:
10040 for (unsigned i = 0; i != 16; ++i)
10041 ShufIdxs[i] = (i&3)+12;
10042 break;
10043 case OP_VSLDOI4:
10044 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10045 case OP_VSLDOI8:
10046 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10047 case OP_VSLDOI12:
10048 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10049 }
10050 EVT VT = OpLHS.getValueType();
10051 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10052 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10053 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10054 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10055}
10056
10057/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10058/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10059/// SDValue.
10060SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10061 SelectionDAG &DAG) const {
10062 const unsigned BytesInVector = 16;
10063 bool IsLE = Subtarget.isLittleEndian();
10064 SDLoc dl(N);
10065 SDValue V1 = N->getOperand(0);
10066 SDValue V2 = N->getOperand(1);
10067 unsigned ShiftElts = 0, InsertAtByte = 0;
10068 bool Swap = false;
10069
10070 // Shifts required to get the byte we want at element 7.
10071 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10072 0, 15, 14, 13, 12, 11, 10, 9};
10073 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10074 1, 2, 3, 4, 5, 6, 7, 8};
10075
10076 ArrayRef<int> Mask = N->getMask();
10077 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10078
10079 // For each mask element, find out if we're just inserting something
10080 // from V2 into V1 or vice versa.
10081 // Possible permutations inserting an element from V2 into V1:
10082 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10083 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10084 // ...
10085 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10086 // Inserting from V1 into V2 will be similar, except mask range will be
10087 // [16,31].
10088
10089 bool FoundCandidate = false;
10090 // If both vector operands for the shuffle are the same vector, the mask
10091 // will contain only elements from the first one and the second one will be
10092 // undef.
10093 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10094 // Go through the mask of half-words to find an element that's being moved
10095 // from one vector to the other.
10096 for (unsigned i = 0; i < BytesInVector; ++i) {
10097 unsigned CurrentElement = Mask[i];
10098 // If 2nd operand is undefined, we should only look for element 7 in the
10099 // Mask.
10100 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10101 continue;
10102
10103 bool OtherElementsInOrder = true;
10104 // Examine the other elements in the Mask to see if they're in original
10105 // order.
10106 for (unsigned j = 0; j < BytesInVector; ++j) {
10107 if (j == i)
10108 continue;
10109 // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
10110 // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
10111 // in which we always assume we're always picking from the 1st operand.
10112 int MaskOffset =
10113 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10114 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10115 OtherElementsInOrder = false;
10116 break;
10117 }
10118 }
10119 // If other elements are in original order, we record the number of shifts
10120 // we need to get the element we want into element 7. Also record which byte
10121 // in the vector we should insert into.
10122 if (OtherElementsInOrder) {
10123 // If 2nd operand is undefined, we assume no shifts and no swapping.
10124 if (V2.isUndef()) {
10125 ShiftElts = 0;
10126 Swap = false;
10127 } else {
10128 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10129 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10130 : BigEndianShifts[CurrentElement & 0xF];
10131 Swap = CurrentElement < BytesInVector;
10132 }
10133 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10134 FoundCandidate = true;
10135 break;
10136 }
10137 }
10138
10139 if (!FoundCandidate)
10140 return SDValue();
10141
10142 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10143 // optionally with VECSHL if shift is required.
10144 if (Swap)
10145 std::swap(V1, V2);
10146 if (V2.isUndef())
10147 V2 = V1;
10148 if (ShiftElts) {
10149 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10150 DAG.getConstant(ShiftElts, dl, MVT::i32));
10151 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10152 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10153 }
10154 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10155 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10156}
10157
10158/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10159/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10160/// SDValue.
10161SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10162 SelectionDAG &DAG) const {
10163 const unsigned NumHalfWords = 8;
10164 const unsigned BytesInVector = NumHalfWords * 2;
10165 // Check that the shuffle is on half-words.
10166 if (!isNByteElemShuffleMask(N, 2, 1))
10167 return SDValue();
10168
10169 bool IsLE = Subtarget.isLittleEndian();
10170 SDLoc dl(N);
10171 SDValue V1 = N->getOperand(0);
10172 SDValue V2 = N->getOperand(1);
10173 unsigned ShiftElts = 0, InsertAtByte = 0;
10174 bool Swap = false;
10175
10176 // Shifts required to get the half-word we want at element 3.
10177 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10178 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10179
10180 uint32_t Mask = 0;
10181 uint32_t OriginalOrderLow = 0x1234567;
10182 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10183 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10184 // 32-bit space, only need 4-bit nibbles per element.
10185 for (unsigned i = 0; i < NumHalfWords; ++i) {
10186 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10187 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10188 }
10189
10190 // For each mask element, find out if we're just inserting something
10191 // from V2 into V1 or vice versa. Possible permutations inserting an element
10192 // from V2 into V1:
10193 // X, 1, 2, 3, 4, 5, 6, 7
10194 // 0, X, 2, 3, 4, 5, 6, 7
10195 // 0, 1, X, 3, 4, 5, 6, 7
10196 // 0, 1, 2, X, 4, 5, 6, 7
10197 // 0, 1, 2, 3, X, 5, 6, 7
10198 // 0, 1, 2, 3, 4, X, 6, 7
10199 // 0, 1, 2, 3, 4, 5, X, 7
10200 // 0, 1, 2, 3, 4, 5, 6, X
10201 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10202
10203 bool FoundCandidate = false;
10204 // Go through the mask of half-words to find an element that's being moved
10205 // from one vector to the other.
10206 for (unsigned i = 0; i < NumHalfWords; ++i) {
10207 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10208 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10209 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10210 uint32_t TargetOrder = 0x0;
10211
10212 // If both vector operands for the shuffle are the same vector, the mask
10213 // will contain only elements from the first one and the second one will be
10214 // undef.
10215 if (V2.isUndef()) {
10216 ShiftElts = 0;
10217 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10218 TargetOrder = OriginalOrderLow;
10219 Swap = false;
10220 // Skip if not the correct element or mask of other elements don't equal
10221 // to our expected order.
10222 if (MaskOneElt == VINSERTHSrcElem &&
10223 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10224 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10225 FoundCandidate = true;
10226 break;
10227 }
10228 } else { // If both operands are defined.
10229 // Target order is [8,15] if the current mask is between [0,7].
10230 TargetOrder =
10231 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10232 // Skip if mask of other elements don't equal our expected order.
10233 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10234 // We only need the last 3 bits for the number of shifts.
10235 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10236 : BigEndianShifts[MaskOneElt & 0x7];
10237 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10238 Swap = MaskOneElt < NumHalfWords;
10239 FoundCandidate = true;
10240 break;
10241 }
10242 }
10243 }
10244
10245 if (!FoundCandidate)
10246 return SDValue();
10247
10248 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10249 // optionally with VECSHL if shift is required.
10250 if (Swap)
10251 std::swap(V1, V2);
10252 if (V2.isUndef())
10253 V2 = V1;
10254 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10255 if (ShiftElts) {
10256 // Double ShiftElts because we're left shifting on v16i8 type.
10257 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10258 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10259 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10260 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10261 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10262 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10263 }
10264 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10265 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10266 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10267 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10268}
10269
10270/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10271/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10272/// return the default SDValue.
10273SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10274 SelectionDAG &DAG) const {
10275 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10276 // to v16i8. Peek through the bitcasts to get the actual operands.
10279
10280 auto ShuffleMask = SVN->getMask();
10281 SDValue VecShuffle(SVN, 0);
10282 SDLoc DL(SVN);
10283
10284 // Check that we have a four byte shuffle.
10285 if (!isNByteElemShuffleMask(SVN, 4, 1))
10286 return SDValue();
10287
10288 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10289 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10290 std::swap(LHS, RHS);
10292 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10293 if (!CommutedSV)
10294 return SDValue();
10295 ShuffleMask = CommutedSV->getMask();
10296 }
10297
10298 // Ensure that the RHS is a vector of constants.
10299 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10300 if (!BVN)
10301 return SDValue();
10302
10303 // Check if RHS is a splat of 4-bytes (or smaller).
10304 APInt APSplatValue, APSplatUndef;
10305 unsigned SplatBitSize;
10306 bool HasAnyUndefs;
10307 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10308 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10309 SplatBitSize > 32)
10310 return SDValue();
10311
10312 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10313 // The instruction splats a constant C into two words of the source vector
10314 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10315 // Thus we check that the shuffle mask is the equivalent of
10316 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10317 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10318 // within each word are consecutive, so we only need to check the first byte.
10319 SDValue Index;
10320 bool IsLE = Subtarget.isLittleEndian();
10321 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10322 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10323 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10324 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10325 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10326 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10327 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10328 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10329 else
10330 return SDValue();
10331
10332 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10333 // for XXSPLTI32DX.
10334 unsigned SplatVal = APSplatValue.getZExtValue();
10335 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10336 SplatVal |= (SplatVal << SplatBitSize);
10337
10338 SDValue SplatNode = DAG.getNode(
10339 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10340 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10341 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10342}
10343
10344/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10345/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10346/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10347/// i.e (or (shl x, C1), (srl x, 128-C1)).
10348SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10349 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10350 assert(Op.getValueType() == MVT::v1i128 &&
10351 "Only set v1i128 as custom, other type shouldn't reach here!");
10352 SDLoc dl(Op);
10353 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10354 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10355 unsigned SHLAmt = N1.getConstantOperandVal(0);
10356 if (SHLAmt % 8 == 0) {
10357 std::array<int, 16> Mask;
10358 std::iota(Mask.begin(), Mask.end(), 0);
10359 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10360 if (SDValue Shuffle =
10361 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10362 DAG.getUNDEF(MVT::v16i8), Mask))
10363 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10364 }
10365 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10366 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10367 DAG.getConstant(SHLAmt, dl, MVT::i32));
10368 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10369 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10370 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10371 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10372}
10373
10374/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10375/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10376/// return the code it can be lowered into. Worst case, it can always be
10377/// lowered into a vperm.
10378SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10379 SelectionDAG &DAG) const {
10380 SDLoc dl(Op);
10381 SDValue V1 = Op.getOperand(0);
10382 SDValue V2 = Op.getOperand(1);
10383 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10384
10385 // Any nodes that were combined in the target-independent combiner prior
10386 // to vector legalization will not be sent to the target combine. Try to
10387 // combine it here.
10388 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10389 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10390 return NewShuffle;
10391 Op = NewShuffle;
10393 V1 = Op.getOperand(0);
10394 V2 = Op.getOperand(1);
10395 }
10396 EVT VT = Op.getValueType();
10397 bool isLittleEndian = Subtarget.isLittleEndian();
10398
10399 unsigned ShiftElts, InsertAtByte;
10400 bool Swap = false;
10401
10402 // If this is a load-and-splat, we can do that with a single instruction
10403 // in some cases. However if the load has multiple uses, we don't want to
10404 // combine it because that will just produce multiple loads.
10405 bool IsPermutedLoad = false;
10406 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10407 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10408 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10409 InputLoad->hasOneUse()) {
10410 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10411 int SplatIdx =
10412 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10413
10414 // The splat index for permuted loads will be in the left half of the vector
10415 // which is strictly wider than the loaded value by 8 bytes. So we need to
10416 // adjust the splat index to point to the correct address in memory.
10417 if (IsPermutedLoad) {
10418 assert((isLittleEndian || IsFourByte) &&
10419 "Unexpected size for permuted load on big endian target");
10420 SplatIdx += IsFourByte ? 2 : 1;
10421 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10422 "Splat of a value outside of the loaded memory");
10423 }
10424
10425 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10426 // For 4-byte load-and-splat, we need Power9.
10427 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10428 uint64_t Offset = 0;
10429 if (IsFourByte)
10430 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10431 else
10432 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10433
10434 // If the width of the load is the same as the width of the splat,
10435 // loading with an offset would load the wrong memory.
10436 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10437 Offset = 0;
10438
10439 SDValue BasePtr = LD->getBasePtr();
10440 if (Offset != 0)
10442 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10443 SDValue Ops[] = {
10444 LD->getChain(), // Chain
10445 BasePtr, // BasePtr
10446 DAG.getValueType(Op.getValueType()) // VT
10447 };
10448 SDVTList VTL =
10449 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10450 SDValue LdSplt =
10451 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10452 Ops, LD->getMemoryVT(), LD->getMemOperand());
10453 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10454 if (LdSplt.getValueType() != SVOp->getValueType(0))
10455 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10456 return LdSplt;
10457 }
10458 }
10459
10460 // All v2i64 and v2f64 shuffles are legal
10461 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10462 return Op;
10463
10464 if (Subtarget.hasP9Vector() &&
10465 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10466 isLittleEndian)) {
10467 if (V2.isUndef())
10468 V2 = V1;
10469 else if (Swap)
10470 std::swap(V1, V2);
10471 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10472 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10473 if (ShiftElts) {
10474 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10475 DAG.getConstant(ShiftElts, dl, MVT::i32));
10476 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10477 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10478 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10479 }
10480 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10481 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10482 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10483 }
10484
10485 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10486 SDValue SplatInsertNode;
10487 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10488 return SplatInsertNode;
10489 }
10490
10491 if (Subtarget.hasP9Altivec()) {
10492 SDValue NewISDNode;
10493 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10494 return NewISDNode;
10495
10496 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10497 return NewISDNode;
10498 }
10499
10500 if (Subtarget.hasVSX() &&
10501 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10502 if (Swap)
10503 std::swap(V1, V2);
10504 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10505 SDValue Conv2 =
10506 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10507
10508 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10509 DAG.getConstant(ShiftElts, dl, MVT::i32));
10510 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10511 }
10512
10513 if (Subtarget.hasVSX() &&
10514 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10515 if (Swap)
10516 std::swap(V1, V2);
10517 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10518 SDValue Conv2 =
10519 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10520
10521 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10522 DAG.getConstant(ShiftElts, dl, MVT::i32));
10523 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10524 }
10525
10526 if (Subtarget.hasP9Vector()) {
10527 if (PPC::isXXBRHShuffleMask(SVOp)) {
10528 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10529 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10530 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10531 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10532 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10533 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10534 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10535 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10536 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10537 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10538 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10539 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10540 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10541 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10542 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10543 }
10544 }
10545
10546 if (Subtarget.hasVSX()) {
10547 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10548 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10549
10550 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10551 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10552 DAG.getConstant(SplatIdx, dl, MVT::i32));
10553 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10554 }
10555
10556 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10557 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10558 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10559 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10560 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10561 }
10562 }
10563
10564 // Cases that are handled by instructions that take permute immediates
10565 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10566 // selected by the instruction selector.
10567 if (V2.isUndef()) {
10568 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10569 PPC::isSplatShuffleMask(SVOp, 2) ||
10570 PPC::isSplatShuffleMask(SVOp, 4) ||
10571 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10572 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10573 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10574 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10575 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10576 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10577 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10578 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10579 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10580 (Subtarget.hasP8Altivec() && (
10581 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10582 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10583 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10584 return Op;
10585 }
10586 }
10587
10588 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10589 // and produce a fixed permutation. If any of these match, do not lower to
10590 // VPERM.
10591 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10592 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10593 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10594 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10595 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10596 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10597 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10598 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10599 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10600 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10601 (Subtarget.hasP8Altivec() && (
10602 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10603 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10604 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10605 return Op;
10606
10607 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10608 // perfect shuffle table to emit an optimal matching sequence.
10609 ArrayRef<int> PermMask = SVOp->getMask();
10610
10611 if (!DisablePerfectShuffle && !isLittleEndian) {
10612 unsigned PFIndexes[4];
10613 bool isFourElementShuffle = true;
10614 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10615 ++i) { // Element number
10616 unsigned EltNo = 8; // Start out undef.
10617 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10618 if (PermMask[i * 4 + j] < 0)
10619 continue; // Undef, ignore it.
10620
10621 unsigned ByteSource = PermMask[i * 4 + j];
10622 if ((ByteSource & 3) != j) {
10623 isFourElementShuffle = false;
10624 break;
10625 }
10626
10627 if (EltNo == 8) {
10628 EltNo = ByteSource / 4;
10629 } else if (EltNo != ByteSource / 4) {
10630 isFourElementShuffle = false;
10631 break;
10632 }
10633 }
10634 PFIndexes[i] = EltNo;
10635 }
10636
10637 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10638 // perfect shuffle vector to determine if it is cost effective to do this as
10639 // discrete instructions, or whether we should use a vperm.
10640 // For now, we skip this for little endian until such time as we have a
10641 // little-endian perfect shuffle table.
10642 if (isFourElementShuffle) {
10643 // Compute the index in the perfect shuffle table.
10644 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10645 PFIndexes[2] * 9 + PFIndexes[3];
10646
10647 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10648 unsigned Cost = (PFEntry >> 30);
10649
10650 // Determining when to avoid vperm is tricky. Many things affect the cost
10651 // of vperm, particularly how many times the perm mask needs to be
10652 // computed. For example, if the perm mask can be hoisted out of a loop or
10653 // is already used (perhaps because there are multiple permutes with the
10654 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10655 // permute mask out of the loop requires an extra register.
10656 //
10657 // As a compromise, we only emit discrete instructions if the shuffle can
10658 // be generated in 3 or fewer operations. When we have loop information
10659 // available, if this block is within a loop, we should avoid using vperm
10660 // for 3-operation perms and use a constant pool load instead.
10661 if (Cost < 3)
10662 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10663 }
10664 }
10665
10666 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10667 // vector that will get spilled to the constant pool.
10668 if (V2.isUndef()) V2 = V1;
10669
10670 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10671}
10672
10673SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10674 ArrayRef<int> PermMask, EVT VT,
10675 SDValue V1, SDValue V2) const {
10676 unsigned Opcode = PPCISD::VPERM;
10677 EVT ValType = V1.getValueType();
10678 SDLoc dl(Op);
10679 bool NeedSwap = false;
10680 bool isLittleEndian = Subtarget.isLittleEndian();
10681 bool isPPC64 = Subtarget.isPPC64();
10682
10683 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10684 (V1->hasOneUse() || V2->hasOneUse())) {
10685 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10686 "XXPERM instead\n");
10687 Opcode = PPCISD::XXPERM;
10688
10689 // The second input to XXPERM is also an output so if the second input has
10690 // multiple uses then copying is necessary, as a result we want the
10691 // single-use operand to be used as the second input to prevent copying.
10692 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10693 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10694 std::swap(V1, V2);
10695 NeedSwap = !NeedSwap;
10696 }
10697 }
10698
10699 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10700 // that it is in input element units, not in bytes. Convert now.
10701
10702 // For little endian, the order of the input vectors is reversed, and
10703 // the permutation mask is complemented with respect to 31. This is
10704 // necessary to produce proper semantics with the big-endian-based vperm
10705 // instruction.
10706 EVT EltVT = V1.getValueType().getVectorElementType();
10707 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10708
10709 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10710 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10711
10712 /*
10713 Vectors will be appended like so: [ V1 | v2 ]
10714 XXSWAPD on V1:
10715 [ A | B | C | D ] -> [ C | D | A | B ]
10716 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10717 i.e. index of A, B += 8, and index of C, D -= 8.
10718 XXSWAPD on V2:
10719 [ E | F | G | H ] -> [ G | H | E | F ]
10720 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10721 i.e. index of E, F += 8, index of G, H -= 8
10722 Swap V1 and V2:
10723 [ V1 | V2 ] -> [ V2 | V1 ]
10724 0-15 16-31 0-15 16-31
10725 i.e. index of V1 += 16, index of V2 -= 16
10726 */
10727
10728 SmallVector<SDValue, 16> ResultMask;
10729 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10730 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10731
10732 if (V1HasXXSWAPD) {
10733 if (SrcElt < 8)
10734 SrcElt += 8;
10735 else if (SrcElt < 16)
10736 SrcElt -= 8;
10737 }
10738 if (V2HasXXSWAPD) {
10739 if (SrcElt > 23)
10740 SrcElt -= 8;
10741 else if (SrcElt > 15)
10742 SrcElt += 8;
10743 }
10744 if (NeedSwap) {
10745 if (SrcElt < 16)
10746 SrcElt += 16;
10747 else
10748 SrcElt -= 16;
10749 }
10750 for (unsigned j = 0; j != BytesPerElement; ++j)
10751 if (isLittleEndian)
10752 ResultMask.push_back(
10753 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10754 else
10755 ResultMask.push_back(
10756 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10757 }
10758
10759 if (V1HasXXSWAPD) {
10760 dl = SDLoc(V1->getOperand(0));
10761 V1 = V1->getOperand(0)->getOperand(1);
10762 }
10763 if (V2HasXXSWAPD) {
10764 dl = SDLoc(V2->getOperand(0));
10765 V2 = V2->getOperand(0)->getOperand(1);
10766 }
10767
10768 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10769 if (ValType != MVT::v2f64)
10770 V1 = DAG.getBitcast(MVT::v2f64, V1);
10771 if (V2.getValueType() != MVT::v2f64)
10772 V2 = DAG.getBitcast(MVT::v2f64, V2);
10773 }
10774
10775 ShufflesHandledWithVPERM++;
10776 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10777 LLVM_DEBUG({
10778 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10779 if (Opcode == PPCISD::XXPERM) {
10780 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10781 } else {
10782 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10783 }
10784 SVOp->dump();
10785 dbgs() << "With the following permute control vector:\n";
10786 VPermMask.dump();
10787 });
10788
10789 if (Opcode == PPCISD::XXPERM)
10790 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10791
10792 // Only need to place items backwards in LE,
10793 // the mask was properly calculated.
10794 if (isLittleEndian)
10795 std::swap(V1, V2);
10796
10797 SDValue VPERMNode =
10798 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10799
10800 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10801 return VPERMNode;
10802}
10803
10804/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10805/// vector comparison. If it is, return true and fill in Opc/isDot with
10806/// information about the intrinsic.
10807static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10808 bool &isDot, const PPCSubtarget &Subtarget) {
10809 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10810 CompareOpc = -1;
10811 isDot = false;
10812 switch (IntrinsicID) {
10813 default:
10814 return false;
10815 // Comparison predicates.
10816 case Intrinsic::ppc_altivec_vcmpbfp_p:
10817 CompareOpc = 966;
10818 isDot = true;
10819 break;
10820 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10821 CompareOpc = 198;
10822 isDot = true;
10823 break;
10824 case Intrinsic::ppc_altivec_vcmpequb_p:
10825 CompareOpc = 6;
10826 isDot = true;
10827 break;
10828 case Intrinsic::ppc_altivec_vcmpequh_p:
10829 CompareOpc = 70;
10830 isDot = true;
10831 break;
10832 case Intrinsic::ppc_altivec_vcmpequw_p:
10833 CompareOpc = 134;
10834 isDot = true;
10835 break;
10836 case Intrinsic::ppc_altivec_vcmpequd_p:
10837 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10838 CompareOpc = 199;
10839 isDot = true;
10840 } else
10841 return false;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpneb_p:
10844 case Intrinsic::ppc_altivec_vcmpneh_p:
10845 case Intrinsic::ppc_altivec_vcmpnew_p:
10846 case Intrinsic::ppc_altivec_vcmpnezb_p:
10847 case Intrinsic::ppc_altivec_vcmpnezh_p:
10848 case Intrinsic::ppc_altivec_vcmpnezw_p:
10849 if (Subtarget.hasP9Altivec()) {
10850 switch (IntrinsicID) {
10851 default:
10852 llvm_unreachable("Unknown comparison intrinsic.");
10853 case Intrinsic::ppc_altivec_vcmpneb_p:
10854 CompareOpc = 7;
10855 break;
10856 case Intrinsic::ppc_altivec_vcmpneh_p:
10857 CompareOpc = 71;
10858 break;
10859 case Intrinsic::ppc_altivec_vcmpnew_p:
10860 CompareOpc = 135;
10861 break;
10862 case Intrinsic::ppc_altivec_vcmpnezb_p:
10863 CompareOpc = 263;
10864 break;
10865 case Intrinsic::ppc_altivec_vcmpnezh_p:
10866 CompareOpc = 327;
10867 break;
10868 case Intrinsic::ppc_altivec_vcmpnezw_p:
10869 CompareOpc = 391;
10870 break;
10871 }
10872 isDot = true;
10873 } else
10874 return false;
10875 break;
10876 case Intrinsic::ppc_altivec_vcmpgefp_p:
10877 CompareOpc = 454;
10878 isDot = true;
10879 break;
10880 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10881 CompareOpc = 710;
10882 isDot = true;
10883 break;
10884 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10885 CompareOpc = 774;
10886 isDot = true;
10887 break;
10888 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10889 CompareOpc = 838;
10890 isDot = true;
10891 break;
10892 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10893 CompareOpc = 902;
10894 isDot = true;
10895 break;
10896 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10897 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10898 CompareOpc = 967;
10899 isDot = true;
10900 } else
10901 return false;
10902 break;
10903 case Intrinsic::ppc_altivec_vcmpgtub_p:
10904 CompareOpc = 518;
10905 isDot = true;
10906 break;
10907 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10908 CompareOpc = 582;
10909 isDot = true;
10910 break;
10911 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10912 CompareOpc = 646;
10913 isDot = true;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpgtud_p:
10916 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10917 CompareOpc = 711;
10918 isDot = true;
10919 } else
10920 return false;
10921 break;
10922
10923 case Intrinsic::ppc_altivec_vcmpequq:
10924 case Intrinsic::ppc_altivec_vcmpgtsq:
10925 case Intrinsic::ppc_altivec_vcmpgtuq:
10926 if (!Subtarget.isISA3_1())
10927 return false;
10928 switch (IntrinsicID) {
10929 default:
10930 llvm_unreachable("Unknown comparison intrinsic.");
10931 case Intrinsic::ppc_altivec_vcmpequq:
10932 CompareOpc = 455;
10933 break;
10934 case Intrinsic::ppc_altivec_vcmpgtsq:
10935 CompareOpc = 903;
10936 break;
10937 case Intrinsic::ppc_altivec_vcmpgtuq:
10938 CompareOpc = 647;
10939 break;
10940 }
10941 break;
10942
10943 // VSX predicate comparisons use the same infrastructure
10944 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10945 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10946 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10947 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10948 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10949 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10950 if (Subtarget.hasVSX()) {
10951 switch (IntrinsicID) {
10952 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10953 CompareOpc = 99;
10954 break;
10955 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10956 CompareOpc = 115;
10957 break;
10958 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10959 CompareOpc = 107;
10960 break;
10961 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10962 CompareOpc = 67;
10963 break;
10964 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10965 CompareOpc = 83;
10966 break;
10967 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10968 CompareOpc = 75;
10969 break;
10970 }
10971 isDot = true;
10972 } else
10973 return false;
10974 break;
10975
10976 // Normal Comparisons.
10977 case Intrinsic::ppc_altivec_vcmpbfp:
10978 CompareOpc = 966;
10979 break;
10980 case Intrinsic::ppc_altivec_vcmpeqfp:
10981 CompareOpc = 198;
10982 break;
10983 case Intrinsic::ppc_altivec_vcmpequb:
10984 CompareOpc = 6;
10985 break;
10986 case Intrinsic::ppc_altivec_vcmpequh:
10987 CompareOpc = 70;
10988 break;
10989 case Intrinsic::ppc_altivec_vcmpequw:
10990 CompareOpc = 134;
10991 break;
10992 case Intrinsic::ppc_altivec_vcmpequd:
10993 if (Subtarget.hasP8Altivec())
10994 CompareOpc = 199;
10995 else
10996 return false;
10997 break;
10998 case Intrinsic::ppc_altivec_vcmpneb:
10999 case Intrinsic::ppc_altivec_vcmpneh:
11000 case Intrinsic::ppc_altivec_vcmpnew:
11001 case Intrinsic::ppc_altivec_vcmpnezb:
11002 case Intrinsic::ppc_altivec_vcmpnezh:
11003 case Intrinsic::ppc_altivec_vcmpnezw:
11004 if (Subtarget.hasP9Altivec())
11005 switch (IntrinsicID) {
11006 default:
11007 llvm_unreachable("Unknown comparison intrinsic.");
11008 case Intrinsic::ppc_altivec_vcmpneb:
11009 CompareOpc = 7;
11010 break;
11011 case Intrinsic::ppc_altivec_vcmpneh:
11012 CompareOpc = 71;
11013 break;
11014 case Intrinsic::ppc_altivec_vcmpnew:
11015 CompareOpc = 135;
11016 break;
11017 case Intrinsic::ppc_altivec_vcmpnezb:
11018 CompareOpc = 263;
11019 break;
11020 case Intrinsic::ppc_altivec_vcmpnezh:
11021 CompareOpc = 327;
11022 break;
11023 case Intrinsic::ppc_altivec_vcmpnezw:
11024 CompareOpc = 391;
11025 break;
11026 }
11027 else
11028 return false;
11029 break;
11030 case Intrinsic::ppc_altivec_vcmpgefp:
11031 CompareOpc = 454;
11032 break;
11033 case Intrinsic::ppc_altivec_vcmpgtfp:
11034 CompareOpc = 710;
11035 break;
11036 case Intrinsic::ppc_altivec_vcmpgtsb:
11037 CompareOpc = 774;
11038 break;
11039 case Intrinsic::ppc_altivec_vcmpgtsh:
11040 CompareOpc = 838;
11041 break;
11042 case Intrinsic::ppc_altivec_vcmpgtsw:
11043 CompareOpc = 902;
11044 break;
11045 case Intrinsic::ppc_altivec_vcmpgtsd:
11046 if (Subtarget.hasP8Altivec())
11047 CompareOpc = 967;
11048 else
11049 return false;
11050 break;
11051 case Intrinsic::ppc_altivec_vcmpgtub:
11052 CompareOpc = 518;
11053 break;
11054 case Intrinsic::ppc_altivec_vcmpgtuh:
11055 CompareOpc = 582;
11056 break;
11057 case Intrinsic::ppc_altivec_vcmpgtuw:
11058 CompareOpc = 646;
11059 break;
11060 case Intrinsic::ppc_altivec_vcmpgtud:
11061 if (Subtarget.hasP8Altivec())
11062 CompareOpc = 711;
11063 else
11064 return false;
11065 break;
11066 case Intrinsic::ppc_altivec_vcmpequq_p:
11067 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11068 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11069 if (!Subtarget.isISA3_1())
11070 return false;
11071 switch (IntrinsicID) {
11072 default:
11073 llvm_unreachable("Unknown comparison intrinsic.");
11074 case Intrinsic::ppc_altivec_vcmpequq_p:
11075 CompareOpc = 455;
11076 break;
11077 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11078 CompareOpc = 903;
11079 break;
11080 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11081 CompareOpc = 647;
11082 break;
11083 }
11084 isDot = true;
11085 break;
11086 }
11087 return true;
11088}
11089
11090/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11091/// lower, do it, otherwise return null.
11092SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11093 SelectionDAG &DAG) const {
11094 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11095
11096 SDLoc dl(Op);
11097 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11098 // but the builtin provides it as a scalar. To satisfy the instruction
11099 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11100 auto MapNodeWithSplatVector =
11101 [&](unsigned Opcode,
11102 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11103 SDValue SplatVal =
11104 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11105
11106 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11107 Ops.append(ExtraOps.begin(), ExtraOps.end());
11108 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11109 };
11110
11111 switch (IntrinsicID) {
11112 case Intrinsic::thread_pointer:
11113 // Reads the thread pointer register, used for __builtin_thread_pointer.
11114 if (Subtarget.isPPC64())
11115 return DAG.getRegister(PPC::X13, MVT::i64);
11116 return DAG.getRegister(PPC::R2, MVT::i32);
11117
11118 case Intrinsic::ppc_rldimi: {
11119 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11120 SDValue Src = Op.getOperand(1);
11121 APInt Mask = Op.getConstantOperandAPInt(4);
11122 if (Mask.isZero())
11123 return Op.getOperand(2);
11124 if (Mask.isAllOnes())
11125 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11126 uint64_t SH = Op.getConstantOperandVal(3);
11127 unsigned MB = 0, ME = 0;
11128 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11129 report_fatal_error("invalid rldimi mask!");
11130 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11131 if (ME < 63 - SH) {
11132 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11133 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11134 } else if (ME > 63 - SH) {
11135 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11136 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11137 }
11138 return SDValue(
11139 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11140 {Op.getOperand(2), Src,
11141 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11142 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11143 0);
11144 }
11145
11146 case Intrinsic::ppc_rlwimi: {
11147 APInt Mask = Op.getConstantOperandAPInt(4);
11148 if (Mask.isZero())
11149 return Op.getOperand(2);
11150 if (Mask.isAllOnes())
11151 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11152 Op.getOperand(3));
11153 unsigned MB = 0, ME = 0;
11154 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11155 report_fatal_error("invalid rlwimi mask!");
11156 return SDValue(DAG.getMachineNode(
11157 PPC::RLWIMI, dl, MVT::i32,
11158 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11159 DAG.getTargetConstant(MB, dl, MVT::i32),
11160 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11161 0);
11162 }
11163
11164 case Intrinsic::ppc_bcdshift:
11165 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11166 case Intrinsic::ppc_bcdshiftround:
11167 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11168 case Intrinsic::ppc_bcdtruncate:
11169 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11170 case Intrinsic::ppc_bcdunsignedtruncate:
11171 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11172 case Intrinsic::ppc_bcdunsignedshift:
11173 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11174
11175 case Intrinsic::ppc_rlwnm: {
11176 if (Op.getConstantOperandVal(3) == 0)
11177 return DAG.getConstant(0, dl, MVT::i32);
11178 unsigned MB = 0, ME = 0;
11179 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11180 report_fatal_error("invalid rlwnm mask!");
11181 return SDValue(
11182 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11183 {Op.getOperand(1), Op.getOperand(2),
11184 DAG.getTargetConstant(MB, dl, MVT::i32),
11185 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11186 0);
11187 }
11188
11189 case Intrinsic::ppc_mma_disassemble_acc: {
11190 if (Subtarget.isISAFuture()) {
11191 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11192 SDValue WideVec =
11193 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11194 Op.getOperand(1)),
11195 0);
11197 SDValue Value = SDValue(WideVec.getNode(), 0);
11198 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11199
11200 SDValue Extract;
11201 Extract = DAG.getNode(
11202 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11203 Subtarget.isLittleEndian() ? Value2 : Value,
11204 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11205 dl, getPointerTy(DAG.getDataLayout())));
11206 RetOps.push_back(Extract);
11207 Extract = DAG.getNode(
11208 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11209 Subtarget.isLittleEndian() ? Value2 : Value,
11210 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11211 dl, getPointerTy(DAG.getDataLayout())));
11212 RetOps.push_back(Extract);
11213 Extract = DAG.getNode(
11214 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11215 Subtarget.isLittleEndian() ? Value : Value2,
11216 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11217 dl, getPointerTy(DAG.getDataLayout())));
11218 RetOps.push_back(Extract);
11219 Extract = DAG.getNode(
11220 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11221 Subtarget.isLittleEndian() ? Value : Value2,
11222 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11223 dl, getPointerTy(DAG.getDataLayout())));
11224 RetOps.push_back(Extract);
11225 return DAG.getMergeValues(RetOps, dl);
11226 }
11227 [[fallthrough]];
11228 }
11229 case Intrinsic::ppc_vsx_disassemble_pair: {
11230 int NumVecs = 2;
11231 SDValue WideVec = Op.getOperand(1);
11232 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11233 NumVecs = 4;
11234 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11235 }
11237 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11238 SDValue Extract = DAG.getNode(
11239 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11240 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11241 : VecNo,
11242 dl, getPointerTy(DAG.getDataLayout())));
11243 RetOps.push_back(Extract);
11244 }
11245 return DAG.getMergeValues(RetOps, dl);
11246 }
11247
11248 case Intrinsic::ppc_build_dmr: {
11251 for (int i = 1; i < 9; i += 2) {
11252 SDValue Hi = Op.getOperand(i);
11253 SDValue Lo = Op.getOperand(i + 1);
11254 if (Hi->getOpcode() == ISD::LOAD)
11255 Chains.push_back(Hi.getValue(1));
11256 if (Lo->getOpcode() == ISD::LOAD)
11257 Chains.push_back(Lo.getValue(1));
11258 Pairs.push_back(
11259 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11260 }
11261 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11262 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11263 return DAG.getMergeValues({Value, TF}, dl);
11264 }
11265
11266 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11267 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11268 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11269 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11270 "Specify P of 0 or 1 for lower or upper 512 bytes");
11271 unsigned HiLo = Idx->getSExtValue();
11272 unsigned Opcode;
11273 unsigned Subx;
11274 if (HiLo == 0) {
11275 Opcode = PPC::DMXXEXTFDMR512;
11276 Subx = PPC::sub_wacc_lo;
11277 } else {
11278 Opcode = PPC::DMXXEXTFDMR512_HI;
11279 Subx = PPC::sub_wacc_hi;
11280 }
11281 SDValue Subreg(
11282 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11283 Op.getOperand(1),
11284 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11285 0);
11286 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11287 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11288 }
11289
11290 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11291 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11292 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11293 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11294 "Specify a dmr row pair 0-3");
11295 unsigned IdxVal = Idx->getSExtValue();
11296 unsigned Subx;
11297 switch (IdxVal) {
11298 case 0:
11299 Subx = PPC::sub_dmrrowp0;
11300 break;
11301 case 1:
11302 Subx = PPC::sub_dmrrowp1;
11303 break;
11304 case 2:
11305 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11306 break;
11307 case 3:
11308 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11309 break;
11310 }
11311 SDValue Subreg(
11312 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11313 Op.getOperand(1),
11314 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11315 0);
11316 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11317 return SDValue(
11318 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11319 0);
11320 }
11321
11322 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11323 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11324 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11325 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11326 "Specify P of 0 or 1 for lower or upper 512 bytes");
11327 unsigned HiLo = Idx->getSExtValue();
11328 unsigned Opcode;
11329 unsigned Subx;
11330 if (HiLo == 0) {
11331 Opcode = PPCISD::INST512;
11332 Subx = PPC::sub_wacc_lo;
11333 } else {
11334 Opcode = PPCISD::INST512HI;
11335 Subx = PPC::sub_wacc_hi;
11336 }
11337 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11338 Op.getOperand(3));
11339 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11340 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11341 Op.getOperand(1), Wacc, SubReg),
11342 0);
11343 }
11344
11345 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11346 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11347 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11348 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11349 "Specify a dmr row pair 0-3");
11350 unsigned IdxVal = Idx->getSExtValue();
11351 unsigned Subx;
11352 switch (IdxVal) {
11353 case 0:
11354 Subx = PPC::sub_dmrrowp0;
11355 break;
11356 case 1:
11357 Subx = PPC::sub_dmrrowp1;
11358 break;
11359 case 2:
11360 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11361 break;
11362 case 3:
11363 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11364 break;
11365 }
11366 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11367 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11368 SDValue DMRRowp =
11369 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11370 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11371 Op.getOperand(1), DMRRowp, SubReg),
11372 0);
11373 }
11374
11375 case Intrinsic::ppc_mma_xxmfacc:
11376 case Intrinsic::ppc_mma_xxmtacc: {
11377 // Allow pre-isa-future subtargets to lower as normal.
11378 if (!Subtarget.isISAFuture())
11379 return SDValue();
11380 // The intrinsics for xxmtacc and xxmfacc take one argument of
11381 // type v512i1, for future cpu the corresponding wacc instruction
11382 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11383 // the need to produce the xxm[t|f]acc.
11384 SDValue WideVec = Op.getOperand(1);
11385 DAG.ReplaceAllUsesWith(Op, WideVec);
11386 return SDValue();
11387 }
11388
11389 case Intrinsic::ppc_unpack_longdouble: {
11390 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11391 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11392 "Argument of long double unpack must be 0 or 1!");
11393 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11394 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11395 Idx->getValueType(0)));
11396 }
11397
11398 case Intrinsic::ppc_compare_exp_lt:
11399 case Intrinsic::ppc_compare_exp_gt:
11400 case Intrinsic::ppc_compare_exp_eq:
11401 case Intrinsic::ppc_compare_exp_uo: {
11402 unsigned Pred;
11403 switch (IntrinsicID) {
11404 case Intrinsic::ppc_compare_exp_lt:
11405 Pred = PPC::PRED_LT;
11406 break;
11407 case Intrinsic::ppc_compare_exp_gt:
11408 Pred = PPC::PRED_GT;
11409 break;
11410 case Intrinsic::ppc_compare_exp_eq:
11411 Pred = PPC::PRED_EQ;
11412 break;
11413 case Intrinsic::ppc_compare_exp_uo:
11414 Pred = PPC::PRED_UN;
11415 break;
11416 }
11417 return SDValue(
11418 DAG.getMachineNode(
11419 PPC::SELECT_CC_I4, dl, MVT::i32,
11420 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11421 Op.getOperand(1), Op.getOperand(2)),
11422 0),
11423 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11424 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11425 0);
11426 }
11427 case Intrinsic::ppc_test_data_class: {
11428 EVT OpVT = Op.getOperand(1).getValueType();
11429 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11430 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11431 : PPC::XSTSTDCSP);
11432 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11433 // The XSTSTDC* instructions test if a floating-point value matches any of
11434 // the data classes specified in the mask, setting CR field bits
11435 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11436 // convert it to an integer result (1 if match, 0 if no match).
11437 //
11438 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11439 // intrinsic provides (value, mask) as Op.getOperand(1) and
11440 // Op.getOperand(2).
11441 SDValue TestDataClass =
11442 SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
11443 {Op.getOperand(2), Op.getOperand(1)}),
11444 0);
11445 if (Subtarget.isISA3_1()) {
11446 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11447 // This is more efficient than the SELECT_CC approach used in earlier
11448 // ISAs.
11449 SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
11450 SDValue CRBit =
11451 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11452 TestDataClass, SubRegIdx),
11453 0);
11454
11455 return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
11456 }
11457
11458 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11459 return SDValue(
11460 DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
11461 {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
11462 DAG.getConstant(0, dl, MVT::i32),
11463 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11464 0);
11465 }
11466 case Intrinsic::ppc_fnmsub: {
11467 EVT VT = Op.getOperand(1).getValueType();
11468 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11469 return DAG.getNode(
11470 ISD::FNEG, dl, VT,
11471 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11472 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11473 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11474 Op.getOperand(2), Op.getOperand(3));
11475 }
11476 case Intrinsic::ppc_convert_f128_to_ppcf128:
11477 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11478 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11479 ? RTLIB::CONVERT_PPCF128_F128
11480 : RTLIB::CONVERT_F128_PPCF128;
11481 MakeLibCallOptions CallOptions;
11482 std::pair<SDValue, SDValue> Result =
11483 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11484 dl, SDValue());
11485 return Result.first;
11486 }
11487 case Intrinsic::ppc_maxfe:
11488 case Intrinsic::ppc_maxfl:
11489 case Intrinsic::ppc_maxfs:
11490 case Intrinsic::ppc_minfe:
11491 case Intrinsic::ppc_minfl:
11492 case Intrinsic::ppc_minfs: {
11493 EVT VT = Op.getValueType();
11494 assert(
11495 all_of(Op->ops().drop_front(4),
11496 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11497 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11498 (void)VT;
11500 if (IntrinsicID == Intrinsic::ppc_minfe ||
11501 IntrinsicID == Intrinsic::ppc_minfl ||
11502 IntrinsicID == Intrinsic::ppc_minfs)
11503 CC = ISD::SETLT;
11504 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11505 SDValue Res = Op.getOperand(I);
11506 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11507 Res =
11508 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11509 }
11510 return Res;
11511 }
11512 }
11513
11514 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11515 // opcode number of the comparison.
11516 int CompareOpc;
11517 bool isDot;
11518 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11519 return SDValue(); // Don't custom lower most intrinsics.
11520
11521 // If this is a non-dot comparison, make the VCMP node and we are done.
11522 if (!isDot) {
11523 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11524 Op.getOperand(1), Op.getOperand(2),
11525 DAG.getConstant(CompareOpc, dl, MVT::i32));
11526 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11527 }
11528
11529 // Create the PPCISD altivec 'dot' comparison node.
11530 SDValue Ops[] = {
11531 Op.getOperand(2), // LHS
11532 Op.getOperand(3), // RHS
11533 DAG.getConstant(CompareOpc, dl, MVT::i32)
11534 };
11535 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11536 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11537
11538 // Unpack the result based on how the target uses it.
11539 unsigned BitNo; // Bit # of CR6.
11540 bool InvertBit; // Invert result?
11541 unsigned Bitx;
11542 unsigned SetOp;
11543 switch (Op.getConstantOperandVal(1)) {
11544 default: // Can't happen, don't crash on invalid number though.
11545 case 0: // Return the value of the EQ bit of CR6.
11546 BitNo = 0;
11547 InvertBit = false;
11548 Bitx = PPC::sub_eq;
11549 SetOp = PPCISD::SETBC;
11550 break;
11551 case 1: // Return the inverted value of the EQ bit of CR6.
11552 BitNo = 0;
11553 InvertBit = true;
11554 Bitx = PPC::sub_eq;
11555 SetOp = PPCISD::SETBCR;
11556 break;
11557 case 2: // Return the value of the LT bit of CR6.
11558 BitNo = 2;
11559 InvertBit = false;
11560 Bitx = PPC::sub_lt;
11561 SetOp = PPCISD::SETBC;
11562 break;
11563 case 3: // Return the inverted value of the LT bit of CR6.
11564 BitNo = 2;
11565 InvertBit = true;
11566 Bitx = PPC::sub_lt;
11567 SetOp = PPCISD::SETBCR;
11568 break;
11569 }
11570
11571 SDValue GlueOp = CompNode.getValue(1);
11572 if (Subtarget.isISA3_1()) {
11573 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11574 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11575 SDValue CRBit =
11576 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11577 CR6Reg, SubRegIdx, GlueOp),
11578 0);
11579 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11580 }
11581
11582 // Now that we have the comparison, emit a copy from the CR to a GPR.
11583 // This is flagged to the above dot comparison.
11584 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11585 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11586
11587 // Shift the bit into the low position.
11588 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11589 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11590 // Isolate the bit.
11591 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11592 DAG.getConstant(1, dl, MVT::i32));
11593
11594 // If we are supposed to, toggle the bit.
11595 if (InvertBit)
11596 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11597 DAG.getConstant(1, dl, MVT::i32));
11598 return Flags;
11599}
11600
11601SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11602 SelectionDAG &DAG) const {
11603 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11604 // the beginning of the argument list.
11605 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11606 SDLoc DL(Op);
11607 switch (Op.getConstantOperandVal(ArgStart)) {
11608 case Intrinsic::ppc_cfence: {
11609 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11610 SDValue Val = Op.getOperand(ArgStart + 1);
11611 EVT Ty = Val.getValueType();
11612 if (Ty == MVT::i128) {
11613 // FIXME: Testing one of two paired registers is sufficient to guarantee
11614 // ordering?
11615 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11616 }
11617 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11618 return SDValue(
11619 DAG.getMachineNode(
11620 Opcode, DL, MVT::Other,
11621 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11622 Op.getOperand(0)),
11623 0);
11624 }
11625 case Intrinsic::ppc_disassemble_dmr: {
11626 assert(ArgStart == 1 &&
11627 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11628 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11629 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11630 }
11631 default:
11632 break;
11633 }
11634 return SDValue();
11635}
11636
11637// Lower scalar BSWAP64 to xxbrd.
11638SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11639 SDLoc dl(Op);
11640 if (!Subtarget.isPPC64())
11641 return Op;
11642
11643 if (Subtarget.hasP9Vector()) {
11644 // MTVSRDD
11645 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11646 Op.getOperand(0));
11647 // XXBRD
11648 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11649 // MFVSRD
11650 int VectorIndex = 0;
11651 if (Subtarget.isLittleEndian())
11652 VectorIndex = 1;
11653 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11654 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11655 return Op;
11656 }
11657
11658 // For Power8, use parallel rotate instructions for faster bswap64.
11659 SDValue Input = Op.getOperand(0);
11660 // Helper to create rotate-and-insert operations (RLWIMI/RLDIMI).
11661 auto CreateRotateInsert =
11662 [&](unsigned Opcode, MVT VT, SDValue Dest, SDValue Src, unsigned RotAmt,
11663 unsigned MaskBegin,
11664 std::optional<unsigned> MaskEnd = std::nullopt) -> SDValue {
11666 Dest, Src, DAG.getTargetConstant(RotAmt, dl, MVT::i32),
11667 DAG.getTargetConstant(MaskBegin, dl, MVT::i32)};
11668 if (MaskEnd.has_value())
11669 Ops.push_back(DAG.getTargetConstant(*MaskEnd, dl, MVT::i32));
11670
11671 return SDValue(DAG.getMachineNode(Opcode, dl, VT, Ops), 0);
11672 };
11673
11674 // Helper to perform 32-bit byte swap using rotl(8) + 2x rlwimi.
11675 auto Swap32 = [&](SDValue Val32) -> SDValue {
11676 SDValue Rot = DAG.getNode(ISD::ROTL, dl, MVT::i32, Val32,
11677 DAG.getConstant(8, dl, MVT::i32));
11678 // Insert bits [24:31] from Val32 into Rot at position [0:7].
11679 SDValue Swap =
11680 CreateRotateInsert(PPC::RLWIMI, MVT::i32, Rot, Val32, 24, 0, 7);
11681 // Insert bits [16:23] from Val32 into Swap at position [16:23].
11682 return CreateRotateInsert(PPC::RLWIMI, MVT::i32, Swap, Val32, 24, 16, 23);
11683 };
11684 // Extract and swap high and low 32-bit halves independently for parallelism.
11685 SDValue Hi32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
11686 DAG.getNode(ISD::SRL, dl, MVT::i64, Input,
11687 DAG.getConstant(32, dl, MVT::i64)));
11688 SDValue Lo32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Input);
11689
11690 // Combine swapped halves: rotate LoSwap left by 32 bits and insert into
11691 // HiSwap to swap their positions, completing the 64-bit byte reversal.
11692 SDValue HiSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Hi32));
11693 SDValue LoSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Lo32));
11694
11695 return CreateRotateInsert(PPC::RLDIMI, MVT::i64, HiSwap, LoSwap, 32, 0);
11696}
11697
11698// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11699// compared to a value that is atomically loaded (atomic loads zero-extend).
11700SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11701 SelectionDAG &DAG) const {
11702 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11703 "Expecting an atomic compare-and-swap here.");
11704 SDLoc dl(Op);
11705 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11706 EVT MemVT = AtomicNode->getMemoryVT();
11707 if (MemVT.getSizeInBits() >= 32)
11708 return Op;
11709
11710 SDValue CmpOp = Op.getOperand(2);
11711 // If this is already correctly zero-extended, leave it alone.
11712 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11713 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11714 return Op;
11715
11716 // Clear the high bits of the compare operand.
11717 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11718 SDValue NewCmpOp =
11719 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11720 DAG.getConstant(MaskVal, dl, MVT::i32));
11721
11722 // Replace the existing compare operand with the properly zero-extended one.
11724 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11725 Ops.push_back(AtomicNode->getOperand(i));
11726 Ops[2] = NewCmpOp;
11727 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11728 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11729 auto NodeTy =
11730 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11731 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11732}
11733
11734SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11735 SelectionDAG &DAG) const {
11736 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11737 EVT MemVT = N->getMemoryVT();
11738 assert(MemVT.getSimpleVT() == MVT::i128 &&
11739 "Expect quadword atomic operations");
11740 SDLoc dl(N);
11741 unsigned Opc = N->getOpcode();
11742 switch (Opc) {
11743 case ISD::ATOMIC_LOAD: {
11744 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11745 // lowered to ppc instructions by pattern matching instruction selector.
11746 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11748 N->getOperand(0),
11749 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11750 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11751 Ops.push_back(N->getOperand(I));
11752 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11753 Ops, MemVT, N->getMemOperand());
11754 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11755 SDValue ValHi =
11756 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11757 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11758 DAG.getConstant(64, dl, MVT::i32));
11759 SDValue Val =
11760 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11761 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11762 {Val, LoadedVal.getValue(2)});
11763 }
11764 case ISD::ATOMIC_STORE: {
11765 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11766 // lowered to ppc instructions by pattern matching instruction selector.
11767 SDVTList Tys = DAG.getVTList(MVT::Other);
11769 N->getOperand(0),
11770 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11771 SDValue Val = N->getOperand(1);
11772 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11773 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11774 DAG.getConstant(64, dl, MVT::i32));
11775 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11776 Ops.push_back(ValLo);
11777 Ops.push_back(ValHi);
11778 Ops.push_back(N->getOperand(2));
11779 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11780 N->getMemOperand());
11781 }
11782 default:
11783 llvm_unreachable("Unexpected atomic opcode");
11784 }
11785}
11786
11788 SelectionDAG &DAG,
11789 const PPCSubtarget &Subtarget) {
11790 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11791
11792 enum DataClassMask {
11793 DC_NAN = 1 << 6,
11794 DC_NEG_INF = 1 << 4,
11795 DC_POS_INF = 1 << 5,
11796 DC_NEG_ZERO = 1 << 2,
11797 DC_POS_ZERO = 1 << 3,
11798 DC_NEG_SUBNORM = 1,
11799 DC_POS_SUBNORM = 1 << 1,
11800 };
11801
11802 EVT VT = Op.getValueType();
11803
11804 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11805 : VT == MVT::f64 ? PPC::XSTSTDCDP
11806 : PPC::XSTSTDCSP;
11807
11808 if (Mask == fcAllFlags)
11809 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11810 if (Mask == 0)
11811 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11812
11813 // When it's cheaper or necessary to test reverse flags.
11814 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11815 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11816 return DAG.getNOT(Dl, Rev, MVT::i1);
11817 }
11818
11819 // Power doesn't support testing whether a value is 'normal'. Test the rest
11820 // first, and test if it's 'not not-normal' with expected sign.
11821 if (Mask & fcNormal) {
11822 SDValue Rev(DAG.getMachineNode(
11823 TestOp, Dl, MVT::i32,
11824 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11825 DC_NEG_ZERO | DC_POS_ZERO |
11826 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11827 Dl, MVT::i32),
11828 Op),
11829 0);
11830 // Sign are stored in CR bit 0, result are in CR bit 2.
11831 SDValue Sign(
11832 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11833 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11834 0);
11835 SDValue Normal(DAG.getNOT(
11836 Dl,
11838 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11839 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11840 0),
11841 MVT::i1));
11842 if (Mask & fcPosNormal)
11843 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11844 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11845 if (Mask == fcPosNormal || Mask == fcNegNormal)
11846 return Result;
11847
11848 return DAG.getNode(
11849 ISD::OR, Dl, MVT::i1,
11850 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11851 }
11852
11853 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11854 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11855 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11856 bool IsQuiet = Mask & fcQNan;
11857 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11858
11859 // Quietness is determined by the first bit in fraction field.
11860 uint64_t QuietMask = 0;
11861 SDValue HighWord;
11862 if (VT == MVT::f128) {
11863 HighWord = DAG.getNode(
11864 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11865 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11866 QuietMask = 0x8000;
11867 } else if (VT == MVT::f64) {
11868 if (Subtarget.isPPC64()) {
11869 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11870 DAG.getBitcast(MVT::i64, Op),
11871 DAG.getConstant(1, Dl, MVT::i32));
11872 } else {
11873 SDValue Vec = DAG.getBitcast(
11874 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11875 HighWord = DAG.getNode(
11876 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11877 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11878 }
11879 QuietMask = 0x80000;
11880 } else if (VT == MVT::f32) {
11881 HighWord = DAG.getBitcast(MVT::i32, Op);
11882 QuietMask = 0x400000;
11883 }
11884 SDValue NanRes = DAG.getSetCC(
11885 Dl, MVT::i1,
11886 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11887 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11888 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11889 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11890 if (Mask == fcQNan || Mask == fcSNan)
11891 return NanRes;
11892
11893 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11894 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11895 NanRes);
11896 }
11897
11898 unsigned NativeMask = 0;
11899 if ((Mask & fcNan) == fcNan)
11900 NativeMask |= DC_NAN;
11901 if (Mask & fcNegInf)
11902 NativeMask |= DC_NEG_INF;
11903 if (Mask & fcPosInf)
11904 NativeMask |= DC_POS_INF;
11905 if (Mask & fcNegZero)
11906 NativeMask |= DC_NEG_ZERO;
11907 if (Mask & fcPosZero)
11908 NativeMask |= DC_POS_ZERO;
11909 if (Mask & fcNegSubnormal)
11910 NativeMask |= DC_NEG_SUBNORM;
11911 if (Mask & fcPosSubnormal)
11912 NativeMask |= DC_POS_SUBNORM;
11913 return SDValue(
11914 DAG.getMachineNode(
11915 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11917 TestOp, Dl, MVT::i32,
11918 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11919 0),
11920 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11921 0);
11922}
11923
11924SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11925 SelectionDAG &DAG) const {
11926 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11927 SDValue LHS = Op.getOperand(0);
11928 uint64_t RHSC = Op.getConstantOperandVal(1);
11929 SDLoc Dl(Op);
11930 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11931 if (LHS.getValueType() == MVT::ppcf128) {
11932 // The higher part determines the value class.
11933 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11934 DAG.getConstant(1, Dl, MVT::i32));
11935 }
11936
11937 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11938}
11939
11940// Adjust the length value for a load/store with length to account for the
11941// instructions requiring a left justified length, and for non-byte element
11942// types requiring scaling by element size.
11943static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11944 SelectionDAG &DAG) {
11945 SDLoc dl(Val);
11946 EVT VT = Val->getValueType(0);
11947 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11948 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11949 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11950 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11951}
11952
11953SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11954 auto VPLD = cast<VPLoadSDNode>(Op);
11955 bool Future = Subtarget.isISAFuture();
11956 SDLoc dl(Op);
11957 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11958 "Mask predication not supported");
11959 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11960 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11961 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11962 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11963 Len = AdjustLength(Len, EltBits, !Future, DAG);
11964 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11965 VPLD->getOperand(1), Len};
11966 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11967 SDValue VPL =
11969 VPLD->getMemoryVT(), VPLD->getMemOperand());
11970 return VPL;
11971}
11972
11973SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11974 auto VPST = cast<VPStoreSDNode>(Op);
11975 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11976 "Mask predication not supported");
11977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11978 SDLoc dl(Op);
11979 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11980 unsigned EltBits =
11981 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11982 bool Future = Subtarget.isISAFuture();
11983 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11984 Len = AdjustLength(Len, EltBits, !Future, DAG);
11985 SDValue Ops[] = {
11986 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11987 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11988 VPST->getOperand(2), Len};
11989 SDVTList Tys = DAG.getVTList(MVT::Other);
11990 SDValue VPS =
11992 VPST->getMemoryVT(), VPST->getMemOperand());
11993 return VPS;
11994}
11995
11996SDValue PPCTargetLowering::LowerPartialReduce(SDValue Op,
11997 SelectionDAG &DAG) const {
11998 SDValue Acc = Op.getOperand(0);
11999 SDValue Op1 = Op.getOperand(1);
12000 SDValue Op2 = Op.getOperand(2);
12001
12002 assert(Op.getOpcode() == ISD::PARTIAL_REDUCE_UMLA &&
12003 "Unexpected partial reduction");
12004
12005 if (Acc.getValueType() != MVT::v4i32)
12006 return SDValue();
12007 if (Op1.getValueType() != MVT::v16i32 || Op1.getOpcode() != ISD::SIGN_EXTEND)
12008 return SDValue();
12009 SDValue Op1Input = Op1.getOperand(0);
12010 if (Op1Input.getValueType() != MVT::v16i8 || !llvm::isOneOrOneSplat(Op2))
12011 return SDValue();
12012
12013 SDLoc dl(Op);
12014 SDValue Ones = DAG.getConstant(1, dl, MVT::v16i8);
12015 return DAG.getNode(ISD::PARTIAL_REDUCE_SUMLA, dl, MVT::v4i32, Acc, Op1Input,
12016 Ones);
12017}
12018
12019SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
12020 SelectionDAG &DAG) const {
12021 SDLoc dl(Op);
12022
12023 MachineFunction &MF = DAG.getMachineFunction();
12024 SDValue Op0 = Op.getOperand(0);
12025 EVT ValVT = Op0.getValueType();
12026 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
12027 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
12028 int64_t IntVal = Op.getConstantOperandVal(0);
12029 if (IntVal >= -16 && IntVal <= 15)
12030 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
12031 dl);
12032 }
12033
12034 ReuseLoadInfo RLI;
12035 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
12036 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
12037 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
12038 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
12039
12040 MachineMemOperand *MMO =
12042 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
12043 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
12045 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
12046 MVT::i32, MMO);
12047 if (RLI.ResChain)
12048 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
12049 return Bits.getValue(0);
12050 }
12051
12052 // Create a stack slot that is 16-byte aligned.
12053 MachineFrameInfo &MFI = MF.getFrameInfo();
12054 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
12055 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12056 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
12057
12058 SDValue Val = Op0;
12059 // P10 hardware store forwarding requires that a single store contains all
12060 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
12061 // to avoid load hit store on P10 when running binaries compiled for older
12062 // processors by generating two mergeable scalar stores to forward with the
12063 // vector load.
12064 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
12065 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
12066 ValVT.getSizeInBits() <= 64) {
12067 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
12068 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
12069 SDValue ShiftBy = DAG.getConstant(
12070 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
12071 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
12072 SDValue Plus8 =
12073 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
12074 SDValue Store2 =
12075 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
12076 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
12077 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
12078 MachinePointerInfo());
12079 }
12080
12081 // Store the input value into Value#0 of the stack slot.
12082 SDValue Store =
12083 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12084 // Load it out.
12085 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12086}
12087
12088SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12089 SelectionDAG &DAG) const {
12090 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12091 "Should only be called for ISD::INSERT_VECTOR_ELT");
12092
12093 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12094
12095 EVT VT = Op.getValueType();
12096 SDLoc dl(Op);
12097 SDValue V1 = Op.getOperand(0);
12098 SDValue V2 = Op.getOperand(1);
12099
12100 if (VT == MVT::v2f64 && C)
12101 return Op;
12102
12103 if (Subtarget.hasP9Vector()) {
12104 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12105 // because on P10, it allows this specific insert_vector_elt load pattern to
12106 // utilize the refactored load and store infrastructure in order to exploit
12107 // prefixed loads.
12108 // On targets with inexpensive direct moves (Power9 and up), a
12109 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12110 // load since a single precision load will involve conversion to double
12111 // precision on the load followed by another conversion to single precision.
12112 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12113 (isa<LoadSDNode>(V2))) {
12114 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12115 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12116 SDValue InsVecElt =
12117 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12118 BitcastLoad, Op.getOperand(2));
12119 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12120 }
12121 }
12122
12123 if (Subtarget.isISA3_1()) {
12124 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12125 return SDValue();
12126 // On P10, we have legal lowering for constant and variable indices for
12127 // all vectors.
12128 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12129 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12130 return Op;
12131 }
12132
12133 // Before P10, we have legal lowering for constant indices but not for
12134 // variable ones.
12135 if (!C)
12136 return SDValue();
12137
12138 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12139 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12140 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12141 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12142 unsigned InsertAtElement = C->getZExtValue();
12143 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12144 if (Subtarget.isLittleEndian()) {
12145 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12146 }
12147 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12148 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12149 }
12150 return Op;
12151}
12152
12153SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12154 SelectionDAG &DAG) const {
12155 SDLoc dl(Op);
12156 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12157 SDValue LoadChain = LN->getChain();
12158 SDValue BasePtr = LN->getBasePtr();
12159 EVT VT = Op.getValueType();
12160 bool IsV1024i1 = VT == MVT::v1024i1;
12161 bool IsV2048i1 = VT == MVT::v2048i1;
12162
12163 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12164 // Dense Math dmr pair registers, respectively.
12165 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12166 (void)IsV2048i1;
12167 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12168 "Dense Math support required.");
12169 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12170
12172 SmallVector<SDValue, 8> LoadChains;
12173
12174 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12175 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12176 MachineMemOperand *MMO = LN->getMemOperand();
12177 unsigned NumVecs = VT.getSizeInBits() / 256;
12178 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12179 MachineMemOperand *NewMMO =
12180 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12181 if (Idx > 0) {
12182 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12183 DAG.getConstant(32, dl, BasePtr.getValueType()));
12184 LoadOps[2] = BasePtr;
12185 }
12187 DAG.getVTList(MVT::v256i1, MVT::Other),
12188 LoadOps, MVT::v256i1, NewMMO);
12189 LoadChains.push_back(Ld.getValue(1));
12190 Loads.push_back(Ld);
12191 }
12192
12193 if (Subtarget.isLittleEndian()) {
12194 std::reverse(Loads.begin(), Loads.end());
12195 std::reverse(LoadChains.begin(), LoadChains.end());
12196 }
12197
12198 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12199 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12200
12201 if (IsV1024i1) {
12202 return DAG.getMergeValues({Value, TF}, dl);
12203 }
12204
12205 // Handle Loads for V2048i1 which represents a dmr pair.
12206 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12207 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12208
12209 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12210 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12211
12212 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12213 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12214
12215 SDValue DmrPValue = SDValue(
12216 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12217
12218 return DAG.getMergeValues({DmrPValue, TF}, dl);
12219}
12220
12221SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12222 const SDLoc &dl,
12223 SelectionDAG &DAG) const {
12224 SDValue Lo =
12225 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12226 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12227 SDValue Hi =
12228 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12229 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12230 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12231
12232 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12233 {RC, Lo, LoSub, Hi, HiSub}),
12234 0);
12235}
12236
12237SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12238 SelectionDAG &DAG) const {
12239 SDLoc dl(Op);
12240 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12241 SDValue LoadChain = LN->getChain();
12242 SDValue BasePtr = LN->getBasePtr();
12243 EVT VT = Op.getValueType();
12244
12245 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12246 return LowerDMFVectorLoad(Op, DAG);
12247
12248 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12249 return Op;
12250
12251 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12252 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12253 "Type unsupported without MMA");
12254 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12255 "Type unsupported without paired vector support");
12256
12257 // For v256i1 on ISA Future, let the load go through to instruction selection
12258 // where it will be matched to lxvp/plxvp by the instruction patterns.
12259 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12260 return Op;
12261
12262 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12263 // value in 2 or 4 vsx registers.
12264 Align Alignment = LN->getAlign();
12266 SmallVector<SDValue, 4> LoadChains;
12267 unsigned NumVecs = VT.getSizeInBits() / 128;
12268 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12269 SDValue Load =
12270 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12271 LN->getPointerInfo().getWithOffset(Idx * 16),
12272 commonAlignment(Alignment, Idx * 16),
12273 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12274 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12275 DAG.getConstant(16, dl, BasePtr.getValueType()));
12276 Loads.push_back(Load);
12277 LoadChains.push_back(Load.getValue(1));
12278 }
12279 if (Subtarget.isLittleEndian()) {
12280 std::reverse(Loads.begin(), Loads.end());
12281 std::reverse(LoadChains.begin(), LoadChains.end());
12282 }
12283 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12284 SDValue Value =
12285 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12286 dl, VT, Loads);
12287 SDValue RetOps[] = {Value, TF};
12288 return DAG.getMergeValues(RetOps, dl);
12289}
12290
12291SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12292 SelectionDAG &DAG) const {
12293
12294 SDLoc dl(Op);
12295 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12296 SDValue StoreChain = SN->getChain();
12297 SDValue BasePtr = SN->getBasePtr();
12300 EVT VT = SN->getValue().getValueType();
12301 bool IsV1024i1 = VT == MVT::v1024i1;
12302 bool IsV2048i1 = VT == MVT::v2048i1;
12303
12304 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12305 // Dense Math dmr pair registers, respectively.
12306 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12307 (void)IsV2048i1;
12308 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12309 "Dense Math support required.");
12310 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12311
12312 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12313 if (IsV1024i1) {
12315 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12316 Op.getOperand(1),
12317 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12318 0);
12320 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12321 Op.getOperand(1),
12322 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12323 0);
12324 MachineSDNode *ExtNode =
12325 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12326 Values.push_back(SDValue(ExtNode, 0));
12327 Values.push_back(SDValue(ExtNode, 1));
12328 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12329 Values.push_back(SDValue(ExtNode, 0));
12330 Values.push_back(SDValue(ExtNode, 1));
12331 } else {
12332 // This corresponds to v2048i1 which represents a dmr pair.
12333 SDValue Dmr0(
12334 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12335 Op.getOperand(1),
12336 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12337 0);
12338
12339 SDValue Dmr1(
12340 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12341 Op.getOperand(1),
12342 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12343 0);
12344
12345 SDValue Dmr0Lo(DAG.getMachineNode(
12346 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12347 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12348 0);
12349
12350 SDValue Dmr0Hi(DAG.getMachineNode(
12351 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12352 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12353 0);
12354
12355 SDValue Dmr1Lo(DAG.getMachineNode(
12356 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12357 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12358 0);
12359
12360 SDValue Dmr1Hi(DAG.getMachineNode(
12361 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12362 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12363 0);
12364
12365 MachineSDNode *ExtNode =
12366 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12367 Values.push_back(SDValue(ExtNode, 0));
12368 Values.push_back(SDValue(ExtNode, 1));
12369 ExtNode =
12370 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12371 Values.push_back(SDValue(ExtNode, 0));
12372 Values.push_back(SDValue(ExtNode, 1));
12373 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12374 Values.push_back(SDValue(ExtNode, 0));
12375 Values.push_back(SDValue(ExtNode, 1));
12376 ExtNode =
12377 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12378 Values.push_back(SDValue(ExtNode, 0));
12379 Values.push_back(SDValue(ExtNode, 1));
12380 }
12381
12382 if (Subtarget.isLittleEndian())
12383 std::reverse(Values.begin(), Values.end());
12384
12385 SDVTList Tys = DAG.getVTList(MVT::Other);
12387 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12388 Values[0], BasePtr};
12389 MachineMemOperand *MMO = SN->getMemOperand();
12390 unsigned NumVecs = VT.getSizeInBits() / 256;
12391 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12392 MachineMemOperand *NewMMO =
12393 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12394 if (Idx > 0) {
12395 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12396 DAG.getConstant(32, dl, BasePtr.getValueType()));
12397 Ops[3] = BasePtr;
12398 }
12399 Ops[2] = Values[Idx];
12401 MVT::v256i1, NewMMO);
12402 Stores.push_back(St);
12403 }
12404
12405 SDValue TF = DAG.getTokenFactor(dl, Stores);
12406 return TF;
12407}
12408
12409SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12410 SelectionDAG &DAG) const {
12411 SDLoc dl(Op);
12412 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12413 SDValue StoreChain = SN->getChain();
12414 SDValue BasePtr = SN->getBasePtr();
12415 SDValue Value = SN->getValue();
12416 SDValue Value2 = SN->getValue();
12417 EVT StoreVT = Value.getValueType();
12418
12419 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12420 return LowerDMFVectorStore(Op, DAG);
12421
12422 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12423 return Op;
12424
12425 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12426 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12427 "Type unsupported without MMA");
12428 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12429 "Type unsupported without paired vector support");
12430
12431 // For v256i1 on ISA Future, let the store go through to instruction selection
12432 // where it will be matched to stxvp/pstxvp by the instruction patterns.
12433 if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12435 return Op;
12436
12437 // For other cases, create 2 or 4 v16i8 stores to store the pair or
12438 // accumulator underlying registers individually.
12439 Align Alignment = SN->getAlign();
12441 unsigned NumVecs = 2;
12442 if (StoreVT == MVT::v512i1) {
12443 if (Subtarget.isISAFuture()) {
12444 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12445 MachineSDNode *ExtNode = DAG.getMachineNode(
12446 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12447
12448 Value = SDValue(ExtNode, 0);
12449 Value2 = SDValue(ExtNode, 1);
12450 } else
12451 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12452 NumVecs = 4;
12453 }
12454 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12455 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12456 SDValue Elt;
12457 if (Subtarget.isISAFuture()) {
12458 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12459 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12460 Idx > 1 ? Value2 : Value,
12461 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12462 } else
12463 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12464 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12465
12466 SDValue Store =
12467 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12468 SN->getPointerInfo().getWithOffset(Idx * 16),
12469 commonAlignment(Alignment, Idx * 16),
12470 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12471 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12472 DAG.getConstant(16, dl, BasePtr.getValueType()));
12473 Stores.push_back(Store);
12474 }
12475 SDValue TF = DAG.getTokenFactor(dl, Stores);
12476 return TF;
12477}
12478
12479SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12480 SDLoc dl(Op);
12481 if (Op.getValueType() == MVT::v4i32) {
12482 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12483
12484 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12485 // +16 as shift amt.
12486 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12487 SDValue RHSSwap = // = vrlw RHS, 16
12488 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12489
12490 // Shrinkify inputs to v8i16.
12491 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12492 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12493 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12494
12495 // Low parts multiplied together, generating 32-bit results (we ignore the
12496 // top parts).
12497 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12498 LHS, RHS, DAG, dl, MVT::v4i32);
12499
12500 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12501 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12502 // Shift the high parts up 16 bits.
12503 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12504 Neg16, DAG, dl);
12505 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12506 } else if (Op.getValueType() == MVT::v16i8) {
12507 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12508 bool isLittleEndian = Subtarget.isLittleEndian();
12509
12510 // Multiply the even 8-bit parts, producing 16-bit sums.
12511 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12512 LHS, RHS, DAG, dl, MVT::v8i16);
12513 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12514
12515 // Multiply the odd 8-bit parts, producing 16-bit sums.
12516 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12517 LHS, RHS, DAG, dl, MVT::v8i16);
12518 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12519
12520 // Merge the results together. Because vmuleub and vmuloub are
12521 // instructions with a big-endian bias, we must reverse the
12522 // element numbering and reverse the meaning of "odd" and "even"
12523 // when generating little endian code.
12524 int Ops[16];
12525 for (unsigned i = 0; i != 8; ++i) {
12526 if (isLittleEndian) {
12527 Ops[i*2 ] = 2*i;
12528 Ops[i*2+1] = 2*i+16;
12529 } else {
12530 Ops[i*2 ] = 2*i+1;
12531 Ops[i*2+1] = 2*i+1+16;
12532 }
12533 }
12534 if (isLittleEndian)
12535 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12536 else
12537 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12538 } else {
12539 llvm_unreachable("Unknown mul to lower!");
12540 }
12541}
12542
12543SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12544 bool IsStrict = Op->isStrictFPOpcode();
12545 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12546 !Subtarget.hasP9Vector())
12547 return SDValue();
12548
12549 return Op;
12550}
12551
12552// Custom lowering for fpext v2f32 to v2f64
12553SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12554
12555 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12556 "Should only be called for ISD::FP_EXTEND");
12557
12558 // FIXME: handle extends from half precision float vectors on P9.
12559 // We only want to custom lower an extend from v2f32 to v2f64.
12560 if (Op.getValueType() != MVT::v2f64 ||
12561 Op.getOperand(0).getValueType() != MVT::v2f32)
12562 return SDValue();
12563
12564 SDLoc dl(Op);
12565 SDValue Op0 = Op.getOperand(0);
12566
12567 switch (Op0.getOpcode()) {
12568 default:
12569 return SDValue();
12571 assert(Op0.getNumOperands() == 2 &&
12573 "Node should have 2 operands with second one being a constant!");
12574
12575 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12576 return SDValue();
12577
12578 // Custom lower is only done for high or low doubleword.
12579 int Idx = Op0.getConstantOperandVal(1);
12580 if (Idx % 2 != 0)
12581 return SDValue();
12582
12583 // Since input is v4f32, at this point Idx is either 0 or 2.
12584 // Shift to get the doubleword position we want.
12585 int DWord = Idx >> 1;
12586
12587 // High and low word positions are different on little endian.
12588 if (Subtarget.isLittleEndian())
12589 DWord ^= 0x1;
12590
12591 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12592 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12593 }
12594 case ISD::FADD:
12595 case ISD::FMUL:
12596 case ISD::FSUB: {
12597 SDValue NewLoad[2];
12598 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12599 // Ensure both input are loads.
12600 SDValue LdOp = Op0.getOperand(i);
12601 if (LdOp.getOpcode() != ISD::LOAD)
12602 return SDValue();
12603 // Generate new load node.
12604 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12605 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12606 NewLoad[i] = DAG.getMemIntrinsicNode(
12607 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12608 LD->getMemoryVT(), LD->getMemOperand());
12609 }
12610 SDValue NewOp =
12611 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12612 NewLoad[1], Op0.getNode()->getFlags());
12613 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12614 DAG.getConstant(0, dl, MVT::i32));
12615 }
12616 case ISD::LOAD: {
12617 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12618 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12619 SDValue NewLd = DAG.getMemIntrinsicNode(
12620 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12621 LD->getMemoryVT(), LD->getMemOperand());
12622 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12623 DAG.getConstant(0, dl, MVT::i32));
12624 }
12625 }
12626 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12627}
12628
12630 SelectionDAG &DAG,
12631 const PPCSubtarget &STI) {
12632 SDLoc DL(Value);
12633 if (STI.useCRBits())
12634 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12635 DAG.getConstant(1, DL, SumType),
12636 DAG.getConstant(0, DL, SumType));
12637 else
12638 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12639 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12640 Value, DAG.getAllOnesConstant(DL, SumType));
12641 return Sum.getValue(1);
12642}
12643
12645 EVT CarryType, SelectionDAG &DAG,
12646 const PPCSubtarget &STI) {
12647 SDLoc DL(Flag);
12648 SDValue Zero = DAG.getConstant(0, DL, SumType);
12649 SDValue Carry = DAG.getNode(
12650 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12651 if (STI.useCRBits())
12652 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12653 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12654}
12655
12656SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12657
12658 SDLoc DL(Op);
12659 SDNode *N = Op.getNode();
12660 EVT VT = N->getValueType(0);
12661 EVT CarryType = N->getValueType(1);
12662 unsigned Opc = N->getOpcode();
12663 bool IsAdd = Opc == ISD::UADDO;
12664 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12665 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12666 N->getOperand(0), N->getOperand(1));
12667 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12668 DAG, Subtarget);
12669 if (!IsAdd)
12670 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12671 DAG.getConstant(1UL, DL, CarryType));
12672 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12673}
12674
12675SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12676 SelectionDAG &DAG) const {
12677 SDLoc DL(Op);
12678 SDNode *N = Op.getNode();
12679 unsigned Opc = N->getOpcode();
12680 EVT VT = N->getValueType(0);
12681 EVT CarryType = N->getValueType(1);
12682 SDValue CarryOp = N->getOperand(2);
12683 bool IsAdd = Opc == ISD::UADDO_CARRY;
12684 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12685 if (!IsAdd)
12686 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12687 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12688 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12689 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12690 Op.getOperand(0), Op.getOperand(1), CarryOp);
12691 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12692 Subtarget);
12693 if (!IsAdd)
12694 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12695 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12696 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12697}
12698
12699SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12700
12701 SDLoc dl(Op);
12702 SDValue LHS = Op.getOperand(0);
12703 SDValue RHS = Op.getOperand(1);
12704 EVT VT = Op.getNode()->getValueType(0);
12705
12706 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12707
12708 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12709 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12710
12711 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12712
12713 SDValue Overflow =
12714 DAG.getNode(ISD::SRL, dl, VT, And,
12715 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12716
12717 SDValue OverflowTrunc =
12718 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12719
12720 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12721}
12722
12723/// Implements signed add with overflow detection using the rule:
12724/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12725SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12726
12727 SDLoc dl(Op);
12728 SDValue LHS = Op.getOperand(0);
12729 SDValue RHS = Op.getOperand(1);
12730 EVT VT = Op.getNode()->getValueType(0);
12731
12732 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12733
12734 // Compute ~(x xor y)
12735 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12736 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12737 // Compute (s xor x)
12738 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12739
12740 // overflow = (x eqv y) & (s xor x)
12741 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12742
12743 // Shift sign bit down to LSB
12744 SDValue Overflow =
12745 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12746 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12747 // Truncate to the overflow type (i1)
12748 SDValue OverflowTrunc =
12749 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12750
12751 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12752}
12753
12754// Lower unsigned 3-way compare producing -1/0/1.
12755SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12756 SDLoc DL(Op);
12757 SDValue A = DAG.getFreeze(Op.getOperand(0));
12758 SDValue B = DAG.getFreeze(Op.getOperand(1));
12759 EVT OpVT = A.getValueType();
12760 EVT ResVT = Op.getValueType();
12761
12762 // First compute diff = A - B.
12763 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12764
12765 // Generate B - A using SUBC to capture carry.
12766 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12767 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12768 SDValue CA0 = SubC.getValue(1);
12769
12770 // t2 = A - B + CA0 using SUBE.
12771 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12772 SDValue CA1 = SubE1.getValue(1);
12773
12774 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12775 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12776
12777 // Extract the first result and truncate to result type if needed.
12778 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12779}
12780
12781/// LowerOperation - Provide custom lowering hooks for some operations.
12782///
12784 switch (Op.getOpcode()) {
12785 default:
12786 llvm_unreachable("Wasn't expecting to be able to lower this!");
12787 case ISD::FPOW: return lowerPow(Op, DAG);
12788 case ISD::FSIN: return lowerSin(Op, DAG);
12789 case ISD::FCOS: return lowerCos(Op, DAG);
12790 case ISD::FLOG: return lowerLog(Op, DAG);
12791 case ISD::FLOG10: return lowerLog10(Op, DAG);
12792 case ISD::FEXP: return lowerExp(Op, DAG);
12793 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12794 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12795 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12796 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12797 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12798 case ISD::STRICT_FSETCC:
12800 case ISD::SETCC: return LowerSETCC(Op, DAG);
12801 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12802 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12803 case ISD::SSUBO:
12804 return LowerSSUBO(Op, DAG);
12805 case ISD::SADDO:
12806 return LowerSADDO(Op, DAG);
12807
12808 case ISD::INLINEASM:
12809 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12810 // Variable argument lowering.
12811 case ISD::VASTART: return LowerVASTART(Op, DAG);
12812 case ISD::VAARG: return LowerVAARG(Op, DAG);
12813 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12814
12815 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12816 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12818 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12819
12820 // Exception handling lowering.
12821 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12822 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12823 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12824
12825 case ISD::LOAD: return LowerLOAD(Op, DAG);
12826 case ISD::STORE: return LowerSTORE(Op, DAG);
12827 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12828 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12831 case ISD::FP_TO_UINT:
12832 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12835 case ISD::UINT_TO_FP:
12836 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12837 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12838 case ISD::SET_ROUNDING:
12839 return LowerSET_ROUNDING(Op, DAG);
12840
12841 // Lower 64-bit shifts.
12842 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12843 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12844 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12845
12846 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12847 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12848
12849 // Vector-related lowering.
12850 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12851 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12852 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12853 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12854 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12855 case ISD::MUL: return LowerMUL(Op, DAG);
12856 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12858 case ISD::FP_ROUND:
12859 return LowerFP_ROUND(Op, DAG);
12860 case ISD::ROTL: return LowerROTL(Op, DAG);
12861
12862 // For counter-based loop handling.
12864 return SDValue();
12865
12866 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12867
12868 // Frame & Return address.
12869 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12870 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12871
12873 return LowerINTRINSIC_VOID(Op, DAG);
12874 case ISD::BSWAP:
12875 return LowerBSWAP(Op, DAG);
12877 return LowerATOMIC_CMP_SWAP(Op, DAG);
12878 case ISD::ATOMIC_STORE:
12879 return LowerATOMIC_LOAD_STORE(Op, DAG);
12880 case ISD::IS_FPCLASS:
12881 return LowerIS_FPCLASS(Op, DAG);
12882 case ISD::UADDO:
12883 case ISD::USUBO:
12884 return LowerADDSUBO(Op, DAG);
12885 case ISD::UADDO_CARRY:
12886 case ISD::USUBO_CARRY:
12887 return LowerADDSUBO_CARRY(Op, DAG);
12888 case ISD::UCMP:
12889 return LowerUCMP(Op, DAG);
12890 case ISD::STRICT_LRINT:
12891 case ISD::STRICT_LLRINT:
12892 case ISD::STRICT_LROUND:
12895 if (Op->getFlags().hasNoFPExcept())
12896 return Op;
12897 return SDValue();
12898 case ISD::VP_LOAD:
12899 return LowerVP_LOAD(Op, DAG);
12900 case ISD::VP_STORE:
12901 return LowerVP_STORE(Op, DAG);
12903 return LowerPartialReduce(Op, DAG);
12904 }
12905}
12906
12909 SelectionDAG &DAG) const {
12910 SDLoc dl(N);
12911 switch (N->getOpcode()) {
12912 default:
12913 llvm_unreachable("Do not know how to custom type legalize this operation!");
12914 case ISD::ATOMIC_LOAD: {
12915 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12916 Results.push_back(Res);
12917 Results.push_back(Res.getValue(1));
12918 break;
12919 }
12920 case ISD::READCYCLECOUNTER: {
12921 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12922 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12923
12924 Results.push_back(
12925 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12926 Results.push_back(RTB.getValue(2));
12927 break;
12928 }
12930 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12931 break;
12932
12933 assert(N->getValueType(0) == MVT::i1 &&
12934 "Unexpected result type for CTR decrement intrinsic");
12935 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12936 N->getValueType(0));
12937 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12938 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12939 N->getOperand(1));
12940
12941 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12942 Results.push_back(NewInt.getValue(1));
12943 break;
12944 }
12946 switch (N->getConstantOperandVal(0)) {
12947 case Intrinsic::ppc_pack_longdouble:
12948 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12949 N->getOperand(2), N->getOperand(1)));
12950 break;
12951 case Intrinsic::ppc_maxfe:
12952 case Intrinsic::ppc_minfe:
12953 case Intrinsic::ppc_fnmsub:
12954 case Intrinsic::ppc_convert_f128_to_ppcf128:
12955 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12956 break;
12957 }
12958 break;
12959 }
12960 case ISD::VAARG: {
12961 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12962 return;
12963
12964 EVT VT = N->getValueType(0);
12965
12966 if (VT == MVT::i64) {
12967 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12968
12969 Results.push_back(NewNode);
12970 Results.push_back(NewNode.getValue(1));
12971 }
12972 return;
12973 }
12976 case ISD::FP_TO_SINT:
12977 case ISD::FP_TO_UINT: {
12978 // LowerFP_TO_INT() can only handle f32 and f64.
12979 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12980 MVT::ppcf128)
12981 return;
12982 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12983 Results.push_back(LoweredValue);
12984 if (N->isStrictFPOpcode())
12985 Results.push_back(LoweredValue.getValue(1));
12986 return;
12987 }
12988 case ISD::TRUNCATE: {
12989 if (!N->getValueType(0).isVector())
12990 return;
12991 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12992 if (Lowered)
12993 Results.push_back(Lowered);
12994 return;
12995 }
12996 case ISD::SCALAR_TO_VECTOR: {
12997 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12998 if (Lowered)
12999 Results.push_back(Lowered);
13000 return;
13001 }
13002 case ISD::FSHL:
13003 case ISD::FSHR:
13004 // Don't handle funnel shifts here.
13005 return;
13006 case ISD::BITCAST:
13007 // Don't handle bitcast here.
13008 return;
13009 case ISD::FP_EXTEND:
13010 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
13011 if (Lowered)
13012 Results.push_back(Lowered);
13013 return;
13014 }
13015}
13016
13017//===----------------------------------------------------------------------===//
13018// Other Lowering Code
13019//===----------------------------------------------------------------------===//
13020
13022 return Builder.CreateIntrinsic(Id, {});
13023}
13024
13026 Value *Addr,
13027 AtomicOrdering Ord) const {
13028 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
13029
13030 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13031 "Only 8/16/32/64-bit atomic loads supported");
13032 Intrinsic::ID IntID;
13033 switch (SZ) {
13034 default:
13035 llvm_unreachable("Unexpected PrimitiveSize");
13036 case 8:
13037 IntID = Intrinsic::ppc_lbarx;
13038 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13039 break;
13040 case 16:
13041 IntID = Intrinsic::ppc_lharx;
13042 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13043 break;
13044 case 32:
13045 IntID = Intrinsic::ppc_lwarx;
13046 break;
13047 case 64:
13048 IntID = Intrinsic::ppc_ldarx;
13049 break;
13050 }
13051 Value *Call =
13052 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
13053
13054 return Builder.CreateTruncOrBitCast(Call, ValueTy);
13055}
13056
13057// Perform a store-conditional operation to Addr. Return the status of the
13058// store. This should be 0 if the store succeeded, non-zero otherwise.
13060 Value *Val, Value *Addr,
13061 AtomicOrdering Ord) const {
13062 Type *Ty = Val->getType();
13063 unsigned SZ = Ty->getPrimitiveSizeInBits();
13064
13065 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13066 "Only 8/16/32/64-bit atomic loads supported");
13067 Intrinsic::ID IntID;
13068 switch (SZ) {
13069 default:
13070 llvm_unreachable("Unexpected PrimitiveSize");
13071 case 8:
13072 IntID = Intrinsic::ppc_stbcx;
13073 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13074 break;
13075 case 16:
13076 IntID = Intrinsic::ppc_sthcx;
13077 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13078 break;
13079 case 32:
13080 IntID = Intrinsic::ppc_stwcx;
13081 break;
13082 case 64:
13083 IntID = Intrinsic::ppc_stdcx;
13084 break;
13085 }
13086
13087 if (SZ == 8 || SZ == 16)
13088 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
13089
13090 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
13091 /*FMFSource=*/nullptr, "stcx");
13092 return Builder.CreateXor(Call, Builder.getInt32(1));
13093}
13094
13095// The mappings for emitLeading/TrailingFence are taken from
13096// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13098 Instruction *Inst,
13099 AtomicOrdering Ord) const {
13101 return callIntrinsic(Builder, Intrinsic::ppc_sync);
13102 if (isReleaseOrStronger(Ord))
13103 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13104 return nullptr;
13105}
13106
13108 Instruction *Inst,
13109 AtomicOrdering Ord) const {
13110 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
13111 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13112 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13113 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13114 if (isa<LoadInst>(Inst))
13115 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13116 {Inst});
13117 // FIXME: Can use isync for rmw operation.
13118 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13119 }
13120 return nullptr;
13121}
13122
13125 unsigned BinOpcode,
13126 unsigned CmpOpcode,
13127 unsigned CmpPred) const {
13128 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13129 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13130 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13131 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13132 unsigned AtomicSize = MI.getOperand(3).getImm();
13133
13134 auto LoadMnemonic = PPC::LDARX;
13135 auto StoreMnemonic = PPC::STDCX;
13136 switch (AtomicSize) {
13137 default:
13138 llvm_unreachable("Unexpected size of atomic entity");
13139 case 1:
13140 LoadMnemonic = PPC::LBARX;
13141 StoreMnemonic = PPC::STBCX;
13142 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13143 break;
13144 case 2:
13145 LoadMnemonic = PPC::LHARX;
13146 StoreMnemonic = PPC::STHCX;
13147 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13148 break;
13149 case 4:
13150 LoadMnemonic = PPC::LWARX;
13151 StoreMnemonic = PPC::STWCX;
13152 break;
13153 case 8:
13154 LoadMnemonic = PPC::LDARX;
13155 StoreMnemonic = PPC::STDCX;
13156 break;
13157 }
13158
13159 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13160 MachineFunction *F = BB->getParent();
13162
13163 if (CmpOpcode == PPC::CMPW && (AtomicSize == 1 || AtomicSize == 2))
13164 signExtendOperandIfUnknown(MI, BB, 4, /*IsByte=*/AtomicSize == 1, TII);
13165
13166 Register dest = MI.getOperand(0).getReg();
13167 Register ptrA = MI.getOperand(1).getReg();
13168 Register ptrB = MI.getOperand(2).getReg();
13169 Register incr = MI.getOperand(4).getReg();
13170 DebugLoc dl = MI.getDebugLoc();
13171
13172 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13173 MachineBasicBlock *loop2MBB =
13174 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13175 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13176 F->insert(It, loopMBB);
13177 if (CmpOpcode)
13178 F->insert(It, loop2MBB);
13179 F->insert(It, exitMBB);
13180 exitMBB->splice(exitMBB->begin(), BB,
13181 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13183
13184 MachineRegisterInfo &RegInfo = F->getRegInfo();
13185 Register TmpReg = (!BinOpcode) ? incr :
13186 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13187 : &PPC::GPRCRegClass);
13188
13189 // thisMBB:
13190 // ...
13191 // fallthrough --> loopMBB
13192 BB->addSuccessor(loopMBB);
13193
13194 // loopMBB:
13195 // l[wd]arx dest, ptr
13196 // add r0, dest, incr
13197 // st[wd]cx. r0, ptr
13198 // bne- loopMBB
13199 // fallthrough --> exitMBB
13200
13201 // For max/min...
13202 // loopMBB:
13203 // l[wd]arx dest, ptr
13204 // cmpl?[wd] dest, incr
13205 // bgt exitMBB
13206 // loop2MBB:
13207 // st[wd]cx. dest, ptr
13208 // bne- loopMBB
13209 // fallthrough --> exitMBB
13210
13211 BB = loopMBB;
13212 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13213 .addReg(ptrA).addReg(ptrB);
13214 if (BinOpcode)
13215 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13216 if (CmpOpcode) {
13217 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13218 // Signed comparisons of byte or halfword values must be sign-extended.
13219 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13220 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13221 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13222 ExtReg).addReg(dest);
13223 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13224 } else
13225 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13226
13227 BuildMI(BB, dl, TII->get(PPC::BCC))
13228 .addImm(CmpPred)
13229 .addReg(CrReg)
13230 .addMBB(exitMBB);
13231 BB->addSuccessor(loop2MBB);
13232 BB->addSuccessor(exitMBB);
13233 BB = loop2MBB;
13234 }
13235 BuildMI(BB, dl, TII->get(StoreMnemonic))
13236 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13237 BuildMI(BB, dl, TII->get(PPC::BCC))
13239 .addReg(PPC::CR0)
13240 .addMBB(loopMBB);
13241 BB->addSuccessor(loopMBB);
13242 BB->addSuccessor(exitMBB);
13243
13244 // exitMBB:
13245 // ...
13246 BB = exitMBB;
13247 return BB;
13248}
13249
// NOTE(review): the signature line of this definition is not present in this
// listing; judging by the isSignExtended(MI, TII) call in
// signExtendOperandIfUnknown, this looks like the body of that helper --
// confirm against the original file.
//
// Classifies MI by opcode: returns true when its result is treated as already
// sign extended, and false for every opcode that is not listed.
13251 switch(MI.getOpcode()) {
13252 default:
13253 return false;
13254 case PPC::COPY:
// A COPY is sign extended exactly when TII reports that its source register
// (operand 1) is sign extended.
13255 return TII->isSignExtended(MI.getOperand(1).getReg(),
13256 &MI.getMF()->getRegInfo());
// Opcodes whose result is always reported as sign extended: the
// LHA*/LWA*/PLHA*/PLWA* loads, the EXTSB/EXTSH/EXTSW/EXTSWSLI families and
// the SRAW* shifts.
13257 case PPC::LHA:
13258 case PPC::LHA8:
13259 case PPC::LHAU:
13260 case PPC::LHAU8:
13261 case PPC::LHAUX:
13262 case PPC::LHAUX8:
13263 case PPC::LHAX:
13264 case PPC::LHAX8:
13265 case PPC::LWA:
13266 case PPC::LWAUX:
13267 case PPC::LWAX:
13268 case PPC::LWAX_32:
13269 case PPC::LWA_32:
13270 case PPC::PLHA:
13271 case PPC::PLHA8:
13272 case PPC::PLHA8pc:
13273 case PPC::PLHApc:
13274 case PPC::PLWA:
13275 case PPC::PLWA8:
13276 case PPC::PLWA8pc:
13277 case PPC::PLWApc:
13278 case PPC::EXTSB:
13279 case PPC::EXTSB8:
13280 case PPC::EXTSB8_32_64:
13281 case PPC::EXTSB8_rec:
13282 case PPC::EXTSB_rec:
13283 case PPC::EXTSH:
13284 case PPC::EXTSH8:
13285 case PPC::EXTSH8_32_64:
13286 case PPC::EXTSH8_rec:
13287 case PPC::EXTSH_rec:
13288 case PPC::EXTSW:
13289 case PPC::EXTSWSLI:
13290 case PPC::EXTSWSLI_32_64:
13291 case PPC::EXTSWSLI_32_64_rec:
13292 case PPC::EXTSWSLI_rec:
13293 case PPC::EXTSW_32:
13294 case PPC::EXTSW_32_64:
13295 case PPC::EXTSW_32_64_rec:
13296 case PPC::EXTSW_rec:
13297 case PPC::SRAW:
13298 case PPC::SRAWI:
13299 case PPC::SRAWI_rec:
13300 case PPC::SRAW_rec:
13301 return true;
13302 }
// Not reached: every path through the switch above returns.
13303 return false;
13304}
13305
13306// Sign extend operand OpIdx if the value is not known to be sign extended.
13307// Assumes the operand is a register. The flag IsByte controls which instruction
13308// is used for the sign extension: EXTSB when true, EXTSH when false.
// When an extension is needed it is built in front of MI into a fresh GPRC
// virtual register, and operand OpIdx of MI is rewritten to use that register.
13310 unsigned OpIdx, bool IsByte,
13311 const PPCInstrInfo *TII) {
13312 MachineFunction *F = MI.getMF();
13313 MachineRegisterInfo &RegInfo = F->getRegInfo();
13314 Register Reg = MI.getOperand(OpIdx).getReg();
// Only a virtual register has its defining instruction inspected; any other
// register is treated as "not known to be sign extended".
13315 bool IsSignExtended =
13316 Reg.isVirtual() && isSignExtended(*RegInfo.getVRegDef(Reg), TII);
13317
13318 if (!IsSignExtended) {
13319 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13320 BuildMI(*BB, MI, MI.getDebugLoc(),
13321 TII->get(IsByte ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13322 .addReg(Reg);
// Redirect MI to the extended copy; the original register is left untouched.
13323 MI.getOperand(OpIdx).setReg(ValueReg);
13324 }
13325}
13326
13328 MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode,
13329 unsigned CmpOpcode, unsigned CmpPred) const {
// Expands a byte or halfword atomic read-modify-write pseudo into a
// LWARX/STWCX loop on the aligned word that contains the value, using
// shift/mask bookkeeping to touch only the addressed part of that word.
// Returns the block (exitMBB) that receives the instructions following MI.
// NOTE(review): the first signature line and a few interior lines of this
// definition are not present in this listing.
13330 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13331 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13332 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13333 assert(!Subtarget.hasPartwordAtomics() &&
13334 "Assumes that part-word atomics are not available");
13335 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13336
13337 // If this is a signed comparison and the value being compared is not known
13338 // to be sign extended, sign extend it here.
13339 DebugLoc dl = MI.getDebugLoc();
13340 MachineFunction *F = BB->getParent();
13341 MachineRegisterInfo &RegInfo = F->getRegInfo();
// Operand 3 is an immediate; the value 1 selects the 8-bit path and any other
// value takes the 16-bit path (see the masks and shift constants below).
13342 const bool is8bit = MI.getOperand(3).getImm() == 1;
13343 if (CmpOpcode == PPC::CMPW)
13344 signExtendOperandIfUnknown(MI, BB, 4, is8bit, TII);
// Read operand 4 only after the call above, which may have replaced it.
13345 Register incr = MI.getOperand(4).getReg();
13346
13347 // In 64 bit mode we have to use 64 bits for addresses, even though the
13348 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13349 // registers without caring whether they're 32 or 64, but here we're
13350 // doing actual arithmetic on the addresses.
13351 bool is64bit = Subtarget.isPPC64();
13352 bool isLittleEndian = Subtarget.isLittleEndian();
13353 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13354
13355 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13357
13358 Register dest = MI.getOperand(0).getReg();
13359 Register ptrA = MI.getOperand(1).getReg();
13360 Register ptrB = MI.getOperand(2).getReg();
13361
// loop2MBB only exists for the compare (MIN/MAX) forms, where the store is
// skipped when the comparison branches straight to exitMBB.
13362 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13363 MachineBasicBlock *loop2MBB =
13364 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13365 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13366 F->insert(It, loopMBB);
13367 if (CmpOpcode)
13368 F->insert(It, loop2MBB);
13369 F->insert(It, exitMBB);
// Everything after MI moves into exitMBB.
13370 exitMBB->splice(exitMBB->begin(), BB,
13371 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13373
// RC is the pointer-width class used for address arithmetic; all value, mask
// and shift temporaries are 32-bit GPRC registers.
13374 const TargetRegisterClass *RC =
13375 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13376 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13377
13378 Register PtrReg = RegInfo.createVirtualRegister(RC);
13379 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
// Little-endian uses the computed shift as is; big-endian gets a separate
// register because the shift is adjusted with XORI further down.
13380 Register ShiftReg =
13381 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13382 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13383 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13384 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13385 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13386 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13387 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13388 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13389 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13390 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13391 Register Ptr1Reg;
// With no BinOpcode the shifted operand (Incr2Reg) is itself the value that
// gets merged into the word.
13392 Register TmpReg =
13393 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13394
13395 // thisMBB:
13396 // ...
13397 // fallthrough --> loopMBB
13398 BB->addSuccessor(loopMBB);
13399
13400 // The 4-byte load must be aligned, while a char or short may be
13401 // anywhere in the word. Hence all this nasty bookkeeping code.
// (Values in square brackets are the halfword variants of the byte sequence.)
13402 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13403 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13404 // xori shift, shift1, 24 [16]
13405 // rlwinm ptr, ptr1, 0, 0, 29
13406 // slw incr2, incr, shift
13407 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13408 // slw mask, mask2, shift
13409 // loopMBB:
13410 // lwarx tmpDest, ptr
13411 // add tmp, tmpDest, incr2
13412 // andc tmp2, tmpDest, mask
13413 // and tmp3, tmp, mask
13414 // or tmp4, tmp3, tmp2
13415 // stwcx. tmp4, ptr
13416 // bne- loopMBB
13417 // fallthrough --> exitMBB
13418 // srw SrwDest, tmpDest, shift
13419 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
// When ptrA is the zero register, ptrB alone is the address and no add is
// needed.
13420 if (ptrA != ZeroReg) {
13421 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13422 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13423 .addReg(ptrA)
13424 .addReg(ptrB);
13425 } else {
13426 Ptr1Reg = ptrB;
13427 }
13428 // We need to use the 32-bit subregister to avoid a register class mismatch
13429 // in 64-bit mode.
13430 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13431 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13432 .addImm(3)
13433 .addImm(27)
13434 .addImm(is8bit ? 28 : 27);
13435 if (!isLittleEndian)
13436 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13437 .addReg(Shift1Reg)
13438 .addImm(is8bit ? 24 : 16);
// Clear the low two address bits to obtain the aligned word address.
13439 if (is64bit)
13440 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13441 .addReg(Ptr1Reg)
13442 .addImm(0)
13443 .addImm(61);
13444 else
13445 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13446 .addReg(Ptr1Reg)
13447 .addImm(0)
13448 .addImm(0)
13449 .addImm(29);
// Move the operand into the position of the addressed byte/halfword.
13450 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
// Build the unshifted mask: 255 for a byte, 65535 (via li 0 + ori) for a
// halfword.
13451 if (is8bit)
13452 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13453 else {
13454 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13455 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13456 .addReg(Mask3Reg)
13457 .addImm(65535);
13458 }
13459 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13460 .addReg(Mask2Reg)
13461 .addReg(ShiftReg);
13462
// loopMBB: load-reserve the whole word, compute the new field value, and
// split the word into the untouched part (Tmp2Reg) and the new part (Tmp3Reg).
13463 BB = loopMBB;
13464 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13465 .addReg(ZeroReg)
13466 .addReg(PtrReg);
13467 if (BinOpcode)
13468 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13469 .addReg(Incr2Reg)
13470 .addReg(TmpDestReg);
13471 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13472 .addReg(TmpDestReg)
13473 .addReg(MaskReg);
13474 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13475 if (CmpOpcode) {
13476 // For unsigned comparisons, we can directly compare the shifted values.
13477 // For signed comparisons we shift and sign extend.
13478 Register SReg = RegInfo.createVirtualRegister(GPRC);
13479 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13480 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13481 .addReg(TmpDestReg)
13482 .addReg(MaskReg);
13483 unsigned ValueReg = SReg;
13484 unsigned CmpReg = Incr2Reg;
13485 if (CmpOpcode == PPC::CMPW) {
// Signed compare: shift the loaded field down to bit 0, sign extend it, and
// compare against the unshifted incr instead of Incr2Reg.
13486 ValueReg = RegInfo.createVirtualRegister(GPRC);
13487 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13488 .addReg(SReg)
13489 .addReg(ShiftReg);
13490 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13491 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13492 .addReg(ValueReg);
13493 ValueReg = ValueSReg;
13494 CmpReg = incr;
13495 }
13496 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
// If CmpPred holds, skip the store and leave through exitMBB.
13497 BuildMI(BB, dl, TII->get(PPC::BCC))
13498 .addImm(CmpPred)
13499 .addReg(CrReg)
13500 .addMBB(exitMBB);
13501 BB->addSuccessor(loop2MBB);
13502 BB->addSuccessor(exitMBB);
13503 BB = loop2MBB;
13504 }
// Recombine the new field with the untouched bits and store-conditional the
// word; the conditional branch goes back to loopMBB to retry.
13505 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13506 BuildMI(BB, dl, TII->get(PPC::STWCX))
13507 .addReg(Tmp4Reg)
13508 .addReg(ZeroReg)
13509 .addReg(PtrReg);
13510 BuildMI(BB, dl, TII->get(PPC::BCC))
13512 .addReg(PPC::CR0)
13513 .addMBB(loopMBB);
13514 BB->addSuccessor(loopMBB);
13515 BB->addSuccessor(exitMBB);
13516
13517 // exitMBB:
13518 // ...
13519 BB = exitMBB;
13520 // Since the shift amount is not a constant, we need to clear
13521 // the upper bits with a separate RLWINM.
// Both instructions are inserted at BB->begin(), so the SRW built second ends
// up ahead of the RLWINM that consumes SrwDestReg.
13522 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13523 .addReg(SrwDestReg)
13524 .addImm(0)
13525 .addImm(is8bit ? 24 : 16)
13526 .addImm(31);
13527 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13528 .addReg(TmpDestReg)
13529 .addReg(ShiftReg);
13530 return BB;
13531}
13532
13535 MachineBasicBlock *MBB) const {
// Expands the setjmp pseudo MI: stores the TOC pointer (64-bit ELF ABI only),
// the base pointer and the return address into the buffer addressed by
// operand 1, and defines operand 0 through a PHI of 0 (from mainMBB) and
// 1 (from thisMBB). Returns sinkMBB, which receives the code following MI.
// NOTE(review): the first signature line and the declaration of MIB are not
// present in this listing.
13536 DebugLoc DL = MI.getDebugLoc();
13537 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13538 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13539
13540 MachineFunction *MF = MBB->getParent();
13541 MachineRegisterInfo &MRI = MF->getRegInfo();
13542
13543 const BasicBlock *BB = MBB->getBasicBlock();
13544 MachineFunction::iterator I = ++MBB->getIterator();
13545
13546 Register DstReg = MI.getOperand(0).getReg();
13547 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13548 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
// One result register per incoming path of the PHI built in sinkMBB.
13549 Register mainDstReg = MRI.createVirtualRegister(RC);
13550 Register restoreDstReg = MRI.createVirtualRegister(RC);
13551
13552 MVT PVT = getPointerTy(MF->getDataLayout());
13553 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13554 "Invalid Pointer Size!");
13555 // For v = setjmp(buf), we generate
13556 //
13557 // thisMBB:
13558 // SjLjSetup mainMBB
13559 // bl mainMBB
13560 // v_restore = 1
13561 // b sinkMBB
13562 //
13563 // mainMBB:
13564 // buf[LabelOffset] = LR
13565 // v_main = 0
13566 //
13567 // sinkMBB:
13568 // v = phi(main, restore)
13569 //
13570
13571 MachineBasicBlock *thisMBB = MBB;
13572 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13573 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13574 MF->insert(I, mainMBB);
13575 MF->insert(I, sinkMBB);
13576
13578
13579 // Transfer the remainder of BB and its successor edges to sinkMBB.
13580 sinkMBB->splice(sinkMBB->begin(), MBB,
13581 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13583
13584 // Note that the structure of the jmp_buf used here is not compatible
13585 // with that used by libc, and is not designed to be. Specifically, it
13586 // stores only those 'reserved' registers that LLVM does not otherwise
13587 // understand how to spill. Also, by convention, by the time this
13588 // intrinsic is called, Clang has already stored the frame address in the
13589 // first slot of the buffer and stack address in the third. Following the
13590 // X86 target code, we'll store the jump address in the second slot. We also
13591 // need to save the TOC pointer (R2) to handle jumps between shared
13592 // libraries, and that will be stored in the fourth slot. The thread
13593 // identifier (R13) is not affected.
13594
13595 // thisMBB:
// Buffer slots are pointer sized: slot 1 holds the label, slot 3 the TOC
// pointer and slot 4 the base pointer.
13596 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13597 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13598 const int64_t BPOffset = 4 * PVT.getStoreSize();
13599
13600 // Prepare a register to hold the IP; it is filled from LR in mainMBB.
13601 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13602 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13603 Register BufReg = MI.getOperand(1).getReg();
13604
// Save the TOC pointer (X2) in its slot; only done for the 64-bit ELF ABI.
13605 if (Subtarget.is64BitELFABI()) {
13606 setUsesTOCBasePtr(*MBB->getParent());
13607 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13608 .addReg(PPC::X2)
13609 .addImm(TOCOffset)
13610 .addReg(BufReg)
13611 .cloneMemRefs(MI);
13612 }
13613
13614 // Naked functions never have a base pointer, and so we use r1. For all
13615 // other functions, this decision must be delayed until during PEI.
13616 unsigned BaseReg;
13617 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13618 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13619 else
13620 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13621
// Save the base pointer (or r1 for naked functions) in its slot.
13622 MIB = BuildMI(*thisMBB, MI, DL,
13623 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13624 .addReg(BaseReg)
13625 .addImm(BPOffset)
13626 .addReg(BufReg)
13627 .cloneMemRefs(MI);
13628
13629 // Setup
// The branch-and-link to mainMBB is given a register mask from
// getNoPreservedMask().
13630 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13631 MIB.addRegMask(TRI->getNoPreservedMask());
13632
// Result on the restore path: the PHI in sinkMBB yields 1 when coming from
// thisMBB.
13633 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13634
13635 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13636 .addMBB(mainMBB);
13637 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13638
13639 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13640 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13641
13642 // mainMBB:
13643 // mainDstReg = 0
// Read LR into LabelReg; it is stored as the jump address below.
13644 MIB =
13645 BuildMI(mainMBB, DL,
13646 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13647
13648 // Store IP
13649 if (Subtarget.isPPC64()) {
13650 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13651 .addReg(LabelReg)
13652 .addImm(LabelOffset)
13653 .addReg(BufReg);
13654 } else {
13655 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13656 .addReg(LabelReg)
13657 .addImm(LabelOffset)
13658 .addReg(BufReg);
13659 }
13660 MIB.cloneMemRefs(MI);
13661
13662 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13663 mainMBB->addSuccessor(sinkMBB);
13664
13665 // sinkMBB:
// Merge the two results: 0 when arriving from mainMBB, 1 when arriving from
// thisMBB.
13666 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13667 TII->get(PPC::PHI), DstReg)
13668 .addReg(mainDstReg).addMBB(mainMBB)
13669 .addReg(restoreDstReg).addMBB(thisMBB);
13670
13671 MI.eraseFromParent();
13672 return sinkMBB;
13673}
13674
13677 MachineBasicBlock *MBB) const {
// Expands the longjmp pseudo MI in place: reloads FP, the jump address, SP,
// BP and (64-bit SVR4 only) the TOC pointer from the buffer addressed by
// operand 0, then jumps through CTR. No new blocks are created; MBB is
// returned.
// NOTE(review): the first signature line and the declaration of MIB are not
// present in this listing.
13678 DebugLoc DL = MI.getDebugLoc();
13679 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13680
13681 MachineFunction *MF = MBB->getParent();
13682 MachineRegisterInfo &MRI = MF->getRegInfo();
13683
13684 MVT PVT = getPointerTy(MF->getDataLayout());
13685 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13686 "Invalid Pointer Size!");
13687
13688 const TargetRegisterClass *RC =
13689 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
// Tmp holds the reloaded jump address until it is moved to CTR.
13690 Register Tmp = MRI.createVirtualRegister(RC);
13691 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13692 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13693 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
// Base pointer register: X30 in 64-bit mode; in 32-bit mode R29 for
// position-independent SVR4 code and R30 otherwise.
13694 unsigned BP =
13695 (PVT == MVT::i64)
13696 ? PPC::X30
13697 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13698 : PPC::R30);
13699
13701
// Buffer layout in pointer-sized slots: 0 = FP, 1 = jump address, 2 = SP,
// 3 = TOC pointer, 4 = BP.
13702 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13703 const int64_t SPOffset = 2 * PVT.getStoreSize();
13704 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13705 const int64_t BPOffset = 4 * PVT.getStoreSize();
13706
13707 Register BufReg = MI.getOperand(0).getReg();
13708
13709 // Reload FP (the jumped-to function may not have had a
13710 // frame pointer, and if so, then its r31 will be restored
13711 // as necessary).
13712 if (PVT == MVT::i64) {
13713 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13714 .addImm(0)
13715 .addReg(BufReg);
13716 } else {
13717 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13718 .addImm(0)
13719 .addReg(BufReg);
13720 }
13721 MIB.cloneMemRefs(MI);
13722
13723 // Reload IP
13724 if (PVT == MVT::i64) {
13725 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13726 .addImm(LabelOffset)
13727 .addReg(BufReg);
13728 } else {
13729 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13730 .addImm(LabelOffset)
13731 .addReg(BufReg);
13732 }
13733 MIB.cloneMemRefs(MI);
13734
13735 // Reload SP
13736 if (PVT == MVT::i64) {
13737 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13738 .addImm(SPOffset)
13739 .addReg(BufReg);
13740 } else {
13741 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13742 .addImm(SPOffset)
13743 .addReg(BufReg);
13744 }
13745 MIB.cloneMemRefs(MI);
13746
13747 // Reload BP
13748 if (PVT == MVT::i64) {
13749 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13750 .addImm(BPOffset)
13751 .addReg(BufReg);
13752 } else {
13753 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13754 .addImm(BPOffset)
13755 .addReg(BufReg);
13756 }
13757 MIB.cloneMemRefs(MI);
13758
13759 // Reload TOC
// Only done for 64-bit SVR4; the TOC pointer lives in X2.
13760 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13761 setUsesTOCBasePtr(*MBB->getParent());
13762 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13763 .addImm(TOCOffset)
13764 .addReg(BufReg)
13765 .cloneMemRefs(MI);
13766 }
13767
13768 // Jump
// Move the reloaded address to CTR and branch through it.
13769 BuildMI(*MBB, MI, DL,
13770 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13771 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13772
13773 MI.eraseFromParent();
13774 return MBB;
13775}
13776
// NOTE(review): the signature line of this definition is not present in this
// listing.
// Returns true only when MF's function carries a "probe-stack" attribute whose
// string value is exactly "inline-asm"; returns false in every other case,
// including when the attribute is absent.
13778 // If the function specifically requests inline stack probes, emit them.
13779 if (MF.getFunction().hasFnAttribute("probe-stack"))
13780 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13781 "inline-asm";
13782 return false;
13783}
13784
// NOTE(review): the signature line of this definition is not present in this
// listing.
// Returns the stack probe interval for MF: the "stack-probe-size" function
// attribute (4096 when absent) rounded down to a multiple of the stack
// alignment, or the stack alignment itself when that rounding yields zero.
13786 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13787 unsigned StackAlign = TFI->getStackAlignment();
// The mask-based rounding below is only valid for a power-of-two alignment.
13788 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13789 "Unexpected stack alignment");
13790 // The default stack probe size is 4096 if the function has no
13791 // stack-probe-size attribute.
13792 const Function &Fn = MF.getFunction();
13793 unsigned StackProbeSize =
13794 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13795 // Round down to the stack alignment.
13796 StackProbeSize &= ~(StackAlign - 1);
// A requested size smaller than the alignment rounds to zero; fall back to
// the alignment so the result is never zero.
13797 return StackProbeSize ? StackProbeSize : StackAlign;
13798}
13799
13800// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13801// into three phases. In the first phase, it uses pseudo instruction
13802// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
13803// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
13804// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
13805// MaxCallFrameSize so that it can calculate correct data area pointer.
// Returns TailMBB, which receives the instructions that followed MI.
// NOTE(review): the first signature line and a few interior lines of this
// definition are not present in this listing.
13808 MachineBasicBlock *MBB) const {
13809 const bool isPPC64 = Subtarget.isPPC64();
13810 MachineFunction *MF = MBB->getParent();
13811 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13812 DebugLoc DL = MI.getDebugLoc();
13813 const unsigned ProbeSize = getStackProbeSize(*MF);
13814 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13815 MachineRegisterInfo &MRI = MF->getRegInfo();
13816 // The CFG of probing stack looks as
13817 // +-----+
13818 // | MBB |
13819 // +--+--+
13820 // |
13821 // +----v----+
13822 // +--->+ TestMBB +---+
13823 // | +----+----+ |
13824 // | | |
13825 // | +-----v----+ |
13826 // +---+ BlockMBB | |
13827 // +----------+ |
13828 // |
13829 // +---------+ |
13830 // | TailMBB +<--+
13831 // +---------+
13832 // In MBB, calculate previous frame pointer and final stack pointer.
13833 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13834 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13835 // TailMBB is spliced via \p MI.
13836 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13837 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13838 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13839
// Layout order after MBB: TestMBB, BlockMBB, TailMBB.
13840 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13841 MF->insert(MBBIter, TestMBB);
13842 MF->insert(MBBIter, BlockMBB);
13843 MF->insert(MBBIter, TailMBB);
13844
13845 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13846 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13847
13848 Register DstReg = MI.getOperand(0).getReg();
13849 Register NegSizeReg = MI.getOperand(1).getReg();
13850 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13851 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13852 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13853 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13854
13855 // Since value of NegSizeReg might be realigned in prologepilog, insert a
13856 // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
13857 // NegSize.
13858 unsigned ProbeOpc;
13859 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13860 ProbeOpc =
13861 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13862 else
13863 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13864 // and NegSizeReg will be allocated in the same phyreg to avoid
13865 // redundant copy when NegSizeReg has only one use which is current MI and
13866 // will be replaced by PREPARE_PROBED_ALLOCA then.
13867 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13868 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
// The pseudo defines both FramePointer and ActualNegSizeReg and forwards
// operands 2 and 3 of MI unchanged.
13869 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13870 .addDef(ActualNegSizeReg)
13871 .addReg(NegSizeReg)
13872 .add(MI.getOperand(2))
13873 .add(MI.getOperand(3));
13874
13875 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13876 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13877 FinalStackPtr)
13878 .addReg(SPReg)
13879 .addReg(ActualNegSizeReg);
13880
13881 // Materialize a scratch register for update.
13882 int64_t NegProbeSize = -(int64_t)ProbeSize;
13883 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13884 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
// A value that does not fit a signed 16-bit immediate is built from its high
// half (LIS) and low half (ORI); otherwise a single LI is enough.
13885 if (!isInt<16>(NegProbeSize)) {
13886 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13887 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13888 .addImm(NegProbeSize >> 16);
13889 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13890 ScratchReg)
13891 .addReg(TempReg)
13892 .addImm(NegProbeSize & 0xFFFF);
13893 } else
13894 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13895 .addImm(NegProbeSize);
13896
13897 {
13898 // Probing leading residual part.
// NegMod = ActualNegSize - (ActualNegSize / NegProbeSize) * NegProbeSize,
// i.e. the part of the allocation that is not a whole number of probe blocks.
13899 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13900 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13901 .addReg(ActualNegSizeReg)
13902 .addReg(ScratchReg);
13903 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13904 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13905 .addReg(Div)
13906 .addReg(ScratchReg);
13907 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13908 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13909 .addReg(Mul)
13910 .addReg(ActualNegSizeReg);
// Store-with-update: writes FramePointer at SP + NegMod and moves SP there.
13911 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13912 .addReg(FramePointer)
13913 .addReg(SPReg)
13914 .addReg(NegMod);
13915 }
13916
13917 {
13918 // Remaining part should be multiple of ProbeSize.
// TestMBB compares SP with FinalStackPtr and conditionally branches to
// TailMBB; otherwise control continues into BlockMBB.
13919 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13920 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13921 .addReg(SPReg)
13922 .addReg(FinalStackPtr);
13923 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13925 .addReg(CmpResult)
13926 .addMBB(TailMBB);
13927 TestMBB->addSuccessor(BlockMBB);
13928 TestMBB->addSuccessor(TailMBB);
13929 }
13930
13931 {
13932 // Touch the block.
13933 // |P...|P...|P...
// Each iteration moves SP by NegProbeSize (held in ScratchReg) while storing
// FramePointer at the new SP, then loops back to the test.
13934 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13935 .addReg(FramePointer)
13936 .addReg(SPReg)
13937 .addReg(ScratchReg);
13938 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13939 BlockMBB->addSuccessor(TestMBB);
13940 }
13941
13942 // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13943 // DYNAREAOFFSET pseudo instruction to get the future result.
13944 Register MaxCallFrameSizeReg =
13945 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13946 BuildMI(TailMBB, DL,
13947 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13948 MaxCallFrameSizeReg)
13949 .add(MI.getOperand(2))
13950 .add(MI.getOperand(3));
// The result handed back in DstReg is SP plus that offset.
13951 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13952 .addReg(SPReg)
13953 .addReg(MaxCallFrameSizeReg);
13954
13955 // Splice instructions after MI to TailMBB.
13956 TailMBB->splice(TailMBB->end(), MBB,
13957 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13959 MBB->addSuccessor(TestMBB);
13960
13961 // Delete the pseudo instruction.
13962 MI.eraseFromParent();
13963
13964 ++NumDynamicAllocaProbed;
13965 return TailMBB;
13966}
13967
13968/// Check if the opcode is a SELECT or SELECT_CC variant.
13969/// @param Opcode The opcode to check
13970/// @param CheckOnlyCC If true, only return true for SELECT_CC variants;
13971/// if false, return true for both SELECT and SELECT_CC
13972static bool IsSelect(unsigned Opcode, bool CheckOnlyCC = false) {
13973 switch (Opcode) {
13974 // SELECT_CC variants - always return true
13975 case PPC::SELECT_CC_I4:
13976 case PPC::SELECT_CC_I8:
13977 case PPC::SELECT_CC_F4:
13978 case PPC::SELECT_CC_F8:
13979 case PPC::SELECT_CC_F16:
13980 case PPC::SELECT_CC_VRRC:
13981 case PPC::SELECT_CC_VSFRC:
13982 case PPC::SELECT_CC_VSSRC:
13983 case PPC::SELECT_CC_VSRC:
13984 case PPC::SELECT_CC_SPE4:
13985 case PPC::SELECT_CC_SPE:
13986 return true;
13987 // SELECT variants - only return true if CheckOnlyCC is false
13988 case PPC::SELECT_I4:
13989 case PPC::SELECT_I8:
13990 case PPC::SELECT_F4:
13991 case PPC::SELECT_F8:
13992 case PPC::SELECT_F16:
13993 case PPC::SELECT_SPE:
13994 case PPC::SELECT_SPE4:
13995 case PPC::SELECT_VRRC:
13996 case PPC::SELECT_VSFRC:
13997 case PPC::SELECT_VSSRC:
13998 case PPC::SELECT_VSRC:
13999 return !CheckOnlyCC; // true if checking all SELECTs, false if only CC
14000 default:
14001 return false;
14002 }
14003}
14004static bool IsSelectCC(unsigned Opcode) { return IsSelect(Opcode, true); }
14005
14006/// Emit SELECT instruction, using ISEL if available, otherwise use
14007/// branch-based control flow.
14008///
14009/// For targets with ISEL support (SELECT_CC_I4/I8, SELECT_I4/I8), this
14010/// generates a single ISEL instruction. Otherwise, it creates a
14011/// branch-based control flow pattern with PHI nodes.
///
/// MI is erased on both paths. The ISEL path returns the original block; the
/// branch-based path returns the new sinkMBB that holds the PHI and the
/// instructions that followed MI.
/// NOTE(review): the first signature line and a few interior lines of this
/// definition are not present in this listing.
14013 const TargetInstrInfo *TII,
14014 const PPCSubtarget &Subtarget) {
14015 assert(IsSelect(MI.getOpcode()) && "Instruction must be a SELECT variant");
14016
14017 // Check if we can use ISEL for this SELECT
// Only the integer forms (I4/I8) are eligible, and only when the subtarget
// reports ISEL support.
14018 if (Subtarget.hasISEL() &&
14019 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
14020 MI.getOpcode() == PPC::SELECT_CC_I8 ||
14021 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
// Build the condition list for insertSelect: the SELECT_CC forms contribute
// operand 4 first; operand 1 is appended as the last entry.
14023 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
14024 MI.getOpcode() == PPC::SELECT_CC_I8)
14025 Cond.push_back(MI.getOperand(4));
14026 else
14028 Cond.push_back(MI.getOperand(1));
14029
14030 DebugLoc dl = MI.getDebugLoc();
14031 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
14032 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
14033 MI.eraseFromParent();
14034 return BB;
14035 }
14036
14037 // Fall back to branch-based SELECT implementation
14038 MachineFunction *F = BB->getParent();
14039 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14041 DebugLoc dl = MI.getDebugLoc();
14042
14043 MachineBasicBlock *thisMBB = BB;
14044 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
14045 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14046 F->insert(It, copy0MBB);
14047 F->insert(It, sinkMBB);
14048
// If CARRY is still used after MI, mark it live-in to both new blocks.
14049 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
14050 copy0MBB->addLiveIn(PPC::CARRY);
14051 sinkMBB->addLiveIn(PPC::CARRY);
14052 }
14053
14054 // Set the call frame size on entry to the new basic blocks.
14055 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
14056 copy0MBB->setCallFrameSize(CallFrameSize);
14057 sinkMBB->setCallFrameSize(CallFrameSize);
14058
14059 // Transfer the remainder of BB and its successor edges to sinkMBB.
14060 sinkMBB->splice(sinkMBB->begin(), BB,
14061 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14063
14064 // Add successors
14065 BB->addSuccessor(copy0MBB);
14066 BB->addSuccessor(sinkMBB);
14067
14068 // Build branch instruction
// SELECT_CC forms branch on the predicate in operand 4 applied to the
// register in operand 1; plain SELECT forms branch on operand 1 directly.
14069 if (IsSelectCC(MI.getOpcode()))
14070 BuildMI(BB, dl, TII->get(PPC::BCC))
14071 .addImm(MI.getOperand(4).getImm())
14072 .addReg(MI.getOperand(1).getReg())
14073 .addMBB(sinkMBB);
14074 else
14075 BuildMI(BB, dl, TII->get(PPC::BC))
14076 .addReg(MI.getOperand(1).getReg())
14077 .addMBB(sinkMBB);
14078
14079 // copy0MBB: fallthrough to sinkMBB
14080 BB = copy0MBB;
14081 BB->addSuccessor(sinkMBB);
14082
14083 // sinkMBB: PHI instruction
// Arriving directly from thisMBB (branch taken) selects operand 2; arriving
// through copy0MBB (fallthrough) selects operand 3.
14084 BB = sinkMBB;
14085 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14086 .addReg(MI.getOperand(3).getReg())
14087 .addMBB(copy0MBB)
14088 .addReg(MI.getOperand(2).getReg())
14089 .addMBB(thisMBB);
14090 MI.eraseFromParent();
14091 return BB;
14092}
14093
14094/// Helper function to create basic blocks for atomic compare-and-swap.
14095/// Creates three basic blocks (loop1MBB, loop2MBB, exitMBB) and sets up
14096/// the control flow structure common to both hardware and software
14097/// implementations of atomic compare-and-swap operations.
///
/// The three blocks are returned through the reference parameters. They are
/// inserted in the order loop1MBB, loop2MBB, exitMBB; the instructions that
/// follow MI are moved into exitMBB, and loop1MBB is added as a successor of
/// BB.
/// NOTE(review): parts of the signature and some interior lines of this
/// definition are not present in this listing.
14099 MachineBasicBlock *&loop1MBB,
14100 MachineBasicBlock *&loop2MBB,
14101 MachineBasicBlock *&exitMBB,
14104 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14105 loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14106 loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14107 exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14108 F->insert(It, loop1MBB);
14109 F->insert(It, loop2MBB);
14110 F->insert(It, exitMBB);
// Everything after MI continues in exitMBB.
14111 exitMBB->splice(exitMBB->begin(), BB,
14112 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14114 BB->addSuccessor(loop1MBB);
14115}
14116
14117 /// Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16
14118 /// with partword atomic support.
14119 ///
14120 /// This uses native PowerPC atomic instructions (LBARX/LHARX/LWARX/LDARX for
14121 /// load-and-reserve, STBCX/STHCX/STWCX/STDCX for store-conditional) to
14122 /// implement atomic compare-and-swap at byte, halfword, word, or doubleword
14123 /// granularity.
14124 ///
14125 /// Control flow:
14126 /// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14127 /// | |
14128 /// +------------+
14129 ///
14130 /// loop1MBB:
14131 /// - Load-and-reserve from memory
14132 /// - Compare loaded value with expected old value
14133 /// - Branch to exitMBB if not equal (CAS failed)
14134 /// loop2MBB:
14135 /// - Store-conditional new value to memory
14136 /// - Branch back to loop1MBB if store failed (retry)
14137 /// - Branch to exitMBB on success (an explicit B is emitted)
///
/// The pseudo's operands are read as: 0 = result register, 1 and 2 = the two
/// address registers, 3 = expected old value, 4 = new value. MI itself is not
/// erased by this function.
///
/// \returns exitMBB.
14138 static MachineBasicBlock *
14140 const TargetInstrInfo *TII,
14141 const PPCSubtarget &Subtarget) {
14142 MachineFunction *F = BB->getParent();
14144
// Only the doubleword form uses the 64-bit compare (CMPD) in loop1MBB.
14145 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14146
// Pick the load-and-reserve / store-conditional pair matching the access
// width. The initial values are overwritten by every case of the switch.
14147 unsigned LoadMnemonic = PPC::LDARX;
14148 unsigned StoreMnemonic = PPC::STDCX;
14149 switch (MI.getOpcode()) {
14150 default:
14151 llvm_unreachable("Compare and swap of unknown size");
14152 case PPC::ATOMIC_CMP_SWAP_I8:
14153 LoadMnemonic = PPC::LBARX;
14154 StoreMnemonic = PPC::STBCX;
14155 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14156 break;
14157 case PPC::ATOMIC_CMP_SWAP_I16:
14158 LoadMnemonic = PPC::LHARX;
14159 StoreMnemonic = PPC::STHCX;
14160 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
14161 break;
14162 case PPC::ATOMIC_CMP_SWAP_I32:
14163 LoadMnemonic = PPC::LWARX;
14164 StoreMnemonic = PPC::STWCX;
14165 break;
14166 case PPC::ATOMIC_CMP_SWAP_I64:
14167 LoadMnemonic = PPC::LDARX;
14168 StoreMnemonic = PPC::STDCX;
14169 break;
14170 }
14171
14172 MachineRegisterInfo &RegInfo = F->getRegInfo();
14173 Register dest = MI.getOperand(0).getReg();
14174 Register ptrA = MI.getOperand(1).getReg();
14175 Register ptrB = MI.getOperand(2).getReg();
14176 Register oldval = MI.getOperand(3).getReg();
14177 Register newval = MI.getOperand(4).getReg();
14178 DebugLoc dl = MI.getDebugLoc();
14179
// Create the loop/exit blocks; the instructions after MI end up in exitMBB.
14180 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14181 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14182
// Condition register that receives the loaded-vs-expected comparison.
14183 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14184
14185 // loop1MBB:
14186 // l[bhwd]arx dest, ptr
14187 // cmp[wd] dest, oldval
14188 // bne- exitBB
14189 BB = loop1MBB;
14190 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14191 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14192 .addReg(dest)
14193 .addReg(oldval);
14194 BuildMI(BB, dl, TII->get(PPC::BCC))
14196 .addReg(CrReg)
14197 .addMBB(exitMBB);
14198 BB->addSuccessor(loop2MBB);
14199 BB->addSuccessor(exitMBB);
14200
14201 // loop2MBB:
14202 // st[bhwd]cx. newval, ptr
14203 // bne- loop1MBB
14204 // b exitBB
14205 BB = loop2MBB;
14206 BuildMI(BB, dl, TII->get(StoreMnemonic))
14207 .addReg(newval)
14208 .addReg(ptrA)
14209 .addReg(ptrB);
// The retry branch tests CR0.
14210 BuildMI(BB, dl, TII->get(PPC::BCC))
14212 .addReg(PPC::CR0)
14213 .addMBB(loop1MBB);
14214 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14215 BB->addSuccessor(loop1MBB);
14216 BB->addSuccessor(exitMBB);
14217
14218 return exitMBB;
14219 }
14220
14221 /// Emit software-emulated atomic compare-and-swap for I8/I16 without
14222 /// hardware partword atomic support.
14223 ///
14224 /// This emulates byte/halfword atomic operations using word (32-bit) atomic
14225 /// instructions. Since PowerPC atomic instructions work at word granularity,
14226 /// we must:
14227 /// 1. Align the pointer to a word boundary
14228 /// 2. Calculate the bit shift for the target byte/halfword within the word
14229 /// 3. Create masks to isolate the target byte/halfword
14230 /// 4. Shift old/new values into the correct bit position
14231 /// 5. Use LWARX/STWCX on the full word
14232 /// 6. Mask and merge to preserve other bytes in the word
14233 /// 7. Extract and shift the result back
14234 ///
14235 /// Control flow:
14236 /// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14237 /// | |
14238 /// +------------+
14239 ///
14240 /// loop1MBB:
14241 /// - LWARX: Load-and-reserve full word
14242 /// - Mask to extract target byte/halfword
14243 /// - Compare with expected old value
14244 /// - Branch to exitMBB if not equal (CAS failed)
14245 /// loop2MBB:
14246 /// - Merge new value with other bytes in the word
14247 /// - STWCX: Store-conditional full word
14248 /// - Branch back to loop1MBB if store failed (retry)
14249 /// - Branch to exitMBB on success (an explicit B is emitted)
14250 /// exitMBB:
14251 /// - Shift the masked loaded value back down into the result register
///
/// The pseudo's operands are read as: 0 = result register, 1 and 2 = the two
/// address registers, 3 = expected old value, 4 = new value. MI itself is not
/// erased by this function.
///
/// \returns exitMBB, with the final SRW inserted at its beginning.
14252 static MachineBasicBlock *
14254 const TargetInstrInfo *TII,
14255 const PPCSubtarget &Subtarget) {
14256 MachineFunction *F = BB->getParent();
14258
14259 bool is64bit = Subtarget.isPPC64();
14260 bool isLittleEndian = Subtarget.isLittleEndian();
14261 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14262
14263 Register dest = MI.getOperand(0).getReg();
14264 Register ptrA = MI.getOperand(1).getReg();
14265 Register ptrB = MI.getOperand(2).getReg();
14266 Register oldval = MI.getOperand(3).getReg();
14267 Register newval = MI.getOperand(4).getReg();
14268 DebugLoc dl = MI.getDebugLoc();
14269
// After this call BB still refers to the original block, so the setup code
// emitted below (up to the "loop1MBB" section) is appended to it.
14270 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14271 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14272
14273 MachineRegisterInfo &RegInfo = F->getRegInfo();
// RC is the pointer-width class; GPRC is used for all 32-bit temporaries.
14274 const TargetRegisterClass *RC =
14275 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14276 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14277
14278 // Lambda to create virtual registers
14279 auto createVReg = [&](const TargetRegisterClass *RC) {
14280 return RegInfo.createVirtualRegister(RC);
14281 };
14282
14283 Register PtrReg = createVReg(RC);
14284 Register Shift1Reg = createVReg(GPRC);
// On little-endian targets no adjustment is applied, so the raw shift is
// reused directly instead of allocating a second register.
14285 Register ShiftReg = isLittleEndian ? Shift1Reg : createVReg(GPRC);
14286 Register NewVal2Reg = createVReg(GPRC);
14287 Register NewVal3Reg = createVReg(GPRC);
14288 Register OldVal2Reg = createVReg(GPRC);
14289 Register OldVal3Reg = createVReg(GPRC);
14290 Register MaskReg = createVReg(GPRC);
14291 Register Mask2Reg = createVReg(GPRC);
14292 Register Mask3Reg = createVReg(GPRC);
14293 Register Tmp2Reg = createVReg(GPRC);
14294 Register Tmp4Reg = createVReg(GPRC);
14295 Register TmpDestReg = createVReg(GPRC);
14296 Register TmpReg = createVReg(GPRC);
14297 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14298 Register CrReg = createVReg(&PPC::CRRCRegClass);
14299
14300 // Compute aligned pointer and shift amount
// If the first address register is the zero register the address is just
// ptrB; otherwise the two address registers are added together.
14301 Register Ptr1Reg;
14302 if (ptrA != ZeroReg) {
14303 Ptr1Reg = createVReg(RC);
14304 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14305 .addReg(ptrA)
14306 .addReg(ptrB);
14307 } else {
14308 Ptr1Reg = ptrB;
14309 }
14310
// Shift1Reg = (address & 3) * 8 for a byte, (address & 2) * 8 for a halfword:
// rotate left by 3 and keep bits 27..28 (byte) or bit 27 (halfword).
14311 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14312 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14313 .addImm(3)
14314 .addImm(27)
14315 .addImm(is8bit ? 28 : 27);
// On big-endian targets the shift is flipped with XOR 24 (byte) / 16
// (halfword).
14316 if (!isLittleEndian)
14317 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14318 .addReg(Shift1Reg)
14319 .addImm(is8bit ? 24 : 16);
// PtrReg = address with its two low bits cleared, i.e. the containing word.
14320 if (is64bit)
14321 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14322 .addReg(Ptr1Reg)
14323 .addImm(0)
14324 .addImm(61);
14325 else
14326 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14327 .addReg(Ptr1Reg)
14328 .addImm(0)
14329 .addImm(0)
14330 .addImm(29);
14331
14332 // Prepare masked values
14333 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14334 .addReg(newval)
14335 .addReg(ShiftReg);
14336 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14337 .addReg(oldval)
14338 .addReg(ShiftReg);
// Mask2Reg = 0xFF for a byte. For a halfword 0xFFFF is built as LI 0
// followed by ORI 65535.
14339 if (is8bit)
14340 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14341 else {
14342 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14343 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14344 .addReg(Mask3Reg)
14345 .addImm(65535);
14346 }
// MaskReg selects the target byte/halfword inside the word; the shifted old
// and new values are clipped to it.
14347 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14348 .addReg(Mask2Reg)
14349 .addReg(ShiftReg);
14350 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14351 .addReg(NewVal2Reg)
14352 .addReg(MaskReg);
14353 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14354 .addReg(OldVal2Reg)
14355 .addReg(MaskReg);
14356
14357 // loop1MBB:
14358 // lwarx tmpDest, ptr
14359 // and tmp, tmpDest, mask
14360 // cmpw tmp, oldval3
14361 // bne- exitBB
14362 BB = loop1MBB;
14363 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14364 .addReg(ZeroReg)
14365 .addReg(PtrReg);
14366 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14367 .addReg(TmpDestReg)
14368 .addReg(MaskReg);
14369 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg).addReg(TmpReg).addReg(OldVal3Reg);
14370 BuildMI(BB, dl, TII->get(PPC::BCC))
14372 .addReg(CrReg)
14373 .addMBB(exitMBB);
14374 BB->addSuccessor(loop2MBB);
14375 BB->addSuccessor(exitMBB);
14376
14377 // loop2MBB:
14378 // andc tmp2, tmpDest, mask
14379 // or tmp4, tmp2, newval3
14380 // stwcx. tmp4, ptr
14381 // bne- loop1MBB
14382 // b exitBB
14383 BB = loop2MBB;
14384 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14385 .addReg(TmpDestReg)
14386 .addReg(MaskReg);
14387 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14388 .addReg(Tmp2Reg)
14389 .addReg(NewVal3Reg);
14390 BuildMI(BB, dl, TII->get(PPC::STWCX))
14391 .addReg(Tmp4Reg)
14392 .addReg(ZeroReg)
14393 .addReg(PtrReg);
14394 BuildMI(BB, dl, TII->get(PPC::BCC))
14396 .addReg(PPC::CR0)
14397 .addMBB(loop1MBB);
14398 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14399 BB->addSuccessor(loop1MBB);
14400 BB->addSuccessor(exitMBB);
14401
14402 // exitMBB:
14403 // srw dest, tmp, shift
// The shifted value is TmpReg (the masked word), not the raw loaded word.
// The SRW is placed at the very start of exitMBB.
14404 BB = exitMBB;
14405 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14406 .addReg(TmpReg)
14407 .addReg(ShiftReg);
14408
14409 return BB;
14410 }
14411
14414 MachineBasicBlock *BB) const {
// Expands the pseudo instruction MI into real machine instructions and, where
// needed, new basic blocks. Returns the block in which code following the
// expansion continues.
14415 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14416
14417 // To "insert" these instructions we actually have to insert their
14418 // control-flow patterns.
14419 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14421
14422 MachineFunction *F = BB->getParent();
14423 MachineRegisterInfo &MRI = F->getRegInfo();
14424
14425 // Select pseudos are expanded by emitSelect before the opcode switch below.
14426 if (IsSelect(MI.getOpcode()))
14427 return emitSelect(MI, BB, TII, Subtarget);
14428
14429 switch (MI.getOpcode()) {
14430 case TargetOpcode::STACKMAP:
14431 return emitPatchPoint(MI, BB);
14432 case TargetOpcode::PATCHPOINT:
14433 // Call lowering should have added an r2 operand to indicate a dependence
14434 // on the TOC base pointer value. It can't however, because there is no
14435 // way to mark the dependence as implicit there, and so the stackmap code
14436 // will confuse it with a regular operand. Instead, add the dependence
14437 // here.
14438 if (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls())
14439 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
14440 return emitPatchPoint(MI, BB);
14441
14442 case PPC::EH_SjLj_SetJmp32:
14443 case PPC::EH_SjLj_SetJmp64:
14444 return emitEHSjLjSetJmp(MI, BB);
14445
14446 case PPC::EH_SjLj_LongJmp32:
14447 case PPC::EH_SjLj_LongJmp64:
14448 return emitEHSjLjLongJmp(MI, BB);
14449
14450 case PPC::ReadTB: {
14451 // To read the 64-bit time-base register on a 32-bit target, we read the
14452 // two halves. Should the counter have wrapped while it was being read, we
14453 // need to try again.
14454 // ...
14455 // readLoop:
14456 // mfspr Rx,TBU # load from TBU
14457 // mfspr Ry,TB # load from TB
14458 // mfspr Rz,TBU # load from TBU
14459 // cmpw crX,Rx,Rz # check if 'old'='new'
14460 // bne readLoop # branch if they're not equal
14461 // ...
14462
14463 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14464 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14465 DebugLoc dl = MI.getDebugLoc();
14466 F->insert(It, readMBB);
14467 F->insert(It, sinkMBB);
14468
14469 // Transfer the remainder of BB and its successor edges to sinkMBB.
14470 sinkMBB->splice(sinkMBB->begin(), BB,
14471 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14473
14474 BB->addSuccessor(readMBB);
14475 BB = readMBB;
14476
14477 MachineRegisterInfo &RegInfo = F->getRegInfo();
14478 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14479 Register LoReg = MI.getOperand(0).getReg();
14480 Register HiReg = MI.getOperand(1).getReg();
14481
// SPR 269 is read into HiReg (and again into ReadAgainReg); SPR 268 is read
// into LoReg.
14482 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14483 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14484 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14485
14486 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14487
14488 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14489 .addReg(HiReg)
14490 .addReg(ReadAgainReg);
14491 BuildMI(BB, dl, TII->get(PPC::BCC))
14493 .addReg(CmpReg)
14494 .addMBB(readMBB);
14495
14496 BB->addSuccessor(readMBB);
14497 BB->addSuccessor(sinkMBB);
14498 break;
14499 }
// Atomic read-modify-write pseudos: the *_NOWP forms go through
// EmitPartwordAtomicBinary, the others through EmitAtomicBinary.
14500 case PPC::ATOMIC_LOAD_ADD_NOWP:
14501 BB = EmitPartwordAtomicBinary(MI, BB, PPC::ADD4);
14502 break;
14503 case PPC::ATOMIC_LOAD_ADD:
14504 BB = EmitAtomicBinary(MI, BB, PPC::ADD4);
14505 break;
14506 case PPC::ATOMIC_LOAD_ADD_I64:
14507 BB = EmitAtomicBinary(MI, BB, PPC::ADD8);
14508 break;
14509 case PPC::ATOMIC_LOAD_AND_NOWP:
14510 BB = EmitPartwordAtomicBinary(MI, BB, PPC::AND);
14511 break;
14512 case PPC::ATOMIC_LOAD_AND:
14513 BB = EmitAtomicBinary(MI, BB, PPC::AND);
14514 break;
14515 case PPC::ATOMIC_LOAD_AND_I64:
14516 BB = EmitAtomicBinary(MI, BB, PPC::AND8);
14517 break;
14518 case PPC::ATOMIC_LOAD_OR_NOWP:
14519 BB = EmitPartwordAtomicBinary(MI, BB, PPC::OR);
14520 break;
14521 case PPC::ATOMIC_LOAD_OR:
14522 BB = EmitAtomicBinary(MI, BB, PPC::OR);
14523 break;
14524 case PPC::ATOMIC_LOAD_OR_I64:
14525 BB = EmitAtomicBinary(MI, BB, PPC::OR8);
14526 break;
14527 case PPC::ATOMIC_LOAD_XOR_NOWP:
14528 BB = EmitPartwordAtomicBinary(MI, BB, PPC::XOR);
14529 break;
14530 case PPC::ATOMIC_LOAD_XOR:
14531 BB = EmitAtomicBinary(MI, BB, PPC::XOR);
14532 break;
14533 case PPC::ATOMIC_LOAD_XOR_I64:
14534 BB = EmitAtomicBinary(MI, BB, PPC::XOR8);
14535 break;
14536 case PPC::ATOMIC_LOAD_NAND_NOWP:
14537 BB = EmitPartwordAtomicBinary(MI, BB, PPC::NAND);
14538 break;
14539 case PPC::ATOMIC_LOAD_NAND:
14540 BB = EmitAtomicBinary(MI, BB, PPC::NAND);
14541 break;
14542 case PPC::ATOMIC_LOAD_NAND_I64:
14543 BB = EmitAtomicBinary(MI, BB, PPC::NAND8);
14544 break;
14545 case PPC::ATOMIC_LOAD_SUB_NOWP:
14546 BB = EmitPartwordAtomicBinary(MI, BB, PPC::SUBF);
14547 break;
14548 case PPC::ATOMIC_LOAD_SUB:
14549 BB = EmitAtomicBinary(MI, BB, PPC::SUBF);
14550 break;
14551 case PPC::ATOMIC_LOAD_SUB_I64:
14552 BB = EmitAtomicBinary(MI, BB, PPC::SUBF8);
14553 break;
// The min/max forms pass 0 as the binary opcode together with a compare
// opcode and a predicate: signed compares for MIN/MAX, logical (unsigned)
// compares for UMIN/UMAX.
14554 case PPC::ATOMIC_LOAD_MIN_NOWP:
14555 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14556 break;
14557 case PPC::ATOMIC_LOAD_MIN:
14558 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14559 break;
14560 case PPC::ATOMIC_LOAD_MIN_I64:
14561 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_LT);
14562 break;
14563 case PPC::ATOMIC_LOAD_MAX_NOWP:
14564 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14565 break;
14566 case PPC::ATOMIC_LOAD_MAX:
14567 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14568 break;
14569 case PPC::ATOMIC_LOAD_MAX_I64:
14570 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_GT);
14571 break;
14572 case PPC::ATOMIC_LOAD_UMIN_NOWP:
14573 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14574 break;
14575 case PPC::ATOMIC_LOAD_UMIN:
14576 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14577 break;
14578 case PPC::ATOMIC_LOAD_UMIN_I64:
14579 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_LT);
14580 break;
14581 case PPC::ATOMIC_LOAD_UMAX_NOWP:
14582 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14583 break;
14584 case PPC::ATOMIC_LOAD_UMAX:
14585 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14586 break;
14587 case PPC::ATOMIC_LOAD_UMAX_I64:
14588 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_GT);
14589 break;
// Swap passes 0 as the binary opcode and no compare.
14590 case PPC::ATOMIC_SWAP_NOWP:
14591 BB = EmitPartwordAtomicBinary(MI, BB, 0);
14592 break;
14593 case PPC::ATOMIC_SWAP:
14594 case PPC::ATOMIC_SWAP_I64:
14595 BB = EmitAtomicBinary(MI, BB, 0);
14596 break;
14597 case PPC::ATOMIC_CMP_SWAP_I32:
14598 case PPC::ATOMIC_CMP_SWAP_I64:
14599 case PPC::ATOMIC_CMP_SWAP_I8:
14600 case PPC::ATOMIC_CMP_SWAP_I16: {
14601 // Use hardware-supported atomic operations if available
// I32/I64 always take the hardware path; I8/I16 only when the subtarget has
// partword atomics, otherwise the word-based emulation is used.
14602 bool useHardware = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14603 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14604 (Subtarget.hasPartwordAtomics() &&
14605 (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14606 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16));
14607
14608 if (useHardware)
14609 BB = emitAtomicCmpSwapHardware(MI, BB, TII, Subtarget);
14610 else
14611 BB = emitAtomicCmpSwapSoftware(MI, BB, TII, Subtarget);
14612 break;
14613 }
14614 case PPC::FADDrtz: {
14615 // This pseudo performs an FADD with rounding mode temporarily forced
14616 // to round-to-zero. We emit this via custom inserter since the FPSCR
14617 // is not modeled at the SelectionDAG level.
14618 Register Dest = MI.getOperand(0).getReg();
14619 Register Src1 = MI.getOperand(1).getReg();
14620 Register Src2 = MI.getOperand(2).getReg();
14621 DebugLoc dl = MI.getDebugLoc();
14622
14623 MachineRegisterInfo &RegInfo = F->getRegInfo();
14624 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14625
14626 // Save FPSCR value.
14627 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14628
14629 // Set rounding mode to round-to-zero.
14630 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14631 .addImm(31)
14633
14634 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14635 .addImm(30)
14637
14638 // Perform addition.
14639 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14640 .addReg(Src1)
14641 .addReg(Src2);
14642 if (MI.getFlag(MachineInstr::NoFPExcept))
14644
14645 // Restore FPSCR value.
14646 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14647 break;
14648 }
14649 case PPC::ANDI_rec_1_EQ_BIT:
14650 case PPC::ANDI_rec_1_GT_BIT:
14651 case PPC::ANDI_rec_1_EQ_BIT8:
14652 case PPC::ANDI_rec_1_GT_BIT8: {
// The *8 pseudos use the 64-bit record-form AND-immediate; the others use
// the 32-bit form.
14653 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14654 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14655 ? PPC::ANDI8_rec
14656 : PPC::ANDI_rec;
14657 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14658 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14659
14660 MachineRegisterInfo &RegInfo = F->getRegInfo();
14661 Register Dest = RegInfo.createVirtualRegister(
14662 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14663
14664 DebugLoc Dl = MI.getDebugLoc();
// AND the source with 1, then copy the requested CR0 bit (EQ or GT) into
// the pseudo's result register.
14665 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14666 .addReg(MI.getOperand(1).getReg())
14667 .addImm(1);
14668 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14669 MI.getOperand(0).getReg())
14670 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14671 break;
14672 }
14673 case PPC::TCHECK_RET: {
14674 DebugLoc Dl = MI.getDebugLoc();
14675 MachineRegisterInfo &RegInfo = F->getRegInfo();
14676 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14677 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14678 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14679 MI.getOperand(0).getReg())
14680 .addReg(CRReg);
14681 break;
14682 }
14683 case PPC::TBEGIN_RET: {
14684 DebugLoc Dl = MI.getDebugLoc();
14685 unsigned Imm = MI.getOperand(1).getImm();
14686 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14687 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14688 MI.getOperand(0).getReg())
14689 .addReg(PPC::CR0EQ);
14690 break;
14691 }
14692 case PPC::SETRNDi: {
14693 DebugLoc dl = MI.getDebugLoc();
14694 Register OldFPSCRReg = MI.getOperand(0).getReg();
14695
14696 // Save FPSCR value.
// If the old value has no uses, an IMPLICIT_DEF is emitted instead of MFFS.
14697 if (MRI.use_empty(OldFPSCRReg))
14698 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14699 else
14700 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14701
14702 // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
14703 // the following settings:
14704 // 00 Round to nearest
14705 // 01 Round to 0
14706 // 10 Round to +inf
14707 // 11 Round to -inf
14708
14709 // When the operand is an immediate, use the two least significant bits of
14710 // the immediate to set the bits 62:63 of FPSCR.
14711 unsigned Mode = MI.getOperand(1).getImm();
14712 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14713 .addImm(31)
14715
14716 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14717 .addImm(30)
14719 break;
14720 }
14721 case PPC::SETRND: {
14722 DebugLoc dl = MI.getDebugLoc();
14723
14724 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14725 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14726 // If the target doesn't have DirectMove, we should use stack to do the
14727 // conversion, because the target doesn't have the instructions like mtvsrd
14728 // or mfvsrd to do this conversion directly.
14729 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14730 if (Subtarget.hasDirectMove()) {
14731 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14732 .addReg(SrcReg);
14733 } else {
14734 // Use stack to do the register copy.
// Defaults cover the G8RC -> F8RC direction (STD then LFD); they are
// swapped to STFD/LD below for F8RC -> G8RC.
14735 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14736 MachineRegisterInfo &RegInfo = F->getRegInfo();
14737 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14738 if (RC == &PPC::F8RCRegClass) {
14739 // Copy register from F8RCRegClass to G8RCRegclass.
14740 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14741 "Unsupported RegClass.");
14742
14743 StoreOp = PPC::STFD;
14744 LoadOp = PPC::LD;
14745 } else {
14746 // Copy register from G8RCRegClass to F8RCRegclass.
14747 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14748 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14749 "Unsupported RegClass.");
14750 }
14751
// An 8-byte, 8-byte-aligned stack object is used for the transfer.
14752 MachineFrameInfo &MFI = F->getFrameInfo();
14753 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14754
14755 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14756 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14758 MFI.getObjectAlign(FrameIdx));
14759
14760 // Store the SrcReg into the stack.
14761 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14762 .addReg(SrcReg)
14763 .addImm(0)
14764 .addFrameIndex(FrameIdx)
14765 .addMemOperand(MMOStore);
14766
14767 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14768 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14770 MFI.getObjectAlign(FrameIdx));
14771
14772 // Load from the stack where SrcReg is stored, and save to DestReg,
14773 // so we have done the RegClass conversion from RegClass::SrcReg to
14774 // RegClass::DestReg.
14775 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14776 .addImm(0)
14777 .addFrameIndex(FrameIdx)
14778 .addMemOperand(MMOLoad);
14779 }
14780 };
14781
14782 Register OldFPSCRReg = MI.getOperand(0).getReg();
14783
14784 // Save FPSCR value.
14785 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14786
14787 // When the operand is a gprc register, use the two least significant bits
14788 // of the register and mtfsf instruction to set the bits 62:63 of FPSCR.
14789 //
14790 // copy OldFPSCRTmpReg, OldFPSCRReg
14791 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14792 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14793 // copy NewFPSCRReg, NewFPSCRTmpReg
14794 // mtfsf 255, NewFPSCRReg
14795 MachineOperand SrcOp = MI.getOperand(1);
14796 MachineRegisterInfo &RegInfo = F->getRegInfo();
14797 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14798
14799 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14800
14801 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14802 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14803
14804 // The first operand of INSERT_SUBREG should be a register which has
14805 // subregisters, we only care about its RegClass, so we should use an
14806 // IMPLICIT_DEF register.
14807 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14808 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14809 .addReg(ImDefReg)
14810 .add(SrcOp)
14811 .addImm(1);
14812
14813 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14814 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14815 .addReg(OldFPSCRTmpReg)
14816 .addReg(ExtSrcReg)
14817 .addImm(0)
14818 .addImm(62);
14819
14820 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14821 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14822
14823 // The mask 255 means: put bits 32:63 of NewFPSCRReg into bits 32:63 of
14824 // FPSCR.
14825 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14826 .addImm(255)
14827 .addReg(NewFPSCRReg)
14828 .addImm(0)
14829 .addImm(0);
14830 break;
14831 }
14832 case PPC::SETFLM: {
14833 DebugLoc Dl = MI.getDebugLoc();
14834
14835 // Result of setflm is previous FPSCR content, so we need to save it first.
14836 Register OldFPSCRReg = MI.getOperand(0).getReg();
14837 if (MRI.use_empty(OldFPSCRReg))
14838 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14839 else
14840 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14841
14842 // Put bits in 32:63 to FPSCR.
14843 Register NewFPSCRReg = MI.getOperand(1).getReg();
14844 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14845 .addImm(255)
14846 .addReg(NewFPSCRReg)
14847 .addImm(0)
14848 .addImm(0);
14849 break;
14850 }
14851 case PPC::PROBED_ALLOCA_32:
14852 case PPC::PROBED_ALLOCA_64:
14853 return emitProbedAlloca(MI, BB);
14854
14855 case PPC::SPLIT_QUADWORD: {
14856 DebugLoc DL = MI.getDebugLoc();
14857 Register Src = MI.getOperand(2).getReg();
14858 Register Lo = MI.getOperand(0).getReg();
14859 Register Hi = MI.getOperand(1).getReg();
// Lo is copied from the sub_gp8_x1 subregister of Src, Hi from sub_gp8_x0.
14860 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14861 .addDef(Lo)
14862 .addUse(Src, {}, PPC::sub_gp8_x1);
14863 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14864 .addDef(Hi)
14865 .addUse(Src, {}, PPC::sub_gp8_x0);
14866 break;
14867 }
14868 case PPC::LQX_PSEUDO:
14869 case PPC::STQX_PSEUDO: {
14870 DebugLoc DL = MI.getDebugLoc();
14871 // Ptr is used as the ptr_rc_no_r0 part
14872 // of LQ/STQ's memory operand and adding result of RA and RB,
14873 // so it has to be g8rc_and_g8rc_nox0.
14874 Register Ptr =
14875 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14876 Register Val = MI.getOperand(0).getReg();
14877 Register RA = MI.getOperand(1).getReg();
14878 Register RB = MI.getOperand(2).getReg();
14879 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
// Val is a def for the load pseudo and a plain use for the store pseudo.
14880 BuildMI(*BB, MI, DL,
14881 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14882 : TII->get(PPC::STQ))
14883 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14884 .addImm(0)
14885 .addReg(Ptr);
14886 break;
14887 }
14888 default:
14889 llvm_unreachable("Unexpected instr type to insert");
14890 }
14891
// Reached only by the cases that `break`; cases that return early above do
// not reach this erase.
14892 MI.eraseFromParent(); // The pseudo instruction is gone now.
14893 return BB;
14894 }
14895
14896//===----------------------------------------------------------------------===//
14897// Target Optimization Hooks
14898//===----------------------------------------------------------------------===//
14899
14900static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14901 // For the estimates, convergence is quadratic, so we essentially double the
14902 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14903 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14904 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
14905 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14906 if (VT.getScalarType() == MVT::f64)
14907 RefinementSteps++;
14908 return RefinementSteps;
14909}
14910
14911SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14912 const DenormalMode &Mode,
14913 SDNodeFlags Flags) const {
14914 // We only have VSX Vector Test for software Square Root.
14915 EVT VT = Op.getValueType();
14916 if (!isTypeLegal(MVT::i1) ||
14917 (VT != MVT::f64 &&
14918 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14919 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14920
14921 SDLoc DL(Op);
14922 // The output register of FTSQRT is CR field.
14923 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14924 // ftsqrt BF,FRB
14925 // Let e_b be the unbiased exponent of the double-precision
14926 // floating-point operand in register FRB.
14927 // fe_flag is set to 1 if either of the following conditions occurs.
14928 // - The double-precision floating-point operand in register FRB is a zero,
14929 // a NaN, or an infinity, or a negative value.
14930 // - e_b is less than or equal to -970.
14931 // Otherwise fe_flag is set to 0.
14932 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14933 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14934 // exponent is less than -970)
14935 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14936 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14937 FTSQRT, SRIdxVal),
14938 0);
14939}
14940
// Produces the square-root result to use for a denormal input. For the types
// not rejected by the check below this is a PPCISD::FSQRT node on Op.
14941 SDValue
14942 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14943 SelectionDAG &DAG) const {
14944 // We only have VSX Vector Square Root.
14945 EVT VT = Op.getValueType();
// The check singles out every type other than f64 and, with VSX, v2f64/v4f32.
// NOTE(review): the statement controlled by this `if` is not present in this
// listing; confirm what it returns for those types.
14946 if (VT != MVT::f64 &&
14947 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14949
14950 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14951 }
14952
14953SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14954 int Enabled, int &RefinementSteps,
14955 bool &UseOneConstNR,
14956 bool Reciprocal) const {
14957 EVT VT = Operand.getValueType();
14958 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14959 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14960 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14961 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14962 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14963 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14964
14965 // The Newton-Raphson computation with a single constant does not provide
14966 // enough accuracy on some CPUs.
14967 UseOneConstNR = !Subtarget.needsTwoConstNR();
14968 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14969 }
14970 return SDValue();
14971}
14972
14973SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14974 int Enabled,
14975 int &RefinementSteps) const {
14976 EVT VT = Operand.getValueType();
14977 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14978 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14979 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14980 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14981 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14982 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14983 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14984 }
14985 return SDValue();
14986}
14987
14989 // Note: This functionality is used only when arcp is enabled, and
14990 // on cores with reciprocal estimates (which are used when arcp is
14991 // enabled for division), this functionality is redundant with the default
14992 // combiner logic (once the division -> reciprocal/multiply transformation
14993 // has taken place). As a result, this matters more for older cores than for
14994 // newer ones.
14995
14996 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14997 // reciprocal if there are two or more FDIVs (for embedded cores with only
14998 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14999 switch (Subtarget.getCPUDirective()) {
15000 default:
15001 return 3;
15002 case PPC::DIR_440:
15003 case PPC::DIR_A2:
15004 case PPC::DIR_E500:
15005 case PPC::DIR_E500mc:
15006 case PPC::DIR_E5500:
15007 return 2;
15008 }
15009}
15010
15011// isConsecutiveLSLoc needs to work even if all adds have not yet been
15012// collapsed, and so we need to look through chains of them.
15014 int64_t& Offset, SelectionDAG &DAG) {
15015 if (DAG.isBaseWithConstantOffset(Loc)) {
15016 Base = Loc.getOperand(0);
15017 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
15018
15019 // The base might itself be a base plus an offset, and if so, accumulate
15020 // that as well.
15021 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
15022 }
15023}
15024
15026 unsigned Bytes, int Dist,
15027 SelectionDAG &DAG) {
15028 if (VT.getSizeInBits() / 8 != Bytes)
15029 return false;
15030
15031 SDValue BaseLoc = Base->getBasePtr();
15032 if (Loc.getOpcode() == ISD::FrameIndex) {
15033 if (BaseLoc.getOpcode() != ISD::FrameIndex)
15034 return false;
15036 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
15037 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
15038 int FS = MFI.getObjectSize(FI);
15039 int BFS = MFI.getObjectSize(BFI);
15040 if (FS != BFS || FS != (int)Bytes) return false;
15041 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
15042 }
15043
15044 SDValue Base1 = Loc, Base2 = BaseLoc;
15045 int64_t Offset1 = 0, Offset2 = 0;
15046 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
15047 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
15048 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
15049 return true;
15050
15051 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15052 const GlobalValue *GV1 = nullptr;
15053 const GlobalValue *GV2 = nullptr;
15054 Offset1 = 0;
15055 Offset2 = 0;
15056 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
15057 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
15058 if (isGA1 && isGA2 && GV1 == GV2)
15059 return Offset1 == (Offset2 + Dist*Bytes);
15060 return false;
15061}
15062
15063// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
15064// not enforce equality of the chain operands.
15066 unsigned Bytes, int Dist,
15067 SelectionDAG &DAG) {
15069 EVT VT = LS->getMemoryVT();
15070 SDValue Loc = LS->getBasePtr();
15071 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
15072 }
15073
15074 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
15075 EVT VT;
15076 switch (N->getConstantOperandVal(1)) {
15077 default: return false;
15078 case Intrinsic::ppc_altivec_lvx:
15079 case Intrinsic::ppc_altivec_lvxl:
15080 case Intrinsic::ppc_vsx_lxvw4x:
15081 case Intrinsic::ppc_vsx_lxvw4x_be:
15082 VT = MVT::v4i32;
15083 break;
15084 case Intrinsic::ppc_vsx_lxvd2x:
15085 case Intrinsic::ppc_vsx_lxvd2x_be:
15086 VT = MVT::v2f64;
15087 break;
15088 case Intrinsic::ppc_altivec_lvebx:
15089 VT = MVT::i8;
15090 break;
15091 case Intrinsic::ppc_altivec_lvehx:
15092 VT = MVT::i16;
15093 break;
15094 case Intrinsic::ppc_altivec_lvewx:
15095 VT = MVT::i32;
15096 break;
15097 }
15098
15099 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
15100 }
15101
15102 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
15103 EVT VT;
15104 switch (N->getConstantOperandVal(1)) {
15105 default: return false;
15106 case Intrinsic::ppc_altivec_stvx:
15107 case Intrinsic::ppc_altivec_stvxl:
15108 case Intrinsic::ppc_vsx_stxvw4x:
15109 VT = MVT::v4i32;
15110 break;
15111 case Intrinsic::ppc_vsx_stxvd2x:
15112 VT = MVT::v2f64;
15113 break;
15114 case Intrinsic::ppc_vsx_stxvw4x_be:
15115 VT = MVT::v4i32;
15116 break;
15117 case Intrinsic::ppc_vsx_stxvd2x_be:
15118 VT = MVT::v2f64;
15119 break;
15120 case Intrinsic::ppc_altivec_stvebx:
15121 VT = MVT::i8;
15122 break;
15123 case Intrinsic::ppc_altivec_stvehx:
15124 VT = MVT::i16;
15125 break;
15126 case Intrinsic::ppc_altivec_stvewx:
15127 VT = MVT::i32;
15128 break;
15129 }
15130
15131 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
15132 }
15133
15134 return false;
15135}
15136
15137// Return true if there is a nearby consecutive load to the one provided
15138// (regardless of alignment). We search up and down the chain, looking through
15139// token factors and other loads (but nothing else). As a result, a true result
15140// indicates that it is safe to create a new consecutive load adjacent to the
15141// load provided.
15143 SDValue Chain = LD->getChain();
15144 EVT VT = LD->getMemoryVT();
15145
15146 SmallPtrSet<SDNode *, 16> LoadRoots;
15147 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
15149
15150 // First, search up the chain, branching to follow all token-factor operands.
15151 // If we find a consecutive load, then we're done, otherwise, record all
15152 // nodes just above the top-level loads and token factors.
15153 while (!Queue.empty()) {
15154 SDNode *ChainNext = Queue.pop_back_val();
15155 if (!Visited.insert(ChainNext).second)
15156 continue;
15157
15158 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
15159 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15160 return true;
15161
15162 if (!Visited.count(ChainLD->getChain().getNode()))
15163 Queue.push_back(ChainLD->getChain().getNode());
15164 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15165 for (const SDUse &O : ChainNext->ops())
15166 if (!Visited.count(O.getNode()))
15167 Queue.push_back(O.getNode());
15168 } else
15169 LoadRoots.insert(ChainNext);
15170 }
15171
15172 // Second, search down the chain, starting from the top-level nodes recorded
15173 // in the first phase. These top-level nodes are the nodes just above all
15174 // loads and token factors. Starting with their uses, recursively look through
15175 // all loads (just the chain uses) and token factors to find a consecutive
15176 // load.
15177 Visited.clear();
15178 Queue.clear();
15179
15180 for (SDNode *I : LoadRoots) {
15181 Queue.push_back(I);
15182
15183 while (!Queue.empty()) {
15184 SDNode *LoadRoot = Queue.pop_back_val();
15185 if (!Visited.insert(LoadRoot).second)
15186 continue;
15187
15188 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15189 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15190 return true;
15191
15192 for (SDNode *U : LoadRoot->users())
15193 if (((isa<MemSDNode>(U) &&
15194 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15195 U->getOpcode() == ISD::TokenFactor) &&
15196 !Visited.count(U))
15197 Queue.push_back(U);
15198 }
15199 }
15200
15201 return false;
15202}
15203
15204/// This function is called when we have proved that a SETCC node can be
/// replaced
15205/// by subtraction (and other supporting instructions) so that the result of
15206/// the comparison is kept in a GPR instead of a CR. This function is purely
/// for
15207/// codegen purposes and has some flags to guide the codegen process.
///
/// The emitted sequence is: zero-extend both SETCC operands to i64, optionally
/// swap them, subtract, logically shift the difference right by Size - 1,
/// optionally XOR the result with 1, and truncate to i1.
///
/// \param N           The node being replaced; asserted to be an ISD::SETCC.
/// \param Size        Bit width used for the shift amount (Size - 1); it is
///                    also attached as an extra constant operand to each
///                    ZERO_EXTEND node.
/// \param Complement  When true, the shifted bit is inverted (XOR with 1).
/// \param Swap        When true, the two operands are exchanged before the
///                    subtraction.
/// \param DL          Debug location applied to every node created here.
/// \param DAG         DAG in which the nodes are created.
/// \returns The i1 TRUNCATE node carrying the comparison result.
///
/// NOTE(review): the extension and arithmetic types are hard-coded to MVT::i64
/// while the shift amount is derived from Size; this presumably expects
/// Size == 64 -- confirm against the caller.
15208static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15209 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15210 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15211
15212 // Zero extend the operands to the largest legal integer. Originally, they
15213 // must be of a strictly smaller size.
15214 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15215 DAG.getConstant(Size, DL, MVT::i32));
15216 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15217 DAG.getConstant(Size, DL, MVT::i32));
15218
15219 // Swap if needed. Depends on the condition code.
15220 if (Swap)
15221 std::swap(Op0, Op1);
15222
15223 // Subtract extended integers.
15224 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15225
15226 // Move the sign bit to the least significant position and zero out the rest.
15227 // Now the least significant bit carries the result of original comparison.
15228 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15229 DAG.getConstant(Size - 1, DL, MVT::i32));
15230 auto Final = Shifted;
15231
15232 // Complement the result if needed. Based on the condition code.
15233 if (Complement)
15234 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15235 DAG.getConstant(1, DL, MVT::i64));
15236
  // Only the least significant bit is meaningful; narrow to the i1 result.
15237 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15238}
15239
15240SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15241 DAGCombinerInfo &DCI) const {
15242 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15243
15244 SelectionDAG &DAG = DCI.DAG;
15245 SDLoc DL(N);
15246
15247 // Size of integers being compared has a critical role in the following
15248 // analysis, so we prefer to do this when all types are legal.
15249 if (!DCI.isAfterLegalizeDAG())
15250 return SDValue();
15251
15252 // If all users of SETCC extend its value to a legal integer type
15253 // then we replace SETCC with a subtraction
15254 for (const SDNode *U : N->users())
15255 if (U->getOpcode() != ISD::ZERO_EXTEND)
15256 return SDValue();
15257
15258 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15259 auto OpSize = N->getOperand(0).getValueSizeInBits();
15260
15262
15263 if (OpSize < Size) {
15264 switch (CC) {
15265 default: break;
15266 case ISD::SETULT:
15267 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15268 case ISD::SETULE:
15269 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15270 case ISD::SETUGT:
15271 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15272 case ISD::SETUGE:
15273 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15274 }
15275 }
15276
15277 return SDValue();
15278}
15279
15280SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15281 DAGCombinerInfo &DCI) const {
15282 SelectionDAG &DAG = DCI.DAG;
15283 SDLoc dl(N);
15284
15285 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15286 // If we're tracking CR bits, we need to be careful that we don't have:
15287 // trunc(binary-ops(zext(x), zext(y)))
15288 // or
15289 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15290 // such that we're unnecessarily moving things into GPRs when it would be
15291 // better to keep them in CR bits.
15292
15293 // Note that trunc here can be an actual i1 trunc, or can be the effective
15294 // truncation that comes from a setcc or select_cc.
15295 if (N->getOpcode() == ISD::TRUNCATE &&
15296 N->getValueType(0) != MVT::i1)
15297 return SDValue();
15298
15299 if (N->getOperand(0).getValueType() != MVT::i32 &&
15300 N->getOperand(0).getValueType() != MVT::i64)
15301 return SDValue();
15302
15303 if (N->getOpcode() == ISD::SETCC ||
15304 N->getOpcode() == ISD::SELECT_CC) {
15305 // If we're looking at a comparison, then we need to make sure that the
15306 // high bits (all except for the first) don't affect the result.
15307 ISD::CondCode CC =
15308 cast<CondCodeSDNode>(N->getOperand(
15309 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15310 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15311
15312 if (ISD::isSignedIntSetCC(CC)) {
15313 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15314 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15315 return SDValue();
15316 } else if (ISD::isUnsignedIntSetCC(CC)) {
15317 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15318 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15319 !DAG.MaskedValueIsZero(N->getOperand(1),
15320 APInt::getHighBitsSet(OpBits, OpBits-1)))
15321 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15322 : SDValue());
15323 } else {
15324 // This is neither a signed nor an unsigned comparison, just make sure
15325 // that the high bits are equal.
15326 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15327 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15328
15329 // We don't really care about what is known about the first bit (if
15330 // anything), so pretend that it is known zero for both to ensure they can
15331 // be compared as constants.
15332 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15333 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15334
15335 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15336 Op1Known.getConstant() != Op2Known.getConstant())
15337 return SDValue();
15338 }
15339 }
15340
15341 // We now know that the higher-order bits are irrelevant, we just need to
15342 // make sure that all of the intermediate operations are bit operations, and
15343 // all inputs are extensions.
15344 if (N->getOperand(0).getOpcode() != ISD::AND &&
15345 N->getOperand(0).getOpcode() != ISD::OR &&
15346 N->getOperand(0).getOpcode() != ISD::XOR &&
15347 N->getOperand(0).getOpcode() != ISD::SELECT &&
15348 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15349 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15350 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15351 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15352 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15353 return SDValue();
15354
15355 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15356 N->getOperand(1).getOpcode() != ISD::AND &&
15357 N->getOperand(1).getOpcode() != ISD::OR &&
15358 N->getOperand(1).getOpcode() != ISD::XOR &&
15359 N->getOperand(1).getOpcode() != ISD::SELECT &&
15360 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15361 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15362 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15363 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15364 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15365 return SDValue();
15366
15368 SmallVector<SDValue, 8> BinOps, PromOps;
15369 SmallPtrSet<SDNode *, 16> Visited;
15370
15371 for (unsigned i = 0; i < 2; ++i) {
15372 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15373 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15374 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15375 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15376 isa<ConstantSDNode>(N->getOperand(i)))
15377 Inputs.push_back(N->getOperand(i));
15378 else
15379 BinOps.push_back(N->getOperand(i));
15380
15381 if (N->getOpcode() == ISD::TRUNCATE)
15382 break;
15383 }
15384
15385 // Visit all inputs, collect all binary operations (and, or, xor and
15386 // select) that are all fed by extensions.
15387 while (!BinOps.empty()) {
15388 SDValue BinOp = BinOps.pop_back_val();
15389
15390 if (!Visited.insert(BinOp.getNode()).second)
15391 continue;
15392
15393 PromOps.push_back(BinOp);
15394
15395 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15396 // The condition of the select is not promoted.
15397 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15398 continue;
15399 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15400 continue;
15401
15402 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15403 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15404 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15405 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15406 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15407 Inputs.push_back(BinOp.getOperand(i));
15408 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15409 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15410 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15411 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15412 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15413 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15414 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15415 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15416 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15417 BinOps.push_back(BinOp.getOperand(i));
15418 } else {
15419 // We have an input that is not an extension or another binary
15420 // operation; we'll abort this transformation.
15421 return SDValue();
15422 }
15423 }
15424 }
15425
15426 // Make sure that this is a self-contained cluster of operations (which
15427 // is not quite the same thing as saying that everything has only one
15428 // use).
15429 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15430 if (isa<ConstantSDNode>(Inputs[i]))
15431 continue;
15432
15433 for (const SDNode *User : Inputs[i].getNode()->users()) {
15434 if (User != N && !Visited.count(User))
15435 return SDValue();
15436
15437 // Make sure that we're not going to promote the non-output-value
15438 // operand(s) of SELECT or SELECT_CC.
15439 // FIXME: Although we could sometimes handle this, and it does occur in
15440 // practice that one of the condition inputs to the select is also one of
15441 // the outputs, we currently can't deal with this.
15442 if (User->getOpcode() == ISD::SELECT) {
15443 if (User->getOperand(0) == Inputs[i])
15444 return SDValue();
15445 } else if (User->getOpcode() == ISD::SELECT_CC) {
15446 if (User->getOperand(0) == Inputs[i] ||
15447 User->getOperand(1) == Inputs[i])
15448 return SDValue();
15449 }
15450 }
15451 }
15452
15453 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15454 for (const SDNode *User : PromOps[i].getNode()->users()) {
15455 if (User != N && !Visited.count(User))
15456 return SDValue();
15457
15458 // Make sure that we're not going to promote the non-output-value
15459 // operand(s) of SELECT or SELECT_CC.
15460 // FIXME: Although we could sometimes handle this, and it does occur in
15461 // practice that one of the condition inputs to the select is also one of
15462 // the outputs, we currently can't deal with this.
15463 if (User->getOpcode() == ISD::SELECT) {
15464 if (User->getOperand(0) == PromOps[i])
15465 return SDValue();
15466 } else if (User->getOpcode() == ISD::SELECT_CC) {
15467 if (User->getOperand(0) == PromOps[i] ||
15468 User->getOperand(1) == PromOps[i])
15469 return SDValue();
15470 }
15471 }
15472 }
15473
15474 // Replace all inputs with the extension operand.
15475 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15476 // Constants may have users outside the cluster of to-be-promoted nodes,
15477 // and so we need to replace those as we do the promotions.
15478 if (isa<ConstantSDNode>(Inputs[i]))
15479 continue;
15480 else
15481 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15482 }
15483
15484 std::list<HandleSDNode> PromOpHandles;
15485 for (auto &PromOp : PromOps)
15486 PromOpHandles.emplace_back(PromOp);
15487
15488 // Replace all operations (these are all the same, but have a different
15489 // (i1) return type). DAG.getNode will validate that the types of
15490 // a binary operator match, so go through the list in reverse so that
15491 // we've likely promoted both operands first. Any intermediate truncations or
15492 // extensions disappear.
15493 while (!PromOpHandles.empty()) {
15494 SDValue PromOp = PromOpHandles.back().getValue();
15495 PromOpHandles.pop_back();
15496
15497 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15498 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15499 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15500 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15501 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15502 PromOp.getOperand(0).getValueType() != MVT::i1) {
15503 // The operand is not yet ready (see comment below).
15504 PromOpHandles.emplace_front(PromOp);
15505 continue;
15506 }
15507
15508 SDValue RepValue = PromOp.getOperand(0);
15509 if (isa<ConstantSDNode>(RepValue))
15510 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15511
15512 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15513 continue;
15514 }
15515
15516 unsigned C;
15517 switch (PromOp.getOpcode()) {
15518 default: C = 0; break;
15519 case ISD::SELECT: C = 1; break;
15520 case ISD::SELECT_CC: C = 2; break;
15521 }
15522
15523 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15524 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15525 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15526 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15527 // The to-be-promoted operands of this node have not yet been
15528 // promoted (this should be rare because we're going through the
15529 // list backward, but if one of the operands has several users in
15530 // this cluster of to-be-promoted nodes, it is possible).
15531 PromOpHandles.emplace_front(PromOp);
15532 continue;
15533 }
15534
15536
15537 // If there are any constant inputs, make sure they're replaced now.
15538 for (unsigned i = 0; i < 2; ++i)
15539 if (isa<ConstantSDNode>(Ops[C+i]))
15540 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15541
15542 DAG.ReplaceAllUsesOfValueWith(PromOp,
15543 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15544 }
15545
15546 // Now we're left with the initial truncation itself.
15547 if (N->getOpcode() == ISD::TRUNCATE)
15548 return N->getOperand(0);
15549
15550 // Otherwise, this is a comparison. The operands to be compared have just
15551 // changed type (to i1), but everything else is the same.
15552 return SDValue(N, 0);
15553}
15554
15555SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15556 DAGCombinerInfo &DCI) const {
15557 SelectionDAG &DAG = DCI.DAG;
15558 SDLoc dl(N);
15559
15560 // If we're tracking CR bits, we need to be careful that we don't have:
15561 // zext(binary-ops(trunc(x), trunc(y)))
15562 // or
15563 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15564 // such that we're unnecessarily moving things into CR bits that can more
15565 // efficiently stay in GPRs. Note that if we're not certain that the high
15566 // bits are set as required by the final extension, we still may need to do
15567 // some masking to get the proper behavior.
15568
15569 // This same functionality is important on PPC64 when dealing with
15570 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15571 // the return values of functions. Because it is so similar, it is handled
15572 // here as well.
15573
15574 if (N->getValueType(0) != MVT::i32 &&
15575 N->getValueType(0) != MVT::i64)
15576 return SDValue();
15577
15578 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15579 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15580 return SDValue();
15581
15582 if (N->getOperand(0).getOpcode() != ISD::AND &&
15583 N->getOperand(0).getOpcode() != ISD::OR &&
15584 N->getOperand(0).getOpcode() != ISD::XOR &&
15585 N->getOperand(0).getOpcode() != ISD::SELECT &&
15586 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15587 return SDValue();
15588
15590 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15591 SmallPtrSet<SDNode *, 16> Visited;
15592
15593 // Visit all inputs, collect all binary operations (and, or, xor and
15594 // select) that are all fed by truncations.
15595 while (!BinOps.empty()) {
15596 SDValue BinOp = BinOps.pop_back_val();
15597
15598 if (!Visited.insert(BinOp.getNode()).second)
15599 continue;
15600
15601 PromOps.push_back(BinOp);
15602
15603 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15604 // The condition of the select is not promoted.
15605 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15606 continue;
15607 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15608 continue;
15609
15610 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15611 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15612 Inputs.push_back(BinOp.getOperand(i));
15613 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15614 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15615 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15616 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15617 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15618 BinOps.push_back(BinOp.getOperand(i));
15619 } else {
15620 // We have an input that is not a truncation or another binary
15621 // operation; we'll abort this transformation.
15622 return SDValue();
15623 }
15624 }
15625 }
15626
15627 // The operands of a select that must be truncated when the select is
15628 // promoted because the operand is actually part of the to-be-promoted set.
15629 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15630
15631 // Make sure that this is a self-contained cluster of operations (which
15632 // is not quite the same thing as saying that everything has only one
15633 // use).
15634 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15635 if (isa<ConstantSDNode>(Inputs[i]))
15636 continue;
15637
15638 for (SDNode *User : Inputs[i].getNode()->users()) {
15639 if (User != N && !Visited.count(User))
15640 return SDValue();
15641
15642 // If we're going to promote the non-output-value operand(s) of SELECT or
15643 // SELECT_CC, record them for truncation.
15644 if (User->getOpcode() == ISD::SELECT) {
15645 if (User->getOperand(0) == Inputs[i])
15646 SelectTruncOp[0].insert(std::make_pair(User,
15647 User->getOperand(0).getValueType()));
15648 } else if (User->getOpcode() == ISD::SELECT_CC) {
15649 if (User->getOperand(0) == Inputs[i])
15650 SelectTruncOp[0].insert(std::make_pair(User,
15651 User->getOperand(0).getValueType()));
15652 if (User->getOperand(1) == Inputs[i])
15653 SelectTruncOp[1].insert(std::make_pair(User,
15654 User->getOperand(1).getValueType()));
15655 }
15656 }
15657 }
15658
15659 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15660 for (SDNode *User : PromOps[i].getNode()->users()) {
15661 if (User != N && !Visited.count(User))
15662 return SDValue();
15663
15664 // If we're going to promote the non-output-value operand(s) of SELECT or
15665 // SELECT_CC, record them for truncation.
15666 if (User->getOpcode() == ISD::SELECT) {
15667 if (User->getOperand(0) == PromOps[i])
15668 SelectTruncOp[0].insert(std::make_pair(User,
15669 User->getOperand(0).getValueType()));
15670 } else if (User->getOpcode() == ISD::SELECT_CC) {
15671 if (User->getOperand(0) == PromOps[i])
15672 SelectTruncOp[0].insert(std::make_pair(User,
15673 User->getOperand(0).getValueType()));
15674 if (User->getOperand(1) == PromOps[i])
15675 SelectTruncOp[1].insert(std::make_pair(User,
15676 User->getOperand(1).getValueType()));
15677 }
15678 }
15679 }
15680
15681 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15682 bool ReallyNeedsExt = false;
15683 if (N->getOpcode() != ISD::ANY_EXTEND) {
15684 // If not all of the inputs are already sign/zero extended, then
15685 // we'll still need to do that at the end.
15686 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15687 if (isa<ConstantSDNode>(Inputs[i]))
15688 continue;
15689
15690 unsigned OpBits =
15691 Inputs[i].getOperand(0).getValueSizeInBits();
15692 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15693
15694 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15695 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15696 APInt::getHighBitsSet(OpBits,
15697 OpBits-PromBits))) ||
15698 (N->getOpcode() == ISD::SIGN_EXTEND &&
15699 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15700 (OpBits-(PromBits-1)))) {
15701 ReallyNeedsExt = true;
15702 break;
15703 }
15704 }
15705 }
15706
15707 // Convert PromOps to handles before doing any RAUW operations, as these
15708 // may CSE with existing nodes, deleting the originals.
15709 std::list<HandleSDNode> PromOpHandles;
15710 for (auto &PromOp : PromOps)
15711 PromOpHandles.emplace_back(PromOp);
15712
15713 // Replace all inputs, either with the truncation operand, or a
15714 // truncation or extension to the final output type.
15715 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15716 // Constant inputs need to be replaced with the to-be-promoted nodes that
15717 // use them because they might have users outside of the cluster of
15718 // promoted nodes.
15719 if (isa<ConstantSDNode>(Inputs[i]))
15720 continue;
15721
15722 SDValue InSrc = Inputs[i].getOperand(0);
15723 if (Inputs[i].getValueType() == N->getValueType(0))
15724 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15725 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15726 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15727 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15728 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15729 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15730 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15731 else
15732 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15733 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15734 }
15735
15736 // Replace all operations (these are all the same, but have a different
15737 // (promoted) return type). DAG.getNode will validate that the types of
15738 // a binary operator match, so go through the list in reverse so that
15739 // we've likely promoted both operands first.
15740 while (!PromOpHandles.empty()) {
15741 SDValue PromOp = PromOpHandles.back().getValue();
15742 PromOpHandles.pop_back();
15743
15744 unsigned C;
15745 switch (PromOp.getOpcode()) {
15746 default: C = 0; break;
15747 case ISD::SELECT: C = 1; break;
15748 case ISD::SELECT_CC: C = 2; break;
15749 }
15750
15751 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15752 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15753 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15754 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15755 // The to-be-promoted operands of this node have not yet been
15756 // promoted (this should be rare because we're going through the
15757 // list backward, but if one of the operands has several users in
15758 // this cluster of to-be-promoted nodes, it is possible).
15759 PromOpHandles.emplace_front(PromOp);
15760 continue;
15761 }
15762
15763 // For SELECT and SELECT_CC nodes, we do a similar check for any
15764 // to-be-promoted comparison inputs.
15765 if (PromOp.getOpcode() == ISD::SELECT ||
15766 PromOp.getOpcode() == ISD::SELECT_CC) {
15767 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15768 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15769 (SelectTruncOp[1].count(PromOp.getNode()) &&
15770 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15771 PromOpHandles.emplace_front(PromOp);
15772 continue;
15773 }
15774 }
15775
15777
15778 // If this node has constant inputs, then they'll need to be promoted here.
15779 for (unsigned i = 0; i < 2; ++i) {
15780 if (!isa<ConstantSDNode>(Ops[C+i]))
15781 continue;
15782 if (Ops[C+i].getValueType() == N->getValueType(0))
15783 continue;
15784
15785 if (N->getOpcode() == ISD::SIGN_EXTEND)
15786 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15787 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15788 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15789 else
15790 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15791 }
15792
15793 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15794 // truncate them again to the original value type.
15795 if (PromOp.getOpcode() == ISD::SELECT ||
15796 PromOp.getOpcode() == ISD::SELECT_CC) {
15797 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15798 if (SI0 != SelectTruncOp[0].end())
15799 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15800 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15801 if (SI1 != SelectTruncOp[1].end())
15802 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15803 }
15804
15805 DAG.ReplaceAllUsesOfValueWith(PromOp,
15806 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15807 }
15808
15809 // Now we're left with the initial extension itself.
15810 if (!ReallyNeedsExt)
15811 return N->getOperand(0);
15812
15813 // To zero extend, just mask off everything except for the first bit (in the
15814 // i1 case).
15815 if (N->getOpcode() == ISD::ZERO_EXTEND)
15816 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15818 N->getValueSizeInBits(0), PromBits),
15819 dl, N->getValueType(0)));
15820
15821 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15822 "Invalid extension type");
15823 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15824 SDValue ShiftCst =
15825 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15826 return DAG.getNode(
15827 ISD::SRA, dl, N->getValueType(0),
15828 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15829 ShiftCst);
15830}
15831
15832// The function check a i128 load can convert to 16i8 load for Vcmpequb.
15833static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64) {
15834
15835 auto isValidForConvert = [IsPPC64](SDValue &Operand) {
15836 if (!Operand.hasOneUse())
15837 return false;
15838
15839 if (Operand.getValueType() != MVT::i128)
15840 return false;
15841
15842 if (Operand.getOpcode() == ISD::Constant) {
15843 auto *C = cast<ConstantSDNode>(Operand);
15844 const APInt &Val = C->getAPIntValue();
15845 // On PPC64, comparing an i128 value loaded from memory against a
15846 // constant smaller than 2^16 is usually better left to scalar lowering.
15847 // In that case, the compare can be lowered using xori (since xori has a
15848 // 16-bit immediate field), which is cheaper than materializing a vector
15849 // constant and using vcmpequb.
15850 if (IsPPC64 && Val.ult(1ULL << 16))
15851 return false;
15852 return true;
15853 }
15854
15855 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15856 if (!LoadNode)
15857 return false;
15858
15859 // If memory operation is volatile, do not perform any
15860 // optimization or transformation. Volatile operations must be preserved
15861 // as written to ensure correct program behavior, so we return an empty
15862 // SDValue to indicate no action.
15863
15864 if (LoadNode->isVolatile())
15865 return false;
15866
15867 // Only combine loads if both use the unindexed addressing mode.
15868 // PowerPC AltiVec/VMX does not support vector loads or stores with
15869 // pre/post-increment addressing. Indexed modes may imply implicit
15870 // pointer updates, which are not compatible with AltiVec vector
15871 // instructions.
15872 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15873 return false;
15874
15875 // Only combine loads if both are non-extending loads
15876 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15877 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15878 // loaded value's semantics and are not compatible with vector loads.
15879 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15880 return false;
15881
15882 return true;
15883 };
15884
15885 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15886}
15887
15889 const SDLoc &DL) {
15890
15891 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15892
15893 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15894 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15895 "CC mus be ISD::SETNE or ISD::SETEQ");
15896
15897 auto getV16i8Load = [&](const SDValue &Operand) {
15898 if (Operand.getOpcode() == ISD::Constant)
15899 return DAG.getBitcast(MVT::v16i8, Operand);
15900
15901 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15902
15903 auto *LoadNode = cast<LoadSDNode>(Operand);
15904 // Create a new MachineMemOperand without range metadata.
15905 // Range metadata is only valid for integer scalar types, not vectors.
15906 // The original i128 load may have range metadata, but when we convert
15907 // to v16i8, that metadata is no longer semantically valid.
15908 MachineMemOperand *MMO = LoadNode->getMemOperand();
15911 MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(), MMO->getAlign(),
15912 MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
15913 MMO->getSuccessOrdering(), MMO->getFailureOrdering());
15914 SDValue NewLoad = DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15915 LoadNode->getBasePtr(), NewMMO);
15916 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadNode, 1), NewLoad.getValue(1));
15917 return NewLoad;
15918 };
15919
15920 // Following code transforms the DAG
15921 // t0: ch,glue = EntryToken
15922 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15923 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15924 // undef:i64
15925 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15926 // t5: i128,ch =
15927 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15928 // setcc t3, t5, setne:ch
15929 //
15930 // ---->
15931 //
15932 // t0: ch,glue = EntryToken
15933 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15934 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15935 // undef:i64
15936 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15937 // t5: v16i8,ch =
15938 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15939 // t6: i32 =
15940 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15941 // Constant:i32<2>, t3, t5
15942 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15943
15944 // Or transforms the DAG
15945 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15946 // t8: i1 =
15947 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15948 //
15949 // --->
15950 //
15951 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15952 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15953 // t7: i32 =
15954 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15955
15956 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15957 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15958
15959 SDValue IntrID =
15960 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15961 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15962 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15963 IntrID, CRSel, LHSVec, RHSVec);
15964 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15965 // so we need to invert the CC opcode.
15966 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15967 DAG.getConstant(0, DL, MVT::i32),
15968 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15969}
15970
15971// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15972// If it is , return true; otherwise return false.
15974 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15975
15976 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15977 if (CC != ISD::SETEQ)
15978 return false;
15979
15980 SDValue LHS = N->getOperand(0);
15981 SDValue RHS = N->getOperand(1);
15982
15983 // Check the `SDValue &V` is from `and` with `1`.
15984 auto IsAndWithOne = [](SDValue &V) {
15985 if (V.getOpcode() == ISD::AND) {
15986 for (const SDValue &Op : V->ops())
15987 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15988 if (C->isOne())
15989 return true;
15990 }
15991 return false;
15992 };
15993
15994 // Check whether the SETCC compare with zero.
15995 auto IsCompareWithZero = [](SDValue &V) {
15996 if (auto *C = dyn_cast<ConstantSDNode>(V))
15997 if (C->isZero())
15998 return true;
15999 return false;
16000 };
16001
16002 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
16003 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
16004}
16005
16006// You must check whether the `SDNode* N` can be converted to Xori using
16007// the function `static bool canConvertSETCCToXori(SDNode *N)`
16008// before calling the function; otherwise, it may produce incorrect results.
16010
16011 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
16012 SDValue LHS = N->getOperand(0);
16013 SDValue RHS = N->getOperand(1);
16014 SDLoc DL(N);
16015
16016 [[maybe_unused]] ISD::CondCode CC =
16017 cast<CondCodeSDNode>(N->getOperand(2))->get();
16018 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
16019 // Rewrite it as XORI (and X, 1), 1.
16020 auto MakeXor1 = [&](SDValue V) {
16021 EVT VT = V.getValueType();
16022 SDValue One = DAG.getConstant(1, DL, VT);
16023 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
16024 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
16025 };
16026
16027 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
16028 return MakeXor1(LHS);
16029
16030 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
16031 return MakeXor1(RHS);
16032
16033 llvm_unreachable("Should not reach here.");
16034}
16035
16036// Match `sext(setcc X, 0, eq)` and turn it into an ADDIC/SUBFE sequence.
16037//
16038// This generates code for:
16039// X == 0 ? -1 : 0
16040//
16041// On pre-ISA 3.1 targets, this is better than the longer CNTLZW/SRWI/NEG
16042// sequence. This is useful for cases like:
16043// uint8_t f(uint8_t x) { return (x == 0) ? -1 : 0; }
16044//
16045// ISA 3.1+ is skipped because those targets can use SETBC.
16046
16047SDValue PPCTargetLowering::combineSignExtendSetCC(SDNode *N,
16048 DAGCombinerInfo &DCI) const {
16049 if (Subtarget.isISA3_1())
16050 return SDValue();
16051
16052 EVT VT = N->getValueType(0);
16053 if (VT != MVT::i32 && VT != MVT::i64)
16054 return SDValue();
16055
16056 SDValue N0 = N->getOperand(0);
16057 if (N0.getOpcode() != ISD::SETCC)
16058 return SDValue();
16059
16061 SDValue LHS = N0.getOperand(0);
16062 SDValue RHS = N0.getOperand(1);
16063
16064 // Not match: sext (setcc x, 0, eq) or sext (setcc 0, x, eq)
16065 if (CC != ISD::SETEQ || (!isNullConstant(LHS) && !isNullConstant(RHS)))
16066 return SDValue();
16067
16068 SDLoc dl(N);
16069 SelectionDAG &DAG = DCI.DAG;
16071 EVT XVT = X.getValueType(); // The type of x in the setcc x, 0, eq.
16072
16073 if ((XVT == MVT::i64 || VT == MVT::i64) && !Subtarget.isPPC64())
16074 return SDValue();
16075
16076 // On PPC64, i32 carry operations use the full 64-bit XER register,
16077 // so we must use i64 operations to avoid incorrect results.
16078 // Use i64 operations and truncate the result if needed.
16079 if (XVT != MVT::i64 && Subtarget.isPPC64())
16080 // Zero-extend if input type is not 64bits.
16081 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, X);
16082
16083 EVT OpVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
16084
16085 // Generate: SUBFE(ADDC(X, -1)).
16086 SDValue MinusOne = DAG.getAllOnesConstant(dl, OpVT);
16087 SDValue Addc =
16088 DAG.getNode(PPCISD::ADDC, dl, DAG.getVTList(OpVT, MVT::i32), X, MinusOne);
16089 SDValue Carry = Addc.getValue(1);
16090 SDValue Sube = DAG.getNode(PPCISD::SUBE, dl, DAG.getVTList(OpVT, MVT::i32),
16091 Addc, Addc, Carry);
16092
16093 // Truncate back to i32 if we used i64 operations.
16094 if (OpVT == MVT::i64 && VT == MVT::i32)
16095 return DAG.getNode(ISD::TRUNCATE, dl, VT, Sube);
16096
16097 return Sube;
16098}
16099
16100SDValue PPCTargetLowering::combineSetCC(SDNode *N,
16101 DAGCombinerInfo &DCI) const {
16102 assert(N->getOpcode() == ISD::SETCC &&
16103 "Should be called with a SETCC node");
16104
16105 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
16106 // If it is, rewrite it as XORI (and X, 1), 1.
16108 return ConvertSETCCToXori(N, DCI.DAG);
16109
16110 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
16111 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
16112 SDValue LHS = N->getOperand(0);
16113 SDValue RHS = N->getOperand(1);
16114
16115 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
16116 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
16117 LHS.hasOneUse())
16118 std::swap(LHS, RHS);
16119
16120 // x == 0-y --> x+y == 0
16121 // x != 0-y --> x+y != 0
16122 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
16123 RHS.hasOneUse()) {
16124 SDLoc DL(N);
16125 SelectionDAG &DAG = DCI.DAG;
16126 EVT VT = N->getValueType(0);
16127 EVT OpVT = LHS.getValueType();
16128 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
16129 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
16130 }
16131
16132 // Optimization: Fold i128 equality/inequality compares of two loads into a
16133 // vectorized compare using vcmpequb.p when Altivec is available.
16134 //
16135 // Rationale:
16136 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
16137 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
16138 // as v16i8 vectors and use the Altive vcmpequb.p instruction to
16139 // perform a full 128-bit equality check in a single vector compare.
16140 //
16141 // Example Result:
16142 // This transformation replaces memcmp(a, b, 16) with two vector loads
16143 // and one vector compare instruction.
16144
16145 if (Subtarget.hasAltivec() &&
16146 canConvertToVcmpequb(LHS, RHS, Subtarget.isPPC64()))
16147 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
16148 }
16149
16150 return DAGCombineTruncBoolExt(N, DCI);
16151}
16152
16153// Is this an extending load from an f32 to an f64?
16154static bool isFPExtLoad(SDValue Op) {
16155 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
16156 return LD->getExtensionType() == ISD::EXTLOAD &&
16157 Op.getValueType() == MVT::f64;
16158 return false;
16159}
16160
16161/// Reduces the number of fp-to-int conversion when building a vector.
16162///
16163/// If this vector is built out of floating to integer conversions,
16164/// transform it to a vector built out of floating point values followed by a
16165/// single floating to integer conversion of the vector.
16166/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
16167/// becomes (fptosi (build_vector ($A, $B, ...)))
16168SDValue PPCTargetLowering::
16169combineElementTruncationToVectorTruncation(SDNode *N,
16170 DAGCombinerInfo &DCI) const {
16171 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16172 "Should be called with a BUILD_VECTOR node");
16173
16174 SelectionDAG &DAG = DCI.DAG;
16175 SDLoc dl(N);
16176
16177 SDValue FirstInput = N->getOperand(0);
16178 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
16179 "The input operand must be an fp-to-int conversion.");
16180
16181 // This combine happens after legalization so the fp_to_[su]i nodes are
16182 // already converted to PPCSISD nodes.
16183 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
16184 if (FirstConversion == PPCISD::FCTIDZ ||
16185 FirstConversion == PPCISD::FCTIDUZ ||
16186 FirstConversion == PPCISD::FCTIWZ ||
16187 FirstConversion == PPCISD::FCTIWUZ) {
16188 bool IsSplat = true;
16189 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
16190 FirstConversion == PPCISD::FCTIWUZ;
16191 EVT SrcVT = FirstInput.getOperand(0).getValueType();
16193 EVT TargetVT = N->getValueType(0);
16194 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16195 SDValue NextOp = N->getOperand(i);
16196 if (NextOp.getOpcode() != PPCISD::MFVSR)
16197 return SDValue();
16198 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
16199 if (NextConversion != FirstConversion)
16200 return SDValue();
16201 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
16202 // This is not valid if the input was originally double precision. It is
16203 // also not profitable to do unless this is an extending load in which
16204 // case doing this combine will allow us to combine consecutive loads.
16205 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
16206 return SDValue();
16207 if (N->getOperand(i) != FirstInput)
16208 IsSplat = false;
16209 }
16210
16211 // If this is a splat, we leave it as-is since there will be only a single
16212 // fp-to-int conversion followed by a splat of the integer. This is better
16213 // for 32-bit and smaller ints and neutral for 64-bit ints.
16214 if (IsSplat)
16215 return SDValue();
16216
16217 // Now that we know we have the right type of node, get its operands
16218 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16219 SDValue In = N->getOperand(i).getOperand(0);
16220 if (Is32Bit) {
16221 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16222 // here, we know that all inputs are extending loads so this is safe).
16223 if (In.isUndef())
16224 Ops.push_back(DAG.getUNDEF(SrcVT));
16225 else {
16226 SDValue Trunc =
16227 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16228 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16229 Ops.push_back(Trunc);
16230 }
16231 } else
16232 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16233 }
16234
16235 unsigned Opcode;
16236 if (FirstConversion == PPCISD::FCTIDZ ||
16237 FirstConversion == PPCISD::FCTIWZ)
16238 Opcode = ISD::FP_TO_SINT;
16239 else
16240 Opcode = ISD::FP_TO_UINT;
16241
16242 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16243 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16244 return DAG.getNode(Opcode, dl, TargetVT, BV);
16245 }
16246 return SDValue();
16247}
16248
16249// LXVKQ instruction load VSX vector with a special quadword value
16250// based on an immediate value. This helper method returns the details of the
16251// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
16252// to help generate the LXVKQ instruction and the subsequent shift instruction
16253// required to match the original build vector pattern.
16254
16255// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16256using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16257
16258static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16259
16260 // LXVKQ instruction loads the Quadword value:
16261 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16262 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16263 static const uint32_t Uim = 16;
16264
16265 // Check for direct LXVKQ match (no shift needed)
16266 if (FullVal == BasePattern)
16267 return std::make_tuple(Uim, uint8_t{0});
16268
16269 // Check if FullValue is 1 (the result of the base pattern >> 127)
16270 if (FullVal == APInt(128, 1))
16271 return std::make_tuple(Uim, uint8_t{127});
16272
16273 return std::nullopt;
16274}
16275
16276/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
16277/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
16278/// LXVKQ instruction load VSX vector with a special quadword value based on an
16279/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
16280/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
16281/// This can be used to inline the build vector constants that have the
16282/// following patterns:
16283///
16284/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16285/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16286/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
16287/// combination of splatting and right shift instructions.
16288
16289SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16290 SelectionDAG &DAG) const {
16291
16292 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16293 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16294
16295 // This transformation is only supported if we are loading either a byte,
16296 // halfword, word, or doubleword.
16297 EVT VT = Op.getValueType();
16298 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16299 VT == MVT::v2i64))
16300 return SDValue();
16301
16302 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16303 << VT.getEVTString() << "): ";
16304 Op->dump());
16305
16306 unsigned NumElems = VT.getVectorNumElements();
16307 unsigned ElemBits = VT.getScalarSizeInBits();
16308
16309 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16310
16311 // Check for Non-constant operand in the build vector.
16312 for (const SDValue &Operand : Op.getNode()->op_values()) {
16313 if (!isa<ConstantSDNode>(Operand))
16314 return SDValue();
16315 }
16316
16317 // Assemble build vector operands as a 128-bit register value
16318 // We need to reconstruct what the 128-bit register pattern would be
16319 // that produces this vector when interpreted with the current endianness
16320 APInt FullVal = APInt::getZero(128);
16321
16322 for (unsigned Index = 0; Index < NumElems; ++Index) {
16323 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16324
16325 // Get element value as raw bits (zero-extended)
16326 uint64_t ElemValue = C->getZExtValue();
16327
16328 // Mask to element size to ensure we only get the relevant bits
16329 if (ElemBits < 64)
16330 ElemValue &= ((1ULL << ElemBits) - 1);
16331
16332 // Calculate bit position for this element in the 128-bit register
16333 unsigned BitPos =
16334 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16335
16336 // Create APInt for the element value and shift it to correct position
16337 APInt ElemAPInt(128, ElemValue);
16338 ElemAPInt <<= BitPos;
16339
16340 // Place the element value at the correct bit position
16341 FullVal |= ElemAPInt;
16342 }
16343
16344 if (FullVal.isZero() || FullVal.isAllOnes())
16345 return SDValue();
16346
16347 if (auto UIMOpt = getPatternInfo(FullVal)) {
16348 const auto &[Uim, ShiftAmount] = *UIMOpt;
16349 SDLoc Dl(Op);
16350
16351 // Generate LXVKQ instruction if the shift amount is zero.
16352 if (ShiftAmount == 0) {
16353 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16354 SDValue LxvkqInstr =
16355 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16357 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16358 LxvkqInstr.dump());
16359 return LxvkqInstr;
16360 }
16361
16362 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16363
16364 // The right shifted pattern can be constructed using a combination of
16365 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16366 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16367 // value 255.
16368 SDValue ShiftAmountVec =
16369 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16370 DAG.getTargetConstant(255, Dl, MVT::i32)),
16371 0);
16372 // Generate appropriate right shift instruction
16373 SDValue ShiftVec = SDValue(
16374 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16375 0);
16377 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16378 ShiftVec.dump());
16379 return ShiftVec;
16380 }
16381 // No patterns matched for build vectors.
16382 return SDValue();
16383}
16384
16385/// Reduce the number of loads when building a vector.
16386///
16387/// Building a vector out of multiple loads can be converted to a load
16388/// of the vector type if the loads are consecutive. If the loads are
16389/// consecutive but in descending order, a shuffle is added at the end
16390/// to reorder the vector.
16392 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16393 "Should be called with a BUILD_VECTOR node");
16394
16395 SDLoc dl(N);
16396
16397 // Return early for non byte-sized type, as they can't be consecutive.
16398 if (!N->getValueType(0).getVectorElementType().isByteSized())
16399 return SDValue();
16400
16401 bool InputsAreConsecutiveLoads = true;
16402 bool InputsAreReverseConsecutive = true;
16403 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16404 SDValue FirstInput = N->getOperand(0);
16405 bool IsRoundOfExtLoad = false;
16406 LoadSDNode *FirstLoad = nullptr;
16407
16408 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16409 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16410 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16411 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16412 }
16413 // Not a build vector of (possibly fp_rounded) loads.
16414 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16415 N->getNumOperands() == 1)
16416 return SDValue();
16417
16418 if (!IsRoundOfExtLoad)
16419 FirstLoad = cast<LoadSDNode>(FirstInput);
16420
16422 InputLoads.push_back(FirstLoad);
16423 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16424 // If any inputs are fp_round(extload), they all must be.
16425 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16426 return SDValue();
16427
16428 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16429 N->getOperand(i);
16430 if (NextInput.getOpcode() != ISD::LOAD)
16431 return SDValue();
16432
16433 SDValue PreviousInput =
16434 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16435 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16436 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16437
16438 // If any inputs are fp_round(extload), they all must be.
16439 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16440 return SDValue();
16441
16442 // We only care about regular loads. The PPC-specific load intrinsics
16443 // will not lead to a merge opportunity.
16444 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16445 InputsAreConsecutiveLoads = false;
16446 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16447 InputsAreReverseConsecutive = false;
16448
16449 // Exit early if the loads are neither consecutive nor reverse consecutive.
16450 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16451 return SDValue();
16452 InputLoads.push_back(LD2);
16453 }
16454
16455 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16456 "The loads cannot be both consecutive and reverse consecutive.");
16457
16458 SDValue WideLoad;
16459 SDValue ReturnSDVal;
16460 if (InputsAreConsecutiveLoads) {
16461 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16462 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16463 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16464 FirstLoad->getAlign());
16465 ReturnSDVal = WideLoad;
16466 } else if (InputsAreReverseConsecutive) {
16467 LoadSDNode *LastLoad = InputLoads.back();
16468 assert(LastLoad && "Input needs to be a LoadSDNode.");
16469 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16470 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16471 LastLoad->getAlign());
16473 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16474 Ops.push_back(i);
16475
16476 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16477 DAG.getUNDEF(N->getValueType(0)), Ops);
16478 } else
16479 return SDValue();
16480
16481 for (auto *LD : InputLoads)
16482 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16483 return ReturnSDVal;
16484}
16485
16486// This function adds the required vector_shuffle needed to get
16487// the elements of the vector extract in the correct position
16488// as specified by the CorrectElems encoding.
16490 SDValue Input, uint64_t Elems,
16491 uint64_t CorrectElems) {
16492 SDLoc dl(N);
16493
16494 unsigned NumElems = Input.getValueType().getVectorNumElements();
16495 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16496
16497 // Knowing the element indices being extracted from the original
16498 // vector and the order in which they're being inserted, just put
16499 // them at element indices required for the instruction.
16500 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16501 if (DAG.getDataLayout().isLittleEndian())
16502 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16503 else
16504 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16505 CorrectElems = CorrectElems >> 8;
16506 Elems = Elems >> 8;
16507 }
16508
16509 SDValue Shuffle =
16510 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16511 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16512
16513 EVT VT = N->getValueType(0);
16514 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16515
16516 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16517 Input.getValueType().getVectorElementType(),
16519 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16520 DAG.getValueType(ExtVT));
16521}
16522
16523// Look for build vector patterns where input operands come from sign
16524// extended vector_extract elements of specific indices. If the correct indices
16525// aren't used, add a vector shuffle to fix up the indices and create
16526// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16527// during instruction selection.
16529 // This array encodes the indices that the vector sign extend instructions
16530 // extract from when extending from one type to another for both BE and LE.
16531 // The right nibble of each byte corresponds to the LE incides.
16532 // and the left nibble of each byte corresponds to the BE incides.
16533 // For example: 0x3074B8FC byte->word
16534 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16535 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16536 // For example: 0x000070F8 byte->double word
16537 // For LE: the allowed indices are: 0x0,0x8
16538 // For BE: the allowed indices are: 0x7,0xF
16539 uint64_t TargetElems[] = {
16540 0x3074B8FC, // b->w
16541 0x000070F8, // b->d
16542 0x10325476, // h->w
16543 0x00003074, // h->d
16544 0x00001032, // w->d
16545 };
16546
16547 uint64_t Elems = 0;
16548 int Index;
16549 SDValue Input;
16550
16551 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16552 if (!Op)
16553 return false;
16554 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16555 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16556 return false;
16557
16558 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16559 // of the right width.
16560 SDValue Extract = Op.getOperand(0);
16561 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16562 Extract = Extract.getOperand(0);
16563 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16564 return false;
16565
16567 if (!ExtOp)
16568 return false;
16569
16570 Index = ExtOp->getZExtValue();
16571 if (Input && Input != Extract.getOperand(0))
16572 return false;
16573
16574 if (!Input)
16575 Input = Extract.getOperand(0);
16576
16577 Elems = Elems << 8;
16578 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16579 Elems |= Index;
16580
16581 return true;
16582 };
16583
16584 // If the build vector operands aren't sign extended vector extracts,
16585 // of the same input vector, then return.
16586 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16587 if (!isSExtOfVecExtract(N->getOperand(i))) {
16588 return SDValue();
16589 }
16590 }
16591
16592 // If the vector extract indices are not correct, add the appropriate
16593 // vector_shuffle.
16594 int TgtElemArrayIdx;
16595 int InputSize = Input.getValueType().getScalarSizeInBits();
16596 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16597 if (InputSize + OutputSize == 40)
16598 TgtElemArrayIdx = 0;
16599 else if (InputSize + OutputSize == 72)
16600 TgtElemArrayIdx = 1;
16601 else if (InputSize + OutputSize == 48)
16602 TgtElemArrayIdx = 2;
16603 else if (InputSize + OutputSize == 80)
16604 TgtElemArrayIdx = 3;
16605 else if (InputSize + OutputSize == 96)
16606 TgtElemArrayIdx = 4;
16607 else
16608 return SDValue();
16609
16610 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16611 CorrectElems = DAG.getDataLayout().isLittleEndian()
16612 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16613 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16614 if (Elems != CorrectElems) {
16615 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16616 }
16617
16618 // Regular lowering will catch cases where a shuffle is not needed.
16619 return SDValue();
16620}
16621
16622 // Look for the pattern of a load from a narrow width to i128, feeding
16623 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16624 // (LXVRZX). This node represents a zero extending load that will be matched
16625 // to the Load VSX Vector Rightmost instructions.
// Returns the replacement memory-intrinsic node, or an empty SDValue when the
// pattern does not match.
16627 SDLoc DL(N);
16628
16629 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16630 if (N->getValueType(0) != MVT::v1i128)
16631 return SDValue();
16632
16633 SDValue Operand = N->getOperand(0);
16634 // Proceed with the transformation if the operand to the BUILD_VECTOR
16635 // is a load instruction.
16636 if (Operand.getOpcode() != ISD::LOAD)
16637 return SDValue();
16638
16639 auto *LD = cast<LoadSDNode>(Operand);
16640 EVT MemoryType = LD->getMemoryVT();
16641
16642 // This transformation is only valid if we are loading either a byte,
16643 // halfword, word, or doubleword.
16644 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16645 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16646
16647 // Ensure that the load from the narrow width is being zero extended to i128.
// Note that an any-extending load (EXTLOAD) is accepted here as well.
16648 if (!ValidLDType ||
16649 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16650 LD->getExtensionType() != ISD::EXTLOAD))
16651 return SDValue();
16652
// Operands of the new node: the original chain, the original base pointer,
// and the width of the loaded memory type in bits.
16653 SDValue LoadOps[] = {
16654 LD->getChain(), LD->getBasePtr(),
16655 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16656
// The new node reuses the original load's memory type and memory operand.
16657 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16658 DAG.getVTList(MVT::v1i128, MVT::Other),
16659 LoadOps, MemoryType, LD->getMemOperand());
16660}
16661
// Target-specific DAG combine for BUILD_VECTOR nodes. Tries, in order: the
// element-truncation combine (when the first operand is an MFVSR), the
// consecutive-loads combine, the P9 vector sign-extend combine, the ISA 3.1
// zero-extending load combine, and finally a v2f64 build of two int-to-fp
// conversions of adjacent v4i32 elements. Returns an empty SDValue if none
// of them apply or if the subtarget lacks VSX.
16662SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16663 DAGCombinerInfo &DCI) const {
16664 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16665 "Should be called with a BUILD_VECTOR node");
16666
16667 SelectionDAG &DAG = DCI.DAG;
16668 SDLoc dl(N);
16669
16670 if (!Subtarget.hasVSX())
16671 return SDValue();
16672
16673 // The target independent DAG combiner will leave a build_vector of
16674 // float-to-int conversions intact. We can generate MUCH better code for
16675 // a float-to-int conversion of a vector of floats.
16676 SDValue FirstInput = N->getOperand(0);
16677 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16678 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16679 if (Reduced)
16680 return Reduced;
16681 }
16682
16683 // If we're building a vector out of consecutive loads, just load that
16684 // vector type.
16685 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16686 if (Reduced)
16687 return Reduced;
16688
16689 // If we're building a vector out of extended elements from another vector
16690 // we have P9 vector integer extend instructions. The code assumes legal
16691 // input types (i.e. it can't handle things like v4i16) so do not run before
16692 // legalization.
16693 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16694 Reduced = combineBVOfVecSExt(N, DAG);
16695 if (Reduced)
16696 return Reduced;
16697 }
16698
16699 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16700 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16701 // is a load from <valid narrow width> to i128.
16702 if (Subtarget.isISA3_1()) {
16703 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16704 if (BVOfZLoad)
16705 return BVOfZLoad;
16706 }
16707
// The remaining combine only produces v2f64 results.
16708 if (N->getValueType(0) != MVT::v2f64)
16709 return SDValue();
16710
16711 // Looking for:
16712 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
// Both operands must be int-to-fp conversions with the same signedness.
16713 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16714 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16715 return SDValue();
16716 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16717 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16718 return SDValue();
16719 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16720 return SDValue();
16721
16722 SDValue Ext1 = FirstInput.getOperand(0);
16723 SDValue Ext2 = N->getOperand(1).getOperand(0);
16724 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16726 return SDValue();
16727
// Both extracts must use constant indices and read the same v4i32 vector.
16728 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16729 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16730 if (!Ext1Op || !Ext2Op)
16731 return SDValue();
16732 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16733 Ext1.getOperand(0) != Ext2.getOperand(0))
16734 return SDValue();
16735
// Only the element pairs (0,1) and (2,3) are handled; the sub-vector index
// passed to the conversion node depends on the subtarget's endianness.
16736 int FirstElem = Ext1Op->getZExtValue();
16737 int SecondElem = Ext2Op->getZExtValue();
16738 int SubvecIdx;
16739 if (FirstElem == 0 && SecondElem == 1)
16740 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16741 else if (FirstElem == 2 && SecondElem == 3)
16742 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16743 else
16744 return SDValue();
16745
16746 SDValue SrcVec = Ext1.getOperand(0);
16747 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16748 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16749 return DAG.getNode(NodeType, dl, MVT::v2f64,
16750 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16751}
16752
16753SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16754 DAGCombinerInfo &DCI) const {
16755 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16756 N->getOpcode() == ISD::UINT_TO_FP) &&
16757 "Need an int -> FP conversion node here");
16758
16759 if (useSoftFloat() || !Subtarget.has64BitSupport())
16760 return SDValue();
16761
16762 SelectionDAG &DAG = DCI.DAG;
16763 SDLoc dl(N);
16764 SDValue Op(N, 0);
16765
16766 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16767 // from the hardware.
16768 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16769 return SDValue();
16770 if (!Op.getOperand(0).getValueType().isSimple())
16771 return SDValue();
16772 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16773 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16774 return SDValue();
16775
16776 SDValue FirstOperand(Op.getOperand(0));
16777 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16778 (FirstOperand.getValueType() == MVT::i8 ||
16779 FirstOperand.getValueType() == MVT::i16);
16780 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16781 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16782 bool DstDouble = Op.getValueType() == MVT::f64;
16783 unsigned ConvOp = Signed ?
16784 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16785 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16786 SDValue WidthConst =
16787 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16788 dl, false);
16789 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16790 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16791 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16792 DAG.getVTList(MVT::f64, MVT::Other),
16793 Ops, MVT::i8, LDN->getMemOperand());
16794 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16795
16796 // For signed conversion, we need to sign-extend the value in the VSR
16797 if (Signed) {
16798 SDValue ExtOps[] = { Ld, WidthConst };
16799 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16800 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16801 } else
16802 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16803 }
16804
16805
16806 // For i32 intermediate values, unfortunately, the conversion functions
16807 // leave the upper 32 bits of the value are undefined. Within the set of
16808 // scalar instructions, we have no method for zero- or sign-extending the
16809 // value. Thus, we cannot handle i32 intermediate values here.
16810 if (Op.getOperand(0).getValueType() == MVT::i32)
16811 return SDValue();
16812
16813 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16814 "UINT_TO_FP is supported only with FPCVT");
16815
16816 // If we have FCFIDS, then use it when converting to single-precision.
16817 // Otherwise, convert to double-precision and then round.
16818 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16819 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16820 : PPCISD::FCFIDS)
16821 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16822 : PPCISD::FCFID);
16823 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16824 ? MVT::f32
16825 : MVT::f64;
16826
16827 // If we're converting from a float, to an int, and back to a float again,
16828 // then we don't need the store/load pair at all.
16829 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16830 Subtarget.hasFPCVT()) ||
16831 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16832 SDValue Src = Op.getOperand(0).getOperand(0);
16833 if (Src.getValueType() == MVT::f32) {
16834 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16835 DCI.AddToWorklist(Src.getNode());
16836 } else if (Src.getValueType() != MVT::f64) {
16837 // Make sure that we don't pick up a ppc_fp128 source value.
16838 return SDValue();
16839 }
16840
16841 unsigned FCTOp =
16842 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16843 PPCISD::FCTIDUZ;
16844
16845 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16846 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16847
16848 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16849 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16850 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16851 DCI.AddToWorklist(FP.getNode());
16852 }
16853
16854 return FP;
16855 }
16856
16857 return SDValue();
16858}
16859
16860// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16861// builtins) into loads with swaps.
// The replacement is an LXVD2X memory node followed by an XXSWAPD, with a
// BITCAST back to the original vector type when that type is not v2f64.
16863 DAGCombinerInfo &DCI) const {
16864 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16865 // load combines.
16866 if (DCI.isBeforeLegalizeOps())
16867 return SDValue();
16868
16869 SelectionDAG &DAG = DCI.DAG;
16870 SDLoc dl(N);
16871 SDValue Chain;
16872 SDValue Base;
16873 MachineMemOperand *MMO;
16874
// Collect the chain, base pointer and memory operand of the original node.
16875 switch (N->getOpcode()) {
16876 default:
16877 llvm_unreachable("Unexpected opcode for little endian VSX load");
16878 case ISD::LOAD: {
16880 Chain = LD->getChain();
16881 Base = LD->getBasePtr();
16882 MMO = LD->getMemOperand();
16883 // If the MMO suggests this isn't a load of a full vector, leave
16884 // things alone. For a built-in, we have to make the change for
16885 // correctness, so if there is a size problem that will be a bug.
16886 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16887 return SDValue();
16888 break;
16889 }
16892 Chain = Intrin->getChain();
16893 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16894 // us what we want. Get operand 2 instead.
16895 Base = Intrin->getOperand(2);
16896 MMO = Intrin->getMemOperand();
16897 break;
16898 }
16899 }
16900
16901 MVT VecTy = N->getValueType(0).getSimpleVT();
16902
// The load itself is always emitted as v2f64, reusing the original memory
// operand.
16903 SDValue LoadOps[] = { Chain, Base };
16904 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16905 DAG.getVTList(MVT::v2f64, MVT::Other),
16906 LoadOps, MVT::v2f64, MMO);
16907
16908 DCI.AddToWorklist(Load.getNode());
// The swap is chained after the load and produces its own chain result.
16909 Chain = Load.getValue(1);
16910 SDValue Swap = DAG.getNode(
16911 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16912 DCI.AddToWorklist(Swap.getNode());
16913
16914 // Add a bitcast if the resulting load type doesn't match v2f64.
16915 if (VecTy != MVT::v2f64) {
16916 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16917 DCI.AddToWorklist(N.getNode());
16918 // Package {bitcast value, swap's chain} to match Load's shape.
16919 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16920 N, Swap.getValue(1));
16921 }
16922
16923 return Swap;
16924}
16925
16926// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16927// builtins) into stores with swaps.
// The replacement is an XXSWAPD of the stored value (bitcast to v2f64 first
// if needed) feeding an STXVD2X memory node.
16929 DAGCombinerInfo &DCI) const {
16930 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16931 // store combines.
16932 if (DCI.isBeforeLegalizeOps())
16933 return SDValue();
16934
16935 SelectionDAG &DAG = DCI.DAG;
16936 SDLoc dl(N);
16937 SDValue Chain;
16938 SDValue Base;
16939 unsigned SrcOpnd;
16940 MachineMemOperand *MMO;
16941
// Collect the chain, base pointer, memory operand and the index of the
// stored-value operand from the original node.
16942 switch (N->getOpcode()) {
16943 default:
16944 llvm_unreachable("Unexpected opcode for little endian VSX store");
16945 case ISD::STORE: {
16947 Chain = ST->getChain();
16948 Base = ST->getBasePtr();
16949 MMO = ST->getMemOperand();
16950 SrcOpnd = 1;
16951 // If the MMO suggests this isn't a store of a full vector, leave
16952 // things alone. For a built-in, we have to make the change for
16953 // correctness, so if there is a size problem that will be a bug.
16954 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16955 return SDValue();
16956 break;
16957 }
16958 case ISD::INTRINSIC_VOID: {
16960 Chain = Intrin->getChain();
16961 // Intrin->getBasePtr() oddly does not get what we want.
16962 Base = Intrin->getOperand(3);
16963 MMO = Intrin->getMemOperand();
16964 SrcOpnd = 2;
16965 break;
16966 }
16967 }
16968
16969 SDValue Src = N->getOperand(SrcOpnd);
16970 MVT VecTy = Src.getValueType().getSimpleVT();
16971
16972 // All stores are done as v2f64 and possible bit cast.
16973 if (VecTy != MVT::v2f64) {
16974 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16975 DCI.AddToWorklist(Src.getNode());
16976 }
16977
// Swap the value first; the store is then chained after the swap.
16978 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16979 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16980 DCI.AddToWorklist(Swap.getNode());
16981 Chain = Swap.getValue(1);
16982 SDValue StoreOps[] = { Chain, Swap, Base };
// The memory type passed here is the original (pre-bitcast) vector type.
16983 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16984 DAG.getVTList(MVT::Other),
16985 StoreOps, VecTy, MMO);
16986 DCI.AddToWorklist(Store.getNode());
16987 return Store;
16988}
16989
16990// Handle DAG combine for STORE (FP_TO_INT F).
16991SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16992 DAGCombinerInfo &DCI) const {
16993 SelectionDAG &DAG = DCI.DAG;
16994 SDLoc dl(N);
16995 unsigned Opcode = N->getOperand(1).getOpcode();
16996 (void)Opcode;
16997 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16998
16999 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17000 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
17001 && "Not a FP_TO_INT Instruction!");
17002
17003 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
17004 EVT Op1VT = N->getOperand(1).getValueType();
17005 EVT ResVT = Val.getValueType();
17006
17007 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
17008 return SDValue();
17009
17010 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
17011 bool ValidTypeForStoreFltAsInt =
17012 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
17013 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
17014
17015 // TODO: Lower conversion from f128 on all VSX targets
17016 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
17017 return SDValue();
17018
17019 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
17020 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
17021 return SDValue();
17022
17023 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
17024
17025 // Set number of bytes being converted.
17026 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
17027 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
17028 DAG.getIntPtrConstant(ByteSize, dl, false),
17029 DAG.getValueType(Op1VT)};
17030
17031 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
17032 DAG.getVTList(MVT::Other), Ops,
17033 cast<StoreSDNode>(N)->getMemoryVT(),
17034 cast<StoreSDNode>(N)->getMemOperand());
17035
17036 return Val;
17037}
17038
17039static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
17040 // Check that the source of the element keeps flipping
17041 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
17042 bool PrevElemFromFirstVec = Mask[0] < NumElts;
17043 for (int i = 1, e = Mask.size(); i < e; i++) {
17044 if (PrevElemFromFirstVec && Mask[i] < NumElts)
17045 return false;
17046 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
17047 return false;
17048 PrevElemFromFirstVec = !PrevElemFromFirstVec;
17049 }
17050 return true;
17051}
17052
17053static bool isSplatBV(SDValue Op) {
17054 if (Op.getOpcode() != ISD::BUILD_VECTOR)
17055 return false;
17056 SDValue FirstOp;
17057
17058 // Find first non-undef input.
17059 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
17060 FirstOp = Op.getOperand(i);
17061 if (!FirstOp.isUndef())
17062 break;
17063 }
17064
17065 // All inputs are undef or the same as the first non-undef input.
17066 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
17067 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
17068 return false;
17069 return true;
17070}
17071
// Returns the SCALAR_TO_VECTOR node behind Op: Op itself if it is a
// SCALAR_TO_VECTOR, or the operand of Op if Op is a BITCAST of one.
// Returns an empty SDValue otherwise. Only a single BITCAST is looked through.
17073 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17074 return Op;
17075 if (Op.getOpcode() != ISD::BITCAST)
17076 return SDValue();
17077 Op = Op.getOperand(0);
17078 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17079 return Op;
17080 return SDValue();
17081}
17082
17083// Fix up the shuffle mask to account for the fact that the result of
17084// scalar_to_vector is not in lane zero. This just takes all values in
17085// the ranges specified by the min/max indices and adds the number of
17086// elements required to ensure each element comes from the respective
17087// position in the valid lane.
17088// On little endian, that's just the corresponding element in the other
17089// half of the vector. On big endian, it is in the same half but right
17090// justified rather than left justified in that half.
// ShuffV is modified in place; entries outside both ranges are left untouched.
17092 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
17093 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
17094 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
// Offset added to mask entries that fall in the LHS range.
17095 int LHSEltFixup =
17096 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
// Offset added to mask entries that fall in the RHS range.
17097 int RHSEltFixup =
17098 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
17099 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
17100 int Idx = ShuffV[I];
// Both ranges are inclusive; the LHS range is tested first.
17101 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
17102 ShuffV[I] += LHSEltFixup;
17103 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
17104 ShuffV[I] += RHSEltFixup;
17105 }
17106}
17107
17108// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
17109// the original is:
17110// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
17111// In such a case, just change the shuffle mask to extract the element
17112// from the permuted index.
17114 const PPCSubtarget &Subtarget) {
17115 SDLoc dl(OrigSToV);
17116 EVT VT = OrigSToV.getValueType();
17117 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17118 "Expecting a SCALAR_TO_VECTOR here");
17119 SDValue Input = OrigSToV.getOperand(0);
17120
17121 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17122 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
17123 SDValue OrigVector = Input.getOperand(0);
17124
17125 // Can't handle non-const element indices or different vector types
17126 // for the input to the extract and the output of the scalar_to_vector.
17127 if (Idx && VT == OrigVector.getValueType()) {
17128 unsigned NumElts = VT.getVectorNumElements();
17129 assert(
17130 NumElts > 1 &&
17131 "Cannot produce a permuted scalar_to_vector for one element vector");
// Build a mask that is undef everywhere except for the single result lane.
17132 SmallVector<int, 16> NewMask(NumElts, -1);
// The result lane is NumElts / 2 on little endian and one less on big endian.
17133 unsigned ResultInElt = NumElts / 2;
17134 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
17135 NewMask[ResultInElt] = Idx->getZExtValue();
17136 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
17137 }
17138 }
// Otherwise wrap the original scalar operand in the permuted node.
17139 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
17140 OrigSToV.getOperand(0));
17141}
17142
// Returns false if any non-negative entry of ShuffV lies beyond the last
// defined element of the input it is checked against, true otherwise. A
// negative LHSLastElementDefined / RHSLastElementDefined disables the check
// for that input.
17144 int HalfVec, int LHSLastElementDefined,
17145 int RHSLastElementDefined) {
17146 for (int Index : ShuffV) {
17147 if (Index < 0) // Skip explicitly undefined mask indices.
17148 continue;
17149 // Handle first input vector of the vector_shuffle.
17150 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
17151 (Index > LHSLastElementDefined))
17152 return false;
17153 // Handle second input vector of the vector_shuffle.
// NOTE(review): this test has no lower bound on Index, so it is applied to
// every non-negative mask entry - confirm that is intended.
17154 if ((RHSLastElementDefined >= 0) &&
17155 (Index > HalfVec + RHSLastElementDefined))
17156 return false;
17157 }
17158 return true;
17159}
17160
// Produces the permuted form of SToVNode (via getSToVPermuted), bitcast to the
// type of VecShuffOperand when the types differ. NumValidElts and LastElt are
// output parameters that are overwritten with the values needed for the
// later shuffle mask fixup.
17162 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
17163 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
17164 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
17165 EVT VecShuffOperandType = VecShuffOperand.getValueType();
17166 // Set up the values for the shuffle vector fixup.
17167 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
17168 // The last element depends on if the input comes from the LHS or RHS.
17169 //
17170 // For example:
17171 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
17172 //
17173 // For the LHS: The last element that comes from the LHS is actually 0, not 3
17174 // because elements 1 and higher of a scalar_to_vector are undefined.
17175 // For the RHS: The last element that comes from the RHS is actually 5, not 7
17176 // because elements 1 and higher of a scalar_to_vector are undefined.
17177 // It is also not 4 because the original scalar_to_vector is wider and
17178 // actually contains two i32 elements.
17179 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
17180 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
17181 : FirstElt;
17182 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
// Match the type of the shuffle operand this value will replace.
17183 if (SToVPermuted.getValueType() != VecShuffOperandType)
17184 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
17185 return SToVPermuted;
17186}
17187
17188// On little endian subtargets, combine shuffles such as:
17189// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
17190// into:
17191// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
17192// because the latter can be matched to a single instruction merge.
17193// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
17194// to put the value into element zero. Adjust the shuffle mask so that the
17195// vector can remain in permuted form (to prevent a swap prior to a shuffle).
17196// On big endian targets, this is still useful for SCALAR_TO_VECTOR
17197// nodes with elements smaller than doubleword because all the ways
17198// of getting scalar data into a vector register put the value in the
17199// rightmost element of the left half of the vector.
// When no further change is made, the value returned is Res, which is either
// the original shuffle or an intermediate form produced along the way.
17200SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
17201 SelectionDAG &DAG) const {
17202 SDValue LHS = SVN->getOperand(0);
17203 SDValue RHS = SVN->getOperand(1);
17204 auto Mask = SVN->getMask();
17205 int NumElts = LHS.getValueType().getVectorNumElements();
17206 SDValue Res(SVN, 0);
17207 SDLoc dl(SVN);
17208 bool IsLittleEndian = Subtarget.isLittleEndian();
17209
17210 // On big endian targets this is only useful for subtargets with direct moves.
17211 // On little endian targets it would be useful for all subtargets with VSX.
17212 // However adding special handling for LE subtargets without direct moves
17213 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
17214 // which includes direct moves.
17215 if (!Subtarget.hasDirectMove())
17216 return Res;
17217
17218 // If this is not a shuffle of a shuffle and the first element comes from
17219 // the second vector, canonicalize to the commuted form. This will make it
17220 // more likely to match one of the single instruction patterns.
17221 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17222 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17223 std::swap(LHS, RHS);
17224 Res = DAG.getCommutedVectorShuffle(*SVN);
17225
// The commuted node may no longer be a shuffle; nothing more to do then.
17226 if (!isa<ShuffleVectorSDNode>(Res))
17227 return Res;
17228
17229 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17230 }
17231
17232 // Adjust the shuffle mask if either input vector comes from a
17233 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
17234 // form (to prevent the need for a swap).
17235 SmallVector<int, 16> ShuffV(Mask);
17236 SDValue SToVLHS = isScalarToVec(LHS);
17237 SDValue SToVRHS = isScalarToVec(RHS);
17238 if (SToVLHS || SToVRHS) {
17239 EVT VT = SVN->getValueType(0);
17240 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17241 int ShuffleNumElts = ShuffV.size();
17242 int HalfVec = ShuffleNumElts / 2;
17243 // The width of the "valid lane" (i.e. the lane that contains the value that
17244 // is vectorized) needs to be expressed in terms of the number of elements
17245 // of the shuffle. It is thereby the ratio of the values before and after
17246 // any bitcast, which will be set later on if the LHS or RHS are
17247 // SCALAR_TO_VECTOR nodes.
17248 unsigned LHSNumValidElts = HalfVec;
17249 unsigned RHSNumValidElts = HalfVec;
17250
17251 // Initially assume that neither input is permuted. These will be adjusted
17252 // accordingly if either input is. Note, that -1 means that all elements
17253 // are undefined.
17254 int LHSFirstElt = 0;
17255 int RHSFirstElt = ShuffleNumElts;
17256 int LHSLastElt = -1;
17257 int RHSLastElt = -1;
17258
17259 // Get the permuted scalar to vector nodes for the source(s) that come from
17260 // ISD::SCALAR_TO_VECTOR.
17261 // On big endian systems, this only makes sense for element sizes smaller
17262 // than 64 bits since for 64-bit elements, all instructions already put
17263 // the value into element zero. Since scalar size of LHS and RHS may differ
17264 // after isScalarToVec, this should be checked using their own sizes.
17265 int LHSScalarSize = 0;
17266 int RHSScalarSize = 0;
17267 if (SToVLHS) {
17268 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17269 if (!IsLittleEndian && LHSScalarSize >= 64)
17270 return Res;
17271 }
17272 if (SToVRHS) {
17273 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17274 if (!IsLittleEndian && RHSScalarSize >= 64)
17275 return Res;
17276 }
// A non-zero scalar size marks an input that came from a SCALAR_TO_VECTOR.
17277 if (LHSScalarSize != 0)
17279 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17280 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17281 if (RHSScalarSize != 0)
17283 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17284 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17285
// Give up if the mask refers to elements past the last defined one.
17286 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17287 return Res;
17288
17289 // Fix up the shuffle mask to reflect where the desired element actually is.
17290 // The minimum and maximum indices that correspond to element zero for both
17291 // the LHS and RHS are computed and will control which shuffle mask entries
17292 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17293 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17295 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17296 LHSNumValidElts, RHSNumValidElts, Subtarget);
17297 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17298
17299 // We may have simplified away the shuffle. We won't be able to do anything
17300 // further with it here.
17301 if (!isa<ShuffleVectorSDNode>(Res))
17302 return Res;
17303 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17304 }
17305
// The splat candidate is the RHS on little endian and the LHS on big endian.
17306 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17307 // The common case after we commuted the shuffle is that the RHS is a splat
17308 // and we have elements coming in from the splat at indices that are not
17309 // conducive to using a merge.
17310 // Example:
17311 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17312 if (!isSplatBV(TheSplat))
17313 return Res;
17314
17315 // We are looking for a mask such that all even elements are from
17316 // one vector and all odd elements from the other.
17317 if (!isAlternatingShuffMask(Mask, NumElts))
17318 return Res;
17319
17320 // Adjust the mask so we are pulling in the same index from the splat
17321 // as the index from the interesting vector in consecutive elements.
17322 if (IsLittleEndian) {
17323 // Example (even elements from first vector):
17324 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17325 if (Mask[0] < NumElts)
17326 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17327 if (ShuffV[i] < 0)
17328 continue;
17329 // If element from non-splat is undef, pick first element from splat.
17330 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17331 }
17332 // Example (odd elements from first vector):
17333 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17334 else
17335 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17336 if (ShuffV[i] < 0)
17337 continue;
17338 // If element from non-splat is undef, pick first element from splat.
17339 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17340 }
17341 } else {
17342 // Example (even elements from first vector):
17343 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17344 if (Mask[0] < NumElts)
17345 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17346 if (ShuffV[i] < 0)
17347 continue;
17348 // If element from non-splat is undef, pick first element from splat.
17349 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17350 }
17351 // Example (odd elements from first vector):
17352 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17353 else
17354 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17355 if (ShuffV[i] < 0)
17356 continue;
17357 // If element from non-splat is undef, pick first element from splat.
17358 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17359 }
17360 }
17361
17362 // If the RHS has undefs, we need to remove them since we may have created
17363 // a shuffle that adds those instead of the splat value.
17364 SDValue SplatVal =
17365 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17366 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17367
// Put the rebuilt splat back on the side it was taken from.
17368 if (IsLittleEndian)
17369 RHS = TheSplat;
17370 else
17371 LHS = TheSplat;
17372 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17373}
17374
17375SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17376 LSBaseSDNode *LSBase,
17377 DAGCombinerInfo &DCI) const {
17378 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17379 "Not a reverse memop pattern!");
17380
17381 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17382 auto Mask = SVN->getMask();
17383 int i = 0;
17384 auto I = Mask.rbegin();
17385 auto E = Mask.rend();
17386
17387 for (; I != E; ++I) {
17388 if (*I != i)
17389 return false;
17390 i++;
17391 }
17392 return true;
17393 };
17394
17395 SelectionDAG &DAG = DCI.DAG;
17396 EVT VT = SVN->getValueType(0);
17397
17398 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17399 return SDValue();
17400
17401 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
17402 // See comment in PPCVSXSwapRemoval.cpp.
17403 // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
17404 if (!Subtarget.hasP9Vector())
17405 return SDValue();
17406
17407 if(!IsElementReverse(SVN))
17408 return SDValue();
17409
17410 if (LSBase->getOpcode() == ISD::LOAD) {
17411 // If the load return value 0 has more than one user except the
17412 // shufflevector instruction, it is not profitable to replace the
17413 // shufflevector with a reverse load.
17414 for (SDUse &Use : LSBase->uses())
17415 if (Use.getResNo() == 0 &&
17416 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17417 return SDValue();
17418
17419 SDLoc dl(LSBase);
17420 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17421 return DAG.getMemIntrinsicNode(
17422 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17423 LSBase->getMemoryVT(), LSBase->getMemOperand());
17424 }
17425
17426 if (LSBase->getOpcode() == ISD::STORE) {
17427 // If there are other uses of the shuffle, the swap cannot be avoided.
17428 // Forcing the use of an X-Form (since swapped stores only have
17429 // X-Forms) without removing the swap is unprofitable.
17430 if (!SVN->hasOneUse())
17431 return SDValue();
17432
17433 SDLoc dl(LSBase);
17434 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17435 LSBase->getBasePtr()};
17436 return DAG.getMemIntrinsicNode(
17437 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17438 LSBase->getMemoryVT(), LSBase->getMemOperand());
17439 }
17440
17441 llvm_unreachable("Expected a load or store node here");
17442}
17443
17444static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17445 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17446 if (IntrinsicID == Intrinsic::ppc_stdcx)
17447 StoreWidth = 8;
17448 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17449 StoreWidth = 4;
17450 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17451 StoreWidth = 2;
17452 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17453 StoreWidth = 1;
17454 else
17455 return false;
17456 return true;
17457}
17458
// NOTE(review): the signature line(s) of this function are absent from this
// listing; the body below uses a node `N` and a combiner-info object `DCI`.
//
// When N is a PPCISD::ADDC whose result value 1 has at least one use, and N
// has the shape (ADDC (ADDE 0, 0, C), -1), result 1 of N is replaced by C
// (the third operand of the ADDE) while result 0 stays N's own value 0.
// Returns an empty SDValue when the pattern does not match.
17461 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17462 // (ADDC (ADDE 0, 0, C), -1) -> C
17463 SDValue LHS = N->getOperand(0);
17464 SDValue RHS = N->getOperand(1);
// The ADDE must have two null-constant operands, and the ADDC's right-hand
// operand must be the all-ones constant.
17465 if (LHS->getOpcode() == PPCISD::ADDE &&
17466 isNullConstant(LHS->getOperand(0)) &&
17467 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
// Keep value 0 of N unchanged; forward operand 2 of the ADDE as value 1.
17468 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17469 }
17470 }
// No match: signal "no change" to the caller.
17471 return SDValue();
17472}
17473
17474 /// Optimize the bitfloor(X) pattern for PowerPC.
17475 /// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
17476 /// Into: PPCISD::SRL MinSignedValue, (ctlz X)
17477 ///
17478 /// This is safe on PowerPC because the srw instruction returns 0 when the
17479 /// shift amount is == bitwidth, which matches the behavior we need for X=0.
///
/// The shift amount may also be (truncate (ctlz X)) with an i32 result; the
/// original shift-amount node is reused unchanged in the new PPCISD::SRL.
/// Returns the new node on a match and an empty SDValue otherwise.
// NOTE(review): the signature line of this function is absent from this
// listing; the body below uses a node `N` and a SelectionDAG `DAG`.
17481 if (N->getOpcode() != ISD::SELECT_CC)
17482 return SDValue();
17483
17484 // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
17485 SDValue CmpLHS = N->getOperand(0);
17486 SDValue CmpRHS = N->getOperand(1);
17487 SDValue TrueVal = N->getOperand(2);
17488 SDValue FalseVal = N->getOperand(3);
17489 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
17490
17491 // Check if condition is (X == 0)
17492 if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
17493 return SDValue();
17494
17495 // Check if TrueVal is constant 0
17496 if (!isNullConstant(TrueVal))
17497 return SDValue();
17498
17499 // This combine replaces a select_cc with a PPC srl; it does not replace an
17500 // srl with a PPC srl. If the original srl had multiple uses it would simply
17501 // remain in the code, so the one-use check is only a performance concern.
17502 if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
17503 return SDValue();
17504
17505 SDValue ShiftVal = FalseVal.getOperand(0);
17506 SDValue ShiftAmt = FalseVal.getOperand(1);
17507
17508 // Check if ShiftVal is MinSignedValue
17509 auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
17510 if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
17511 return SDValue();
17512
17513 SDValue CtlzArg;
17514 // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
17515 if (ShiftAmt.getOpcode() != ISD::CTLZ) {
17516 // Look through truncate if present (for i64 ctlz truncated to i32 shift
17517 // amount)
17518 if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
17519 return SDValue();
17520
17521 // Verify the truncate target type is appropriate for shift amount (i32, not
17522 // i1 or other)
17523 if (ShiftAmt.getValueType() != MVT::i32)
17524 return SDValue();
17525
17526 SDValue CtlzNode = ShiftAmt.getOperand(0);
17527
17528 if (CtlzNode.getOpcode() != ISD::CTLZ)
17529 return SDValue();
17530
17531 CtlzArg = CtlzNode.getOperand(0);
17532 } else {
// Direct case: the shift amount is the ctlz node itself.
17533 CtlzArg = ShiftAmt.getOperand(0);
17534 }
17535
17536 // Check if ctlz operates on the same value as the comparison
17537 if (CtlzArg != CmpLHS)
17538 return SDValue();
17539
17540 // Using PPCISD::SRL to ensure well-defined behavior.
17541 // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
17542 // which is exactly what we need for the bitfloor(0) case.
17543 SDLoc DL(N);
// The result keeps the type of the original srl and reuses its two operands.
17544 SDValue PPCSrl =
17545 DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
17546 return PPCSrl;
17547}
17548
17549 // Optimize zero-extension of setcc when the compared value is known to be 0
17550 // or 1.
17551 //
17552 // Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17553 // -> zext(xor(Value, 1)) for seteq
17554 // -> zext(Value) for setne
17555 //
17556 // This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17557 // by keeping the value in its original i32 type throughout.
17558 //
17559 // Example:
17560 // Before: zext(setcc(test_data_class(...), 0, seteq))
17561 // // test_data_class returns 0 or 1 in i32
17562 // // setcc converts i32 -> i1
17563 // // zext converts i1 -> i64
17564 // After: zext(xor(test_data_class(...), 1))
17565 // // Stays in i32, then extends to i64
17566 //
17567 // This is beneficial because:
17568 // 1. Eliminates the setcc instruction
17569 // 2. Avoids i32 -> i1 truncation
17570 // 3. Keeps computation in native integer width
//
// The only value currently recognized as "known to be 0 or 1" is the result
// of the ppc_test_data_class intrinsic. Returns the replacement value on a
// match and an empty SDValue otherwise.
17571
// NOTE(review): the signature line of this function is absent from this
// listing; the body below uses a node `N` and a SelectionDAG `DAG`.
17573 // Check if this is a zero_extend
17574 if (N->getOpcode() != ISD::ZERO_EXTEND)
17575 return SDValue();
17576
17577 SDValue Src = N->getOperand(0);
17578
17579 // Check if the source is a setcc
17580 if (Src.getOpcode() != ISD::SETCC)
17581 return SDValue();
17582
17583 SDValue LHS = Src.getOperand(0);
17584 SDValue RHS = Src.getOperand(1);
17585 ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
17586
// NOTE(review): the condition guarding the return below is missing from this
// listing. Presumably it bails out when neither LHS nor RHS is a null
// constant (the next statement picks the operand that is not) - confirm
// against the upstream source.
17588 return SDValue();
17589
// Despite its name, this is the setcc operand that is NOT the null constant,
// i.e. the value being compared against zero.
17590 SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
17591
// Returns true only for a chainless intrinsic call whose ID (operand 0) is
// ppc_test_data_class. The lambda does not use any captured state.
17592 auto isZeroOrOne = [=](SDValue &V) {
17593 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17594 V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
17595 return true;
17596 return false;
17597 };
17598
17599 if (!isZeroOrOne(NonNullConstant))
17600 return SDValue();
17601
17602 // Check for pattern: zext(setcc(Value, 0, seteq)) or
17603 // zext(setcc(Value, 0, setne))
17604 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17605 // Replace with: zext(xor(Value, 1)) for seteq
17606 // or: zext(Value) for setne
17607 // This keeps the value in i32 instead of converting to i1
17608 SDLoc DL(N);
17609 EVT VType = N->getValueType(0);
// Bring the compared value to the result type of the zero_extend first.
17610 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
17611
17612 if (CC == ISD::SETNE)
17613 return NewNonNullConstant;
17614
// seteq: flip the low bit so that 0 becomes 1 and 1 becomes 0.
17615 SDValue One = DAG.getConstant(1, DL, VType);
17616 return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
17617 }
17618
// Any other condition code is left alone.
17619 return SDValue();
17620}
17621
17622 // Combine XOR patterns with SELECT_CC_I4/I8, for example:
17623 // 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17624 // 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17625 // 1, cc))
17626 // 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17627 // 0, 1, cc))
17628 // 4. etc
//
// Only i32 and i64 XORs are handled. The XOR operand (and, when present, the
// input of its extension) must have a single use. Returns the new machine
// node on a match and an empty SDValue otherwise.
// NOTE(review): the signature line of this function is absent from this
// listing; the body below uses a node `N` and a SelectionDAG `DAG`.
17630 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17631
17632 EVT XorVT = N->getValueType(0);
17633 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17634 return SDValue();
17635
17636 SDValue LHS = N->getOperand(0);
17637 SDValue RHS = N->getOperand(1);
17638
17639 // Check for XOR with constant 1
// NOTE(review): the declaration of `XorConst` is missing from this listing.
// Presumably it is initialized from RHS via dyn_cast<ConstantSDNode>, since
// the fallback below retries with LHS - confirm against the upstream source.
17641 if (!XorConst || !XorConst->isOne()) {
17642 XorConst = dyn_cast<ConstantSDNode>(LHS);
17643 if (!XorConst || !XorConst->isOne())
17644 return SDValue();
17645 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17646 std::swap(LHS, RHS);
17647 }
17648
17649 // Check if LHS has only one use
17650 if (!LHS.hasOneUse())
17651 return SDValue();
17652
17653 // Handle extensions: ZEXT, ANYEXT
17654 SDValue SelectNode = LHS;
17655
17656 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17657 LHS.getOpcode() == ISD::ANY_EXTEND) {
17658 SelectNode = LHS.getOperand(0);
17659
17660 // Check if the extension input has only one use
17661 if (!SelectNode.hasOneUse())
17662 return SDValue();
17663 }
17664
17665 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17666 if (!SelectNode.isMachineOpcode())
17667 return SDValue();
17668
17669 unsigned MachineOpc = SelectNode.getMachineOpcode();
17670
17671 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17672 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17673 return SDValue();
17674
17675 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17676 if (SelectNode.getNumOperands() != 4)
17677 return SDValue();
17678
17679 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17680 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17681
17682 if (!ConstOp1 || !ConstOp2)
17683 return SDValue();
17684
17685 // Only optimize if operands are {0, 1} or {1, 0}
17686 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17687 (ConstOp1->isZero() && ConstOp2->isOne())))
17688 return SDValue();
17689
17690 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17691 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17692 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17693 // create SELECT_CC(cond, 1, 0, pred).
17694 SDLoc DL(N);
// Choose the opcode from the XOR's type rather than from the matched select,
// so the new node produces XorVT directly even when an extension was looked
// through.
17695 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17696
17697 bool ConstOp1IsOne = ConstOp1->isOne();
// Operands 0 (cond) and 3 (bropc) are carried over from the matched select;
// the two constants are rebuilt in XorVT with their positions exchanged.
17698 return SDValue(
17699 DAG.getMachineNode(MachineOpc, DL, XorVT,
17700 {SelectNode.getOperand(0),
17701 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17702 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17703 SelectNode.getOperand(3)}),
17704 0);
17705}
17706
17708 DAGCombinerInfo &DCI) const {
17709 SelectionDAG &DAG = DCI.DAG;
17710 SDLoc dl(N);
17711 switch (N->getOpcode()) {
17712 default: break;
17713 case ISD::ADD:
17714 return combineADD(N, DCI);
17715 case ISD::AND: {
17716 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17717 // original input as that will prevent us from selecting optimal rotates.
17718 // This only matters if the input to the extend is i32 widened to i64.
17719 SDValue Op1 = N->getOperand(0);
17720 SDValue Op2 = N->getOperand(1);
17721 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17722 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17723 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17724 Op1.getOperand(0).getValueType() != MVT::i32)
17725 break;
17726 SDValue NarrowOp = Op1.getOperand(0);
17727 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17728 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17729 break;
17730
17731 uint64_t Imm = Op2->getAsZExtVal();
17732 // Make sure that the constant is narrow enough to fit in the narrow type.
17733 if (!isUInt<32>(Imm))
17734 break;
17735 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17736 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17737 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17738 }
17739 case ISD::XOR: {
17740 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17741 if (SDValue V = combineXorSelectCC(N, DAG))
17742 return V;
17743 break;
17744 }
17745 case ISD::SHL:
17746 return combineSHL(N, DCI);
17747 case ISD::SRA:
17748 return combineSRA(N, DCI);
17749 case ISD::SRL:
17750 return combineSRL(N, DCI);
17751 case ISD::MUL:
17752 return combineMUL(N, DCI);
17753 case ISD::FMA:
17754 case PPCISD::FNMSUB:
17755 return combineFMALike(N, DCI);
17756 case PPCISD::SHL:
17757 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17758 return N->getOperand(0);
17759 break;
17760 case PPCISD::SRL:
17761 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17762 return N->getOperand(0);
17763 break;
17764 case PPCISD::SRA:
17765 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17766 if (C->isZero() || // 0 >>s V -> 0.
17767 C->isAllOnes()) // -1 >>s V -> -1.
17768 return N->getOperand(0);
17769 }
17770 break;
17771 case ISD::SIGN_EXTEND:
17772 if (SDValue SECC = combineSignExtendSetCC(N, DCI))
17773 return SECC;
17774 [[fallthrough]];
17775 case ISD::ZERO_EXTEND:
17776 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17777 return RetV;
17778 [[fallthrough]];
17779 case ISD::ANY_EXTEND:
17780 return DAGCombineExtBoolTrunc(N, DCI);
17781 case ISD::TRUNCATE:
17782 return combineTRUNCATE(N, DCI);
17783 case ISD::SETCC:
17784 if (SDValue CSCC = combineSetCC(N, DCI))
17785 return CSCC;
17786 [[fallthrough]];
17787 case ISD::SELECT_CC:
17788 if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
17789 return V;
17790 return DAGCombineTruncBoolExt(N, DCI);
17791 case ISD::SINT_TO_FP:
17792 case ISD::UINT_TO_FP:
17793 return combineFPToIntToFP(N, DCI);
17795 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17796 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17797 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17798 }
17799 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17800 case ISD::STORE: {
17801
17802 EVT Op1VT = N->getOperand(1).getValueType();
17803 unsigned Opcode = N->getOperand(1).getOpcode();
17804
17805 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17806 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17807 SDValue Val = combineStoreFPToInt(N, DCI);
17808 if (Val)
17809 return Val;
17810 }
17811
17812 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17813 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17814 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17815 if (Val)
17816 return Val;
17817 }
17818
17819 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17820 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17821 N->getOperand(1).getNode()->hasOneUse() &&
17822 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17823 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17824
17825 // STBRX can only handle simple types and it makes no sense to store less
17826 // than two bytes in byte-reversed order.
17827 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17828 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17829 break;
17830
17831 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17832 // Do an any-extend to 32-bits if this is a half-word input.
17833 if (BSwapOp.getValueType() == MVT::i16)
17834 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17835
17836 // If the type of BSWAP operand is wider than stored memory width
17837 // it need to be shifted to the right side before STBRX.
17838 if (Op1VT.bitsGT(mVT)) {
17839 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17840 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17841 DAG.getConstant(Shift, dl, MVT::i32));
17842 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17843 if (Op1VT == MVT::i64)
17844 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17845 }
17846
17847 SDValue Ops[] = {
17848 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17849 };
17850 return
17851 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17852 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17853 cast<StoreSDNode>(N)->getMemOperand());
17854 }
17855
17856 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17857 // So it can increase the chance of CSE constant construction.
17858 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17859 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17860 // Need to sign-extended to 64-bits to handle negative values.
17861 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17862 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17863 MemVT.getSizeInBits());
17864 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17865
17866 auto *ST = cast<StoreSDNode>(N);
17867 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17868 ST->getBasePtr(), ST->getOffset(), MemVT,
17869 ST->getMemOperand(), ST->getAddressingMode(),
17870 /*IsTruncating=*/true);
17871 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17872 // new store which will change the constant by removing non-demanded bits.
17873 return ST->isUnindexed()
17874 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17875 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17876 }
17877
17878 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17879 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17880 if (Op1VT.isSimple()) {
17881 MVT StoreVT = Op1VT.getSimpleVT();
17882 if (Subtarget.needsSwapsForVSXMemOps() &&
17883 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17884 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17885 return expandVSXStoreForLE(N, DCI);
17886 }
17887 break;
17888 }
17889 case ISD::LOAD: {
17891 EVT VT = LD->getValueType(0);
17892
17893 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17894 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17895 if (VT.isSimple()) {
17896 MVT LoadVT = VT.getSimpleVT();
17897 if (Subtarget.needsSwapsForVSXMemOps() &&
17898 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17899 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17900 return expandVSXLoadForLE(N, DCI);
17901 }
17902
17903 // We sometimes end up with a 64-bit integer load, from which we extract
17904 // two single-precision floating-point numbers. This happens with
17905 // std::complex<float>, and other similar structures, because of the way we
17906 // canonicalize structure copies. However, if we lack direct moves,
17907 // then the final bitcasts from the extracted integer values to the
17908 // floating-point numbers turn into store/load pairs. Even with direct moves,
17909 // just loading the two floating-point numbers is likely better.
17910 auto ReplaceTwoFloatLoad = [&]() {
17911 if (VT != MVT::i64)
17912 return false;
17913
17914 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17915 LD->isVolatile())
17916 return false;
17917
17918 // We're looking for a sequence like this:
17919 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17920 // t16: i64 = srl t13, Constant:i32<32>
17921 // t17: i32 = truncate t16
17922 // t18: f32 = bitcast t17
17923 // t19: i32 = truncate t13
17924 // t20: f32 = bitcast t19
17925
17926 if (!LD->hasNUsesOfValue(2, 0))
17927 return false;
17928
17929 auto UI = LD->user_begin();
17930 while (UI.getUse().getResNo() != 0) ++UI;
17931 SDNode *Trunc = *UI++;
17932 while (UI.getUse().getResNo() != 0) ++UI;
17933 SDNode *RightShift = *UI;
17934 if (Trunc->getOpcode() != ISD::TRUNCATE)
17935 std::swap(Trunc, RightShift);
17936
17937 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17938 Trunc->getValueType(0) != MVT::i32 ||
17939 !Trunc->hasOneUse())
17940 return false;
17941 if (RightShift->getOpcode() != ISD::SRL ||
17942 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17943 RightShift->getConstantOperandVal(1) != 32 ||
17944 !RightShift->hasOneUse())
17945 return false;
17946
17947 SDNode *Trunc2 = *RightShift->user_begin();
17948 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17949 Trunc2->getValueType(0) != MVT::i32 ||
17950 !Trunc2->hasOneUse())
17951 return false;
17952
17953 SDNode *Bitcast = *Trunc->user_begin();
17954 SDNode *Bitcast2 = *Trunc2->user_begin();
17955
17956 if (Bitcast->getOpcode() != ISD::BITCAST ||
17957 Bitcast->getValueType(0) != MVT::f32)
17958 return false;
17959 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17960 Bitcast2->getValueType(0) != MVT::f32)
17961 return false;
17962
17963 if (Subtarget.isLittleEndian())
17964 std::swap(Bitcast, Bitcast2);
17965
17966 // Bitcast has the second float (in memory-layout order) and Bitcast2
17967 // has the first one.
17968
17969 SDValue BasePtr = LD->getBasePtr();
17970 if (LD->isIndexed()) {
17971 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17972 "Non-pre-inc AM on PPC?");
17973 BasePtr =
17974 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17975 LD->getOffset());
17976 }
17977
17978 auto MMOFlags =
17979 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17980 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17981 LD->getPointerInfo(), LD->getAlign(),
17982 MMOFlags, LD->getAAInfo());
17983 SDValue AddPtr =
17984 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17985 BasePtr, DAG.getIntPtrConstant(4, dl));
17986 SDValue FloatLoad2 = DAG.getLoad(
17987 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17988 LD->getPointerInfo().getWithOffset(4),
17989 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17990
17991 if (LD->isIndexed()) {
17992 // Note that DAGCombine should re-form any pre-increment load(s) from
17993 // what is produced here if that makes sense.
17994 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17995 }
17996
17997 DCI.CombineTo(Bitcast2, FloatLoad);
17998 DCI.CombineTo(Bitcast, FloatLoad2);
17999
18000 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
18001 SDValue(FloatLoad2.getNode(), 1));
18002 return true;
18003 };
18004
18005 if (ReplaceTwoFloatLoad())
18006 return SDValue(N, 0);
18007
18008 EVT MemVT = LD->getMemoryVT();
18009 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
18010 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
18011 if (LD->isUnindexed() && VT.isVector() &&
18012 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
18013 // P8 and later hardware should just use LOAD.
18014 !Subtarget.hasP8Vector() &&
18015 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
18016 VT == MVT::v4f32))) &&
18017 LD->getAlign() < ABIAlignment) {
18018 // This is a type-legal unaligned Altivec load.
18019 SDValue Chain = LD->getChain();
18020 SDValue Ptr = LD->getBasePtr();
18021 bool isLittleEndian = Subtarget.isLittleEndian();
18022
18023 // This implements the loading of unaligned vectors as described in
18024 // the venerable Apple Velocity Engine overview. Specifically:
18025 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
18026 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
18027 //
18028 // The general idea is to expand a sequence of one or more unaligned
18029 // loads into an alignment-based permutation-control instruction (lvsl
18030 // or lvsr), a series of regular vector loads (which always truncate
18031 // their input address to an aligned address), and a series of
18032 // permutations. The results of these permutations are the requested
18033 // loaded values. The trick is that the last "extra" load is not taken
18034 // from the address you might suspect (sizeof(vector) bytes after the
18035 // last requested load), but rather sizeof(vector) - 1 bytes after the
18036 // last requested vector. The point of this is to avoid a page fault if
18037 // the base address happened to be aligned. This works because if the
18038 // base address is aligned, then adding less than a full vector length
18039 // will cause the last vector in the sequence to be (re)loaded.
18040 // Otherwise, the next vector will be fetched as you might suspect was
18041 // necessary.
18042
18043 // We might be able to reuse the permutation generation from
18044 // a different base address offset from this one by an aligned amount.
18045 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
18046 // optimization later.
18047 Intrinsic::ID Intr, IntrLD, IntrPerm;
18048 MVT PermCntlTy, PermTy, LDTy;
18049 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18050 : Intrinsic::ppc_altivec_lvsl;
18051 IntrLD = Intrinsic::ppc_altivec_lvx;
18052 IntrPerm = Intrinsic::ppc_altivec_vperm;
18053 PermCntlTy = MVT::v16i8;
18054 PermTy = MVT::v4i32;
18055 LDTy = MVT::v4i32;
18056
18057 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
18058
18059 // Create the new MMO for the new base load. It is like the original MMO,
18060 // but represents an area in memory almost twice the vector size centered
18061 // on the original address. If the address is unaligned, we might start
18062 // reading up to (sizeof(vector)-1) bytes below the address of the
18063 // original unaligned load.
18065 MachineMemOperand *BaseMMO =
18066 MF.getMachineMemOperand(LD->getMemOperand(),
18067 -(int64_t)MemVT.getStoreSize()+1,
18068 2*MemVT.getStoreSize()-1);
18069
18070 // Create the new base load.
18071 SDValue LDXIntID =
18072 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
18073 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
18074 SDValue BaseLoad =
18076 DAG.getVTList(PermTy, MVT::Other),
18077 BaseLoadOps, LDTy, BaseMMO);
18078
18079 // Note that the value of IncOffset (which is provided to the next
18080 // load's pointer info offset value, and thus used to calculate the
18081 // alignment), and the value of IncValue (which is actually used to
18082 // increment the pointer value) are different! This is because we
18083 // require the next load to appear to be aligned, even though it
18084 // is actually offset from the base pointer by a lesser amount.
18085 int IncOffset = VT.getSizeInBits() / 8;
18086 int IncValue = IncOffset;
18087
18088 // Walk (both up and down) the chain looking for another load at the real
18089 // (aligned) offset (the alignment of the other load does not matter in
18090 // this case). If found, then do not use the offset reduction trick, as
18091 // that will prevent the loads from being later combined (as they would
18092 // otherwise be duplicates).
18093 if (!findConsecutiveLoad(LD, DAG))
18094 --IncValue;
18095
18097 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
18098 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
18099
18100 MachineMemOperand *ExtraMMO =
18101 MF.getMachineMemOperand(LD->getMemOperand(),
18102 1, 2*MemVT.getStoreSize()-1);
18103 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
18104 SDValue ExtraLoad =
18106 DAG.getVTList(PermTy, MVT::Other),
18107 ExtraLoadOps, LDTy, ExtraMMO);
18108
18109 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18110 BaseLoad.getValue(1), ExtraLoad.getValue(1));
18111
18112 // Because vperm has a big-endian bias, we must reverse the order
18113 // of the input vectors and complement the permute control vector
18114 // when generating little endian code. We have already handled the
18115 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
18116 // and ExtraLoad here.
18117 SDValue Perm;
18118 if (isLittleEndian)
18119 Perm = BuildIntrinsicOp(IntrPerm,
18120 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
18121 else
18122 Perm = BuildIntrinsicOp(IntrPerm,
18123 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
18124
18125 if (VT != PermTy)
18126 Perm = Subtarget.hasAltivec()
18127 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
18128 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
18129 DAG.getTargetConstant(1, dl, MVT::i64));
18130 // second argument is 1 because this rounding
18131 // is always exact.
18132
18133 // The output of the permutation is our loaded result, the TokenFactor is
18134 // our new chain.
18135 DCI.CombineTo(N, Perm, TF);
18136 return SDValue(N, 0);
18137 }
18138 }
18139 break;
18141 bool isLittleEndian = Subtarget.isLittleEndian();
18142 unsigned IID = N->getConstantOperandVal(0);
18143 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18144 : Intrinsic::ppc_altivec_lvsl);
18145 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
18146 SDValue Add = N->getOperand(1);
18147
18148 int Bits = 4 /* 16 byte alignment */;
18149
18150 if (DAG.MaskedValueIsZero(Add->getOperand(1),
18151 APInt::getAllOnes(Bits /* alignment */)
18152 .zext(Add.getScalarValueSizeInBits()))) {
18153 SDNode *BasePtr = Add->getOperand(0).getNode();
18154 for (SDNode *U : BasePtr->users()) {
18155 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18156 U->getConstantOperandVal(0) == IID) {
18157 // We've found another LVSL/LVSR, and this address is an aligned
18158 // multiple of that one. The results will be the same, so use the
18159 // one we've just found instead.
18160
18161 return SDValue(U, 0);
18162 }
18163 }
18164 }
18165
18166 if (isa<ConstantSDNode>(Add->getOperand(1))) {
18167 SDNode *BasePtr = Add->getOperand(0).getNode();
18168 for (SDNode *U : BasePtr->users()) {
18169 if (U->getOpcode() == ISD::ADD &&
18170 isa<ConstantSDNode>(U->getOperand(1)) &&
18171 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
18172 (1ULL << Bits) ==
18173 0) {
18174 SDNode *OtherAdd = U;
18175 for (SDNode *V : OtherAdd->users()) {
18176 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18177 V->getConstantOperandVal(0) == IID) {
18178 return SDValue(V, 0);
18179 }
18180 }
18181 }
18182 }
18183 }
18184 }
18185
18186 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
18187 // Expose the vabsduw/h/b opportunity for down stream
18188 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
18189 (IID == Intrinsic::ppc_altivec_vmaxsw ||
18190 IID == Intrinsic::ppc_altivec_vmaxsh ||
18191 IID == Intrinsic::ppc_altivec_vmaxsb)) {
18192 SDValue V1 = N->getOperand(1);
18193 SDValue V2 = N->getOperand(2);
18194 if ((V1.getSimpleValueType() == MVT::v4i32 ||
18195 V1.getSimpleValueType() == MVT::v8i16 ||
18196 V1.getSimpleValueType() == MVT::v16i8) &&
18198 // (0-a, a)
18199 if (V1.getOpcode() == ISD::SUB &&
18201 V1.getOperand(1) == V2) {
18202 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
18203 }
18204 // (a, 0-a)
18205 if (V2.getOpcode() == ISD::SUB &&
18207 V2.getOperand(1) == V1) {
18208 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18209 }
18210 // (x-y, y-x)
18211 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
18212 V1.getOperand(0) == V2.getOperand(1) &&
18213 V1.getOperand(1) == V2.getOperand(0)) {
18214 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18215 }
18216 }
18217 }
18218 }
18219
18220 break;
18222 switch (N->getConstantOperandVal(1)) {
18223 default:
18224 break;
18225 case Intrinsic::ppc_altivec_vsum4sbs:
18226 case Intrinsic::ppc_altivec_vsum4shs:
18227 case Intrinsic::ppc_altivec_vsum4ubs: {
18228 // These sum-across intrinsics only have a chain due to the side effect
18229 // that they may set the SAT bit. If we know the SAT bit will not be set
18230 // for some inputs, we can replace any uses of their chain with the
18231 // input chain.
18232 if (BuildVectorSDNode *BVN =
18233 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
18234 APInt APSplatBits, APSplatUndef;
18235 unsigned SplatBitSize;
18236 bool HasAnyUndefs;
18237 bool BVNIsConstantSplat = BVN->isConstantSplat(
18238 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
18239 !Subtarget.isLittleEndian());
18240 // If the constant splat vector is 0, the SAT bit will not be set.
18241 if (BVNIsConstantSplat && APSplatBits == 0)
18242 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
18243 }
18244 return SDValue();
18245 }
18246 case Intrinsic::ppc_vsx_lxvw4x:
18247 case Intrinsic::ppc_vsx_lxvd2x:
18248 // For little endian, VSX loads require generating lxvd2x/xxswapd.
18249 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
18250 if (Subtarget.needsSwapsForVSXMemOps())
18251 return expandVSXLoadForLE(N, DCI);
18252 break;
18253 }
18254 break;
18256 // For little endian, VSX stores require generating xxswapd/stxvd2x.
18257 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
18258 if (Subtarget.needsSwapsForVSXMemOps()) {
18259 switch (N->getConstantOperandVal(1)) {
18260 default:
18261 break;
18262 case Intrinsic::ppc_vsx_stxvw4x:
18263 case Intrinsic::ppc_vsx_stxvd2x:
18264 return expandVSXStoreForLE(N, DCI);
18265 }
18266 }
18267 break;
18268 case ISD::BSWAP: {
18269 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
18270 // For subtargets without LDBRX, we can still do better than the default
18271 // expansion even for 64-bit BSWAP (LOAD).
18272 bool Is64BitBswapOn64BitTgt =
18273 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
18274 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
18275 N->getOperand(0).hasOneUse();
18276 if (IsSingleUseNormalLd &&
18277 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
18278 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
18279 SDValue Load = N->getOperand(0);
18280 LoadSDNode *LD = cast<LoadSDNode>(Load);
18281 // Create the byte-swapping load.
18282 SDValue Ops[] = {
18283 LD->getChain(), // Chain
18284 LD->getBasePtr(), // Ptr
18285 DAG.getValueType(N->getValueType(0)) // VT
18286 };
18287 SDValue BSLoad =
18288 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18289 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18290 MVT::i64 : MVT::i32, MVT::Other),
18291 Ops, LD->getMemoryVT(), LD->getMemOperand());
18292
18293 // If this is an i16 load, insert the truncate.
18294 SDValue ResVal = BSLoad;
18295 if (N->getValueType(0) == MVT::i16)
18296 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18297
18298 // First, combine the bswap away. This makes the value produced by the
18299 // load dead.
18300 DCI.CombineTo(N, ResVal);
18301
18302 // Next, combine the load away, we give it a bogus result value but a real
18303 // chain result. The result value is dead because the bswap is dead.
18304 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18305
18306 // Return N so it doesn't get rechecked!
18307 return SDValue(N, 0);
18308 }
18309 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18310 // before legalization so that the BUILD_PAIR is handled correctly.
18311 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18312 !IsSingleUseNormalLd)
18313 return SDValue();
18314 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18315
18316 // Can't split volatile or atomic loads.
18317 if (!LD->isSimple())
18318 return SDValue();
18319 SDValue BasePtr = LD->getBasePtr();
18320 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18321 LD->getPointerInfo(), LD->getAlign());
18322 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18323 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18324 DAG.getIntPtrConstant(4, dl));
18326 LD->getMemOperand(), 4, 4);
18327 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18328 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18329 SDValue Res;
18330 if (Subtarget.isLittleEndian())
18331 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18332 else
18333 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18334 SDValue TF =
18335 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18336 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18337 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18338 return Res;
18339 }
18340 case PPCISD::VCMP:
18341 // If a VCMP_rec node already exists with exactly the same operands as this
18342 // node, use its result instead of this node (VCMP_rec computes both a CR6
18343 // and a normal output).
18344 //
18345 if (!N->getOperand(0).hasOneUse() &&
18346 !N->getOperand(1).hasOneUse() &&
18347 !N->getOperand(2).hasOneUse()) {
18348
18349 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18350 SDNode *VCMPrecNode = nullptr;
18351
18352 SDNode *LHSN = N->getOperand(0).getNode();
18353 for (SDNode *User : LHSN->users())
18354 if (User->getOpcode() == PPCISD::VCMP_rec &&
18355 User->getOperand(1) == N->getOperand(1) &&
18356 User->getOperand(2) == N->getOperand(2) &&
18357 User->getOperand(0) == N->getOperand(0)) {
18358 VCMPrecNode = User;
18359 break;
18360 }
18361
18362 // If there is no VCMP_rec node, or if the flag value has a single use,
18363 // don't transform this.
18364 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18365 break;
18366
18367 // Look at the (necessarily single) use of the flag value. If it has a
18368 // chain, this transformation is more complex. Note that multiple things
18369 // could use the value result, which we should ignore.
18370 SDNode *FlagUser = nullptr;
18371 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18372 FlagUser == nullptr; ++UI) {
18373 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18374 SDNode *User = UI->getUser();
18375 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18376 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18377 FlagUser = User;
18378 break;
18379 }
18380 }
18381 }
18382
18383 // If the user is a MFOCRF instruction, we know this is safe.
18384 // Otherwise we give up for right now.
18385 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18386 return SDValue(VCMPrecNode, 0);
18387 }
18388 break;
18389 case ISD::BR_CC: {
18390 // If this is a branch on an altivec predicate comparison, lower this so
18391 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18392 // lowering is done pre-legalize, because the legalizer lowers the predicate
18393 // compare down to code that is difficult to reassemble.
18394 // This code also handles branches that depend on the result of a store
18395 // conditional.
18396 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18397 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18398
18399 int CompareOpc;
18400 bool isDot;
18401
18402 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18403 break;
18404
18405 // Since we are doing this pre-legalize, the RHS can be a constant of
18406 // arbitrary bitwidth which may cause issues when trying to get the value
18407 // from the underlying APInt.
18408 auto RHSAPInt = RHS->getAsAPIntVal();
18409 if (!RHSAPInt.isIntN(64))
18410 break;
18411
18412 unsigned Val = RHSAPInt.getZExtValue();
18413 auto isImpossibleCompare = [&]() {
18414 // If this is a comparison against something other than 0/1, then we know
18415 // that the condition is never/always true.
18416 if (Val != 0 && Val != 1) {
18417 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18418 return N->getOperand(0);
18419 // Always !=, turn it into an unconditional branch.
18420 return DAG.getNode(ISD::BR, dl, MVT::Other,
18421 N->getOperand(0), N->getOperand(4));
18422 }
18423 return SDValue();
18424 };
18425 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18426 unsigned StoreWidth = 0;
18427 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18428 isStoreConditional(LHS, StoreWidth)) {
18429 if (SDValue Impossible = isImpossibleCompare())
18430 return Impossible;
18431 PPC::Predicate CompOpc;
18432 // eq 0 => ne
18433 // ne 0 => eq
18434 // eq 1 => eq
18435 // ne 1 => ne
18436 if (Val == 0)
18437 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18438 else
18439 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18440
18441 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18442 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18443 auto *MemNode = cast<MemSDNode>(LHS);
18444 SDValue ConstSt = DAG.getMemIntrinsicNode(
18445 PPCISD::STORE_COND, dl,
18446 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18447 MemNode->getMemoryVT(), MemNode->getMemOperand());
18448
18449 SDValue InChain;
18450 // Unchain the branch from the original store conditional.
18451 if (N->getOperand(0) == LHS.getValue(1))
18452 InChain = LHS.getOperand(0);
18453 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18454 SmallVector<SDValue, 4> InChains;
18455 SDValue InTF = N->getOperand(0);
18456 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18457 if (InTF.getOperand(i) != LHS.getValue(1))
18458 InChains.push_back(InTF.getOperand(i));
18459 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18460 }
18461
18462 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18463 DAG.getConstant(CompOpc, dl, MVT::i32),
18464 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18465 ConstSt.getValue(2));
18466 }
18467
18468 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18469 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18470 assert(isDot && "Can't compare against a vector result!");
18471
18472 if (SDValue Impossible = isImpossibleCompare())
18473 return Impossible;
18474
18475 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18476 // Create the PPCISD altivec 'dot' comparison node.
18477 SDValue Ops[] = {
18478 LHS.getOperand(2), // LHS of compare
18479 LHS.getOperand(3), // RHS of compare
18480 DAG.getConstant(CompareOpc, dl, MVT::i32)
18481 };
18482 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18483 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18484
18485 // Unpack the result based on how the target uses it.
18486 PPC::Predicate CompOpc;
18487 switch (LHS.getConstantOperandVal(1)) {
18488 default: // Can't happen, don't crash on invalid number though.
18489 case 0: // Branch on the value of the EQ bit of CR6.
18490 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18491 break;
18492 case 1: // Branch on the inverted value of the EQ bit of CR6.
18493 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18494 break;
18495 case 2: // Branch on the value of the LT bit of CR6.
18496 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18497 break;
18498 case 3: // Branch on the inverted value of the LT bit of CR6.
18499 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18500 break;
18501 }
18502
18503 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18504 DAG.getConstant(CompOpc, dl, MVT::i32),
18505 DAG.getRegister(PPC::CR6, MVT::i32),
18506 N->getOperand(4), CompNode.getValue(1));
18507 }
18508 break;
18509 }
18510 case ISD::BUILD_VECTOR:
18511 return DAGCombineBuildVector(N, DCI);
18512 case PPCISD::ADDC:
18513 return DAGCombineAddc(N, DCI);
18514
18515 case ISD::BITCAST:
18516 return DAGCombineBitcast(N, DCI);
18517 }
18518
18519 return SDValue();
18520}
18521
/// Build a replacement sequence for a signed division whose divisor is a
/// power of two or a negated power of two ("fold (sdiv X, pow2)").
///
/// NOTE(review): the line carrying the function name and its leading
/// parameters (embedded line 18523) is missing from this listing. From the
/// body, `N` is the node being replaced and `Divisor` is the constant
/// divisor -- confirm against the declaration.
///
/// Returns an empty SDValue when the fold is not applied: for an i64 result
/// on a subtarget that is not PPC64, for any result type other than i32/i64,
/// or when the divisor is neither a power of two nor the negation of one.
/// Every node created here is appended to `Created`.
18522SDValue
18524 SelectionDAG &DAG,
18525 SmallVectorImpl<SDNode *> &Created) const {
18526 // fold (sdiv X, pow2)
18527 EVT VT = N->getValueType(0);
18528 if (VT == MVT::i64 && !Subtarget.isPPC64())
18529 return SDValue();
18530 if ((VT != MVT::i32 && VT != MVT::i64) ||
18531 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18532 return SDValue();
18533
18534 SDLoc DL(N);
18535 SDValue N0 = N->getOperand(0);
18536
      // For a negated power of two, negate first so that the trailing-zero
      // count is taken on the positive magnitude; that count is log2 of it.
18537 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18538 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18539 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18540
      // A single SRA_ADDZE node takes the dividend and the shift amount.
18541 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18542 Created.push_back(Op.getNode());
18543
      // Dividing by -(2^k) is dividing by 2^k and then negating: 0 - Op.
18544 if (IsNegPow2) {
18545 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18546 Created.push_back(Op.getNode());
18547 }
18548
18549 return Op;
18550}
18551
18552//===----------------------------------------------------------------------===//
18553// Inline Assembly Support
18554//===----------------------------------------------------------------------===//
18555
/// Record in `Known` which bits of the PowerPC-specific node `Op` are
/// guaranteed to be zero.
///
/// NOTE(review): this listing drops the line with the function name and the
/// `Op` parameter (embedded line 18556) and the two case labels that open the
/// inner intrinsic switches (embedded lines 18580 and 18607). Judging by
/// which operand holds the intrinsic ID, the first inner switch appears to
/// handle chain-less intrinsics (ID in operand 0) and the second chained ones
/// (ID in operand 1) -- confirm against the real source.
///
/// `DemandedElts`, `DAG` and `Depth` are not used by the visible code.
18557 KnownBits &Known,
18558 const APInt &DemandedElts,
18559 const SelectionDAG &DAG,
18560 unsigned Depth) const {
      // Start from "nothing known"; each case below only adds known zeros.
18561 Known.resetAll();
18562 switch (Op.getOpcode()) {
18563 default: break;
18564 case PPCISD::LBRX: {
18565 // lhbrx is known to have the top bits cleared out.
      // Operand 2 is the VT operand describing the width that was loaded.
18566 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18567 Known.Zero = 0xFFFF0000;
18568 break;
18569 }
18570 case PPCISD::ADDE: {
      // Only result 0 (the sum) is described; other results stay unknown.
18571 if (Op.getResNo() == 0) {
18572 // (0|1), _ = ADDE 0, 0, CARRY
      // With both addends zero the sum is just the carry-in, i.e. 0 or 1.
18573 SDValue LHS = Op.getOperand(0);
18574 SDValue RHS = Op.getOperand(1);
18575 if (isNullConstant(LHS) && isNullConstant(RHS))
18576 Known.Zero = ~1ULL;
18577 }
18578 break;
18579 }
      // NOTE(review): the case label for this switch is missing here.
18581 switch (Op.getConstantOperandVal(0)) {
18582 default: break;
18583 case Intrinsic::ppc_altivec_vcmpbfp_p:
18584 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18585 case Intrinsic::ppc_altivec_vcmpequb_p:
18586 case Intrinsic::ppc_altivec_vcmpequh_p:
18587 case Intrinsic::ppc_altivec_vcmpequw_p:
18588 case Intrinsic::ppc_altivec_vcmpequd_p:
18589 case Intrinsic::ppc_altivec_vcmpequq_p:
18590 case Intrinsic::ppc_altivec_vcmpgefp_p:
18591 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18592 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18593 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18594 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18595 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18596 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18597 case Intrinsic::ppc_altivec_vcmpgtub_p:
18598 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18599 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18600 case Intrinsic::ppc_altivec_vcmpgtud_p:
18601 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18602 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18603 break;
18604 }
18605 break;
18606 }
      // NOTE(review): the case label for this switch is missing here.
18608 switch (Op.getConstantOperandVal(1)) {
18609 default:
18610 break;
18611 case Intrinsic::ppc_load2r:
18612 // Top bits are cleared for load2r (which is the same as lhbrx).
18613 Known.Zero = 0xFFFF0000;
18614 break;
18615 }
18616 break;
18617 }
18618 }
18619}
18620
/// Choose a preferred alignment for the machine loop `ML`, depending on the
/// CPU directive of the subtarget.
///
/// For the listed 970/POWER4..POWER11/future directives, a loop is given
/// 32-byte alignment when it is a nested innermost loop, or when its total
/// instruction size is more than 16 and at most 32 bytes. A null `ML` skips
/// both checks.
///
/// NOTE(review): the signature (embedded line 18621), the condition opening
/// the innermost-loop block (18639) and the final return statement (18667)
/// are missing from this listing, so the fallback value is not visible here.
18622 switch (Subtarget.getCPUDirective()) {
18623 default: break;
18624 case PPC::DIR_970:
18625 case PPC::DIR_PWR4:
18626 case PPC::DIR_PWR5:
18627 case PPC::DIR_PWR5X:
18628 case PPC::DIR_PWR6:
18629 case PPC::DIR_PWR6X:
18630 case PPC::DIR_PWR7:
18631 case PPC::DIR_PWR8:
18632 case PPC::DIR_PWR9:
18633 case PPC::DIR_PWR10:
18634 case PPC::DIR_PWR11:
18635 case PPC::DIR_PWR_FUTURE: {
18636 if (!ML)
18637 break;
18638
      // NOTE(review): the line that opens this block is missing here.
18640 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
18641 // so that we can decrease cache misses and branch-prediction misses.
18642 // Actual alignment of the loop will depend on the hotness check and other
18643 // logic in alignBlocks.
18644 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18645 return Align(32);
18646 }
18647
18648 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18649
18650 // For small loops (between 5 and 8 instructions), align to a 32-byte
18651 // boundary so that the entire loop fits in one instruction-cache line.
      // The size is accumulated in bytes; 17..32 bytes corresponds to 5..8
      // instructions presumably because instructions are 4 bytes each.
18652 uint64_t LoopSize = 0;
18653 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18654 for (const MachineInstr &J : **I) {
18655 LoopSize += TII->getInstSizeInBytes(J);
      // This break leaves only the per-instruction loop; later blocks are
      // still visited, but LoopSize only grows, so the test below is
      // unaffected.
18656 if (LoopSize > 32)
18657 break;
18658 }
18659
18660 if (LoopSize > 16 && LoopSize <= 32)
18661 return Align(32);
18662
18663 break;
18664 }
18665 }
18666
18668}
18669
18670/// getConstraintType - Given a constraint, return the type of
18671/// constraint it is for this target.
///
/// Single letters 'b', 'r', 'f', 'd', 'v' and 'y' are register-class
/// constraints and 'Z' is a memory constraint. The two-letter constraints
/// "wc", "wa", "wd", "wf", "ws", "wi" and "ww" are register-class
/// constraints as well. Anything else is passed to the generic
/// TargetLowering implementation.
///
/// NOTE(review): the signature lines (embedded lines 18672-18673) are
/// missing from this listing.
18674 if (Constraint.size() == 1) {
18675 switch (Constraint[0]) {
      // Unlisted single letters drop out of the switch and reach the generic
      // handler at the bottom.
18676 default: break;
18677 case 'b':
18678 case 'r':
18679 case 'f':
18680 case 'd':
18681 case 'v':
18682 case 'y':
18683 return C_RegisterClass;
18684 case 'Z':
18685 // FIXME: While Z does indicate a memory constraint, it specifically
18686 // indicates an r+r address (used in conjunction with the 'y' modifier
18687 // in the replacement string). Currently, we're forcing the base
18688 // register to be r0 in the asm printer (which is interpreted as zero)
18689 // and forming the complete address in the second register. This is
18690 // suboptimal.
18691 return C_Memory;
18692 }
18693 } else if (Constraint == "wc") { // individual CR bits.
18694 return C_RegisterClass;
18695 } else if (Constraint == "wa" || Constraint == "wd" ||
18696 Constraint == "wf" || Constraint == "ws" ||
18697 Constraint == "wi" || Constraint == "ww") {
18698 return C_RegisterClass; // VSX registers.
18699 }
18700 return TargetLowering::getConstraintType(Constraint);
18701}
18702
18703/// Examine constraint type and operand type and determine a weight value.
18704/// This object must already have been set up with the operand type
18705/// and the current alternative constraint selected.
///
/// The two-letter "w?" constraints are matched first against the IR type of
/// the call operand; a match returns CW_Register immediately. Otherwise only
/// the first character of `constraint` is examined.
///
/// NOTE(review): the signature (embedded lines 18706-18707), the declaration
/// and initial value of `weight` (18709) and the body of the `default:` case
/// (18734) are missing from this listing. Letters with no case below, such
/// as 'r' or the 'w' of an unmatched two-letter constraint, take that
/// default path.
18708 AsmOperandInfo &info, const char *constraint) const {
18710 Value *CallOperandVal = info.CallOperandVal;
18711 // If we don't have a value, we can't do a match,
18712 // but allow it at the lowest weight.
18713 if (!CallOperandVal)
18714 return CW_Default;
18715 Type *type = CallOperandVal->getType();
18716
18717 // Look at the constraint type.
18718 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18719 return CW_Register; // an individual CR bit.
18720 else if ((StringRef(constraint) == "wa" ||
18721 StringRef(constraint) == "wd" ||
18722 StringRef(constraint) == "wf") &&
18723 type->isVectorTy())
18724 return CW_Register;
18725 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18726 return CW_Register; // only holds 64-bit integer data.
18727 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18728 return CW_Register;
18729 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18730 return CW_Register;
18731
      // Single-letter handling: `weight` keeps its initial value unless the
      // operand type suits the letter.
18732 switch (*constraint) {
18733 default:
18735 break;
18736 case 'b':
18737 if (type->isIntegerTy())
18738 weight = CW_Register;
18739 break;
18740 case 'f':
18741 if (type->isFloatTy())
18742 weight = CW_Register;
18743 break;
18744 case 'd':
18745 if (type->isDoubleTy())
18746 weight = CW_Register;
18747 break;
18748 case 'v':
18749 if (type->isVectorTy())
18750 weight = CW_Register;
18751 break;
      // 'y' and 'Z' are accepted regardless of the operand type.
18752 case 'y':
18753 weight = CW_Register;
18754 break;
18755 case 'Z':
18756 weight = CW_Memory;
18757 break;
18758 }
18759 return weight;
18760}
18761
/// Map an inline-asm register constraint to a (physical register, register
/// class) pair for value type `VT`. A first member of 0 means "any register
/// of the class".
///
/// The function handles, in order: the single GCC RS6000 letters, the
/// two-letter "w?" and "lr" constraints, explicitly named "{vsN}" and "{fN}"
/// registers, and finally a generic lookup whose result is adjusted for
/// 64-bit GPRs and the "{cc}" alias.
///
/// NOTE(review): the line with the function name and first parameter
/// (embedded line 18763) and the initialiser of `R` (18862) are missing from
/// this listing; `TRI` used below is presumably that first parameter.
18762std::pair<unsigned, const TargetRegisterClass *>
18764 StringRef Constraint,
18765 MVT VT) const {
18766 if (Constraint.size() == 1) {
18767 // GCC RS6000 Constraint Letters
18768 switch (Constraint[0]) {
18769 case 'b': // R1-R31
18770 if (VT == MVT::i64 && Subtarget.isPPC64())
18771 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18772 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18773 case 'r': // R0-R31
18774 if (VT == MVT::i64 && Subtarget.isPPC64())
18775 return std::make_pair(0U, &PPC::G8RCRegClass);
18776 return std::make_pair(0U, &PPC::GPRCRegClass);
18777 // 'd' and 'f' constraints are both defined to be "the floating point
18778 // registers", where one is for 32-bit and the other for 64-bit. We don't
18779 // really care overly much here so just give them all the same reg classes.
18780 case 'd':
18781 case 'f':
      // With SPE, 32-bit values are placed in GPRC and 64-bit values in
      // SPERC. Types other than f32/i32/f64/i64 leave the switch unmatched.
18782 if (Subtarget.hasSPE()) {
18783 if (VT == MVT::f32 || VT == MVT::i32)
18784 return std::make_pair(0U, &PPC::GPRCRegClass);
18785 if (VT == MVT::f64 || VT == MVT::i64)
18786 return std::make_pair(0U, &PPC::SPERCRegClass);
18787 } else {
18788 if (VT == MVT::f32 || VT == MVT::i32)
18789 return std::make_pair(0U, &PPC::F4RCRegClass);
18790 if (VT == MVT::f64 || VT == MVT::i64)
18791 return std::make_pair(0U, &PPC::F8RCRegClass);
18792 }
18793 break;
18794 case 'v':
18795 if (Subtarget.hasAltivec() && VT.isVector())
18796 return std::make_pair(0U, &PPC::VRRCRegClass);
18797 else if (Subtarget.hasVSX())
18798 // Scalars in Altivec registers only make sense with VSX.
18799 return std::make_pair(0U, &PPC::VFRCRegClass);
18800 break;
18801 case 'y': // crrc
18802 return std::make_pair(0U, &PPC::CRRCRegClass);
18803 }
18804 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18805 // An individual CR bit.
18806 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18807 } else if ((Constraint == "wa" || Constraint == "wd" ||
18808 Constraint == "wf" || Constraint == "wi") &&
18809 Subtarget.hasVSX()) {
18810 // A VSX register for either a scalar (FP) or vector. There is no
18811 // support for single precision scalars on subtargets prior to Power8.
18812 if (VT.isVector())
18813 return std::make_pair(0U, &PPC::VSRCRegClass);
18814 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18815 return std::make_pair(0U, &PPC::VSSRCRegClass);
18816 return std::make_pair(0U, &PPC::VSFRCRegClass);
18817 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18818 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18819 return std::make_pair(0U, &PPC::VSSRCRegClass);
18820 else
18821 return std::make_pair(0U, &PPC::VSFRCRegClass);
18822 } else if (Constraint == "lr") {
18823 if (VT == MVT::i64)
18824 return std::make_pair(0U, &PPC::LR8RCRegClass);
18825 else
18826 return std::make_pair(0U, &PPC::LRRCRegClass);
18827 }
18828
18829 // Handle special cases of physical registers that are not properly handled
18830 // by the base class.
18831 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18832 // If we name a VSX register, we can't defer to the base class because it
18833 // will not recognize the correct register (their names will be VSL{0-31}
18834 // and V{0-31} so they won't match). So we match them here.
18835 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      // Parse the digits that follow "{vs"; atoi stops at the closing brace.
      // NOTE(review): the range is enforced only by an assert here, whereas
      // the "{f..}" path below uses report_fatal_error. With asserts
      // disabled, an out-of-range number would yield a register outside the
      // intended range -- confirm whether that is acceptable.
18836 int VSNum = atoi(Constraint.data() + 3);
18837 assert(VSNum >= 0 && VSNum <= 63 &&
18838 "Attempted to access a vsr out of range");
      // Numbers 0-31 map onto VSL0.., numbers 32-63 map onto V0...
18839 if (VSNum < 32)
18840 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18841 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18842 }
18843
18844 // For float registers, we can't defer to the base class as it will match
18845 // the SPILLTOVSRRC class.
18846 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18847 int RegNum = atoi(Constraint.data() + 2);
18848 if (RegNum > 31 || RegNum < 0)
18849 report_fatal_error("Invalid floating point register number");
18850 if (VT == MVT::f32 || VT == MVT::i32)
18851 return Subtarget.hasSPE()
18852 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18853 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18854 if (VT == MVT::f64 || VT == MVT::i64)
18855 return Subtarget.hasSPE()
18856 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18857 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
      // Any other VT falls through to the generic lookup below.
18858 }
18859 }
18860
      // NOTE(review): the expression initialising `R` is missing from this
      // listing (embedded line 18862).
18861 std::pair<unsigned, const TargetRegisterClass *> R =
18863
18864 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18865 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18866 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18867 // register.
18868 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18869 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18870 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18871 PPC::GPRCRegClass.contains(R.first))
18872 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18873 PPC::sub_32, &PPC::G8RCRegClass),
18874 &PPC::G8RCRegClass);
18875
18876 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18877 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18878 R.first = PPC::CR0;
18879 R.second = &PPC::CRRCRegClass;
18880 }
18881 // FIXME: This warning should ideally be emitted in the front end.
      // NOTE(review): the checks below cover registers 20 through 31, while
      // the message text says "20 to 32". The string is left untouched since
      // it is runtime output.
18882 const auto &TM = getTargetMachine();
18883 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18884 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18885 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18886 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18887 errs() << "warning: vector registers 20 to 32 are reserved in the "
18888 "default AIX AltiVec ABI and cannot be used\n";
18889 }
18890
18891 return R;
18892}
18893
18894/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18895/// vector. If it is invalid, don't add anything to Ops.
///
/// Only the immediate constraint letters 'I' through 'P' are handled here.
/// When the operand is a constant that satisfies the letter's range rule, it
/// is pushed onto `Ops` as a 64-bit target constant. Constraints longer than
/// one character return without adding anything.
///
/// NOTE(review): this listing drops the signature line with `Op` (embedded
/// line 18896), the definition of `CST` (18917), the range conditions for
/// 'J' (18930) and 'L' (18934), and the closing call that handles standard
/// constraint letters (18968).
18897 StringRef Constraint,
18898 std::vector<SDValue> &Ops,
18899 SelectionDAG &DAG) const {
18900 SDValue Result;
18901
18902 // Only support length 1 constraints.
18903 if (Constraint.size() > 1)
18904 return;
18905
18906 char Letter = Constraint[0];
18907 switch (Letter) {
18908 default: break;
18909 case 'I':
18910 case 'J':
18911 case 'K':
18912 case 'L':
18913 case 'M':
18914 case 'N':
18915 case 'O':
18916 case 'P': {
18918 if (!CST) return; // Must be an immediate to match.
18919 SDLoc dl(Op);
18920 int64_t Value = CST->getSExtValue();
18921 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18922 // numbers are printed as such.
      // In every case below, `Result` stays empty when the value is out of
      // range for the letter.
18923 switch (Letter) {
18924 default: llvm_unreachable("Unknown constraint letter!");
18925 case 'I': // "I" is a signed 16-bit constant.
18926 if (isInt<16>(Value))
18927 Result = DAG.getTargetConstant(Value, dl, TCVT);
18928 break;
18929 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
      // NOTE(review): the condition guarding this assignment is missing here.
18931 Result = DAG.getTargetConstant(Value, dl, TCVT);
18932 break;
18933 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
      // NOTE(review): the condition guarding this assignment is missing here.
18935 Result = DAG.getTargetConstant(Value, dl, TCVT);
18936 break;
18937 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18938 if (isUInt<16>(Value))
18939 Result = DAG.getTargetConstant(Value, dl, TCVT);
18940 break;
18941 case 'M': // "M" is a constant that is greater than 31.
18942 if (Value > 31)
18943 Result = DAG.getTargetConstant(Value, dl, TCVT);
18944 break;
18945 case 'N': // "N" is a positive constant that is an exact power of two.
18946 if (Value > 0 && isPowerOf2_64(Value))
18947 Result = DAG.getTargetConstant(Value, dl, TCVT);
18948 break;
18949 case 'O': // "O" is the constant zero.
18950 if (Value == 0)
18951 Result = DAG.getTargetConstant(Value, dl, TCVT);
18952 break;
18953 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
      // NOTE(review): `-Value` overflows when Value is INT64_MIN -- confirm
      // whether such an operand can reach this point.
18954 if (isInt<16>(-Value))
18955 Result = DAG.getTargetConstant(Value, dl, TCVT);
18956 break;
18957 }
18958 break;
18959 }
18960 }
18961
18962 if (Result.getNode()) {
18963 Ops.push_back(Result);
18964 return;
18965 }
18966
18967 // Handle standard constraint letters.
18969}
18970
/// Append the annotation metadata of instruction `I`, if any, to `Ops` as an
/// extra operand, but only for the ppc_tdw, ppc_tw, ppc_trapd and ppc_trap
/// intrinsics.
///
/// NOTE(review): the signature lines that declare `I` and `Ops` (embedded
/// lines 18971-18972) are missing from this listing.
18973 SelectionDAG &DAG) const {
18974 if (I.getNumOperands() <= 1)
18975 return;
      // Ops[1] is read as the intrinsic ID; bail out if it is not a constant.
18976 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18977 return;
18978 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18979 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18980 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18981 return;
18982
      // Nothing is appended when the instruction has no annotation metadata.
18983 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18984 Ops.push_back(DAG.getMDNode(MDN));
18985}
18986
18987// isLegalAddressingMode - Return true if the addressing mode represented
18988// by AM is legal for this target, for a load/store of the specified type.
//
// The mode is rejected when: a vector type uses a non-zero offset without
// Power9 vector support; the offset is outside the accepted immediate range;
// a global is used as base; or the scale/base/offset combination is anything
// other than the forms accepted in the switch below.
//
// NOTE(review): the signature line (embedded line 18989) is missing from this
// listing. `AS` and `I` are not used by the visible code.
18990 const AddrMode &AM, Type *Ty,
18991 unsigned AS,
18992 Instruction *I) const {
18993 // Vector type r+i form is supported since power9 as DQ form. We don't check
18994 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18995 // imm form is preferred and the offset can be adjusted to use imm form later
18996 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18997 // max offset to check legal addressing mode, we should be a little aggressive
18998 // to contain other offsets for that LSRUse.
18999 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
19000 return false;
19001
19002 // PPC allows a sign-extended 16-bit immediate field.
      // NOTE(review): a signed 16-bit field spans [-32768, 32767], but this
      // test accepts every offset strictly between -65536 and 65535. Confirm
      // whether the wider bound is deliberate (see the LSR remark above)
      // before tightening it.
19003 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
19004 return false;
19005
19006 // No global is ever allowed as a base.
19007 if (AM.BaseGV)
19008 return false;
19009
19010 // PPC only supports r+r and r+i addressing.
19011 switch (AM.Scale) {
19012 case 0: // "r+i" or just "i", depending on HasBaseReg.
19013 break;
19014 case 1:
19015 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
19016 return false;
19017 // Otherwise we have r+r or r+i.
19018 break;
19019 case 2:
19020 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
19021 return false;
19022 // Allow 2*r as r+r.
19023 break;
19024 default:
19025 // No other scales are supported.
19026 return false;
19027 }
19028
19029 return true;
19030}
19031
/// Lower a return-address request. Operand 0 of `Op` is the constant frame
/// depth. The function marks the return address as taken, forces the LR
/// store to be kept, and returns a load of the return address: from the
/// return-address frame index for depth 0, or from the LR save offset
/// relative to a loaded frame address for deeper frames.
///
/// NOTE(review): this listing drops the definition of `MF` (embedded line
/// 19034) and the trailing arguments of all three getLoad calls (19053,
/// 19059, 19065), so the address used for the first load and the pointer
/// info passed to each load are not visible here.
19032SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
19033 SelectionDAG &DAG) const {
19035 MachineFrameInfo &MFI = MF.getFrameInfo();
19036 MFI.setReturnAddressIsTaken(true);
19037
19038 SDLoc dl(Op);
19039 unsigned Depth = Op.getConstantOperandVal(0);
19040
19041 // Make sure the function does not optimize away the store of the RA to
19042 // the stack.
19043 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
19044 FuncInfo->setLRStoreRequired();
19045 auto PtrVT = getPointerTy(MF.getDataLayout());
19046
19047 if (Depth > 0) {
19048 // The link register (return address) is saved in the caller's frame
19049 // not the callee's stack frame. So we must get the caller's frame
19050 // address and load the return address at the LR offset from there.
19051 SDValue FrameAddr =
19052 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
      // The LR save offset comes from the frame lowering and is added to the
      // loaded frame address to form the load address.
19054 SDValue Offset =
19055 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
19056 Subtarget.getScalarIntVT());
19057 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
19058 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
19060 }
19061
19062 // Just load the return address off the stack.
19063 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
19064 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
19066}
19067
19068SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
19069 SelectionDAG &DAG) const {
19070 SDLoc dl(Op);
19071 unsigned Depth = Op.getConstantOperandVal(0);
19072
19073 MachineFunction &MF = DAG.getMachineFunction();
19074 MachineFrameInfo &MFI = MF.getFrameInfo();
19075 MFI.setFrameAddressIsTaken(true);
19076
19077 EVT PtrVT = getPointerTy(MF.getDataLayout());
19078 bool isPPC64 = PtrVT == MVT::i64;
19079
19080 // Naked functions never have a frame pointer, and so we use r1. For all
19081 // other functions, this decision must be delayed until during PEI.
19082 unsigned FrameReg;
19083 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
19084 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
19085 else
19086 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
19087
19088 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
19089 PtrVT);
19090 while (Depth--)
19091 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19092 FrameAddr, MachinePointerInfo());
19093 return FrameAddr;
19094}
19095
19096#define GET_REGISTER_MATCHER
19097#include "PPCGenAsmMatcher.inc"
19098
/// Resolve a register name used for a register global variable to a
/// register, for a value of low-level type `VT`.
///
/// Only 32-bit scalars are accepted, plus 64-bit scalars on PPC64; any other
/// type is a fatal error. An unrecognised name returns the invalid `Reg`
/// unchanged. r0 (and r2 on PPC64) are rejected with a fatal error.
///
/// NOTE(review): the signature line declaring `RegName` and `VT` (embedded
/// line 19099) and the statement that looks up `Reg` (19107) are missing
/// from this listing. `MF` is not used by the visible code.
19100 const MachineFunction &MF) const {
19101 bool IsPPC64 = Subtarget.isPPC64();
19102
19103 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
19104 if (!Is64Bit && VT != LLT::scalar(32))
19105 report_fatal_error("Invalid register global variable type");
19106
19108 if (!Reg)
19109 return Reg;
19110
19111 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
19112 // Need followup investigation as to why.
19113 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
19114 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
19115 StringRef(RegName) + "\"."));
19116
19117 // Convert GPR to GP8R register for 64bit.
      // The remapping keeps the register's index relative to R0 and rebases
      // it on X0; this presumably relies on R0.. and X0.. being numbered in
      // the same order -- confirm against the generated register enum.
19118 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
19119 Reg = Reg.id() - PPC::R0 + PPC::X0;
19120
19121 return Reg;
19122}
19123
/// Decide whether a symbol is accessed indirectly through the GOT/TOC.
///
/// The answer is always true for the 32-bit ELF ABI, for the AIX ABI, and for
/// the small and large code models. Otherwise it depends on the kind of node
/// being accessed.
///
/// NOTE(review): this listing drops the signature (embedded line 19124), the
/// definition of `CModel` (19134), the condition guarding the
/// JumpTable/BlockAddress return (19141) and the condition that defines `G`
/// (19144).
19125 // 32-bit SVR4 ABI access everything as got-indirect.
19126 if (Subtarget.is32BitELFABI())
19127 return true;
19128
19129 // AIX accesses everything indirectly through the TOC, which is similar to
19130 // the GOT.
19131 if (Subtarget.isAIXABI())
19132 return true;
19133
19135 // If it is small or large code model, module locals are accessed
19136 // indirectly by loading their address from .toc/.got.
19137 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
19138 return true;
19139
19140 // JumpTable and BlockAddress are accessed as got-indirect.
19142 return true;
19143
      // For a global, the subtarget decides whether it is an indirect symbol.
19145 return Subtarget.isGVIndirectSymbol(G->getGlobal());
19146
19147 return false;
19148}
19149
/// Always returns false: no offset folding is reported as legal.
///
/// NOTE(review): the line with the function name and parameters (embedded
/// line 19151) is missing from this listing.
19150bool
19152 // The PowerPC target isn't yet aware of offsets.
19153 return false;
19154}
19155
19158 MachineFunction &MF, unsigned Intrinsic) const {
19159 IntrinsicInfo Info;
19160 switch (Intrinsic) {
19161 case Intrinsic::ppc_atomicrmw_xchg_i128:
19162 case Intrinsic::ppc_atomicrmw_add_i128:
19163 case Intrinsic::ppc_atomicrmw_sub_i128:
19164 case Intrinsic::ppc_atomicrmw_nand_i128:
19165 case Intrinsic::ppc_atomicrmw_and_i128:
19166 case Intrinsic::ppc_atomicrmw_or_i128:
19167 case Intrinsic::ppc_atomicrmw_xor_i128:
19168 case Intrinsic::ppc_cmpxchg_i128:
19169 Info.opc = ISD::INTRINSIC_W_CHAIN;
19170 Info.memVT = MVT::i128;
19171 Info.ptrVal = I.getArgOperand(0);
19172 Info.offset = 0;
19173 Info.align = Align(16);
19176 Infos.push_back(Info);
19177 return;
19178 case Intrinsic::ppc_atomic_load_i128:
19179 Info.opc = ISD::INTRINSIC_W_CHAIN;
19180 Info.memVT = MVT::i128;
19181 Info.ptrVal = I.getArgOperand(0);
19182 Info.offset = 0;
19183 Info.align = Align(16);
19185 Infos.push_back(Info);
19186 return;
19187 case Intrinsic::ppc_atomic_store_i128:
19188 Info.opc = ISD::INTRINSIC_VOID;
19189 Info.memVT = MVT::i128;
19190 Info.ptrVal = I.getArgOperand(2);
19191 Info.offset = 0;
19192 Info.align = Align(16);
19194 Infos.push_back(Info);
19195 return;
19196 case Intrinsic::ppc_altivec_lvx:
19197 case Intrinsic::ppc_altivec_lvxl:
19198 case Intrinsic::ppc_altivec_lvebx:
19199 case Intrinsic::ppc_altivec_lvehx:
19200 case Intrinsic::ppc_altivec_lvewx:
19201 case Intrinsic::ppc_vsx_lxvd2x:
19202 case Intrinsic::ppc_vsx_lxvw4x:
19203 case Intrinsic::ppc_vsx_lxvd2x_be:
19204 case Intrinsic::ppc_vsx_lxvw4x_be:
19205 case Intrinsic::ppc_vsx_lxvl:
19206 case Intrinsic::ppc_vsx_lxvll: {
19207 EVT VT;
19208 switch (Intrinsic) {
19209 case Intrinsic::ppc_altivec_lvebx:
19210 VT = MVT::i8;
19211 break;
19212 case Intrinsic::ppc_altivec_lvehx:
19213 VT = MVT::i16;
19214 break;
19215 case Intrinsic::ppc_altivec_lvewx:
19216 VT = MVT::i32;
19217 break;
19218 case Intrinsic::ppc_vsx_lxvd2x:
19219 case Intrinsic::ppc_vsx_lxvd2x_be:
19220 VT = MVT::v2f64;
19221 break;
19222 default:
19223 VT = MVT::v4i32;
19224 break;
19225 }
19226
19227 Info.opc = ISD::INTRINSIC_W_CHAIN;
19228 Info.memVT = VT;
19229 Info.ptrVal = I.getArgOperand(0);
19230 Info.offset = -VT.getStoreSize()+1;
19231 Info.size = 2*VT.getStoreSize()-1;
19232 Info.align = Align(1);
19233 Info.flags = MachineMemOperand::MOLoad;
19234 Infos.push_back(Info);
19235 return;
19236 }
19237 case Intrinsic::ppc_altivec_stvx:
19238 case Intrinsic::ppc_altivec_stvxl:
19239 case Intrinsic::ppc_altivec_stvebx:
19240 case Intrinsic::ppc_altivec_stvehx:
19241 case Intrinsic::ppc_altivec_stvewx:
19242 case Intrinsic::ppc_vsx_stxvd2x:
19243 case Intrinsic::ppc_vsx_stxvw4x:
19244 case Intrinsic::ppc_vsx_stxvd2x_be:
19245 case Intrinsic::ppc_vsx_stxvw4x_be:
19246 case Intrinsic::ppc_vsx_stxvl:
19247 case Intrinsic::ppc_vsx_stxvll: {
19248 EVT VT;
19249 switch (Intrinsic) {
19250 case Intrinsic::ppc_altivec_stvebx:
19251 VT = MVT::i8;
19252 break;
19253 case Intrinsic::ppc_altivec_stvehx:
19254 VT = MVT::i16;
19255 break;
19256 case Intrinsic::ppc_altivec_stvewx:
19257 VT = MVT::i32;
19258 break;
19259 case Intrinsic::ppc_vsx_stxvd2x:
19260 case Intrinsic::ppc_vsx_stxvd2x_be:
19261 VT = MVT::v2f64;
19262 break;
19263 default:
19264 VT = MVT::v4i32;
19265 break;
19266 }
19267
19268 Info.opc = ISD::INTRINSIC_VOID;
19269 Info.memVT = VT;
19270 Info.ptrVal = I.getArgOperand(1);
19271 Info.offset = -VT.getStoreSize()+1;
19272 Info.size = 2*VT.getStoreSize()-1;
19273 Info.align = Align(1);
19274 Info.flags = MachineMemOperand::MOStore;
19275 Infos.push_back(Info);
19276 return;
19277 }
19278 case Intrinsic::ppc_stdcx:
19279 case Intrinsic::ppc_stwcx:
19280 case Intrinsic::ppc_sthcx:
19281 case Intrinsic::ppc_stbcx: {
19282 EVT VT;
19283 auto Alignment = Align(8);
19284 switch (Intrinsic) {
19285 case Intrinsic::ppc_stdcx:
19286 VT = MVT::i64;
19287 break;
19288 case Intrinsic::ppc_stwcx:
19289 VT = MVT::i32;
19290 Alignment = Align(4);
19291 break;
19292 case Intrinsic::ppc_sthcx:
19293 VT = MVT::i16;
19294 Alignment = Align(2);
19295 break;
19296 case Intrinsic::ppc_stbcx:
19297 VT = MVT::i8;
19298 Alignment = Align(1);
19299 break;
19300 }
19301 Info.opc = ISD::INTRINSIC_W_CHAIN;
19302 Info.memVT = VT;
19303 Info.ptrVal = I.getArgOperand(0);
19304 Info.offset = 0;
19305 Info.align = Alignment;
19307 Infos.push_back(Info);
19308 return;
19309 }
19310 default:
19311 break;
19312 }
19313}
19314
19315/// It returns EVT::Other if the type should be determined using generic
19316/// target-independent logic.
19318 LLVMContext &Context, const MemOp &Op,
19319 const AttributeList &FuncAttributes) const {
19320 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19321 // We should use Altivec/VSX loads and stores when available. For unaligned
19322 // addresses, unaligned VSX loads are only fast starting with the P8.
19323 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19324 if (Op.isMemset() && Subtarget.hasVSX()) {
19325 uint64_t TailSize = Op.size() % 16;
19326 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
19327 // element if vector element type matches tail store. For tail size
19328 // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
19329 if (TailSize > 2 && TailSize <= 4) {
19330 return MVT::v8i16;
19331 }
19332 return MVT::v4i32;
19333 }
19334 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19335 return MVT::v4i32;
19336 }
19337 }
19338
19339 if (Subtarget.isPPC64()) {
19340 return MVT::i64;
19341 }
19342
19343 return MVT::i32;
19344}
19345
19346/// Returns true if it is beneficial to convert a load of a constant
19347/// to just the constant itself.
19349 Type *Ty) const {
19350 assert(Ty->isIntegerTy());
19351
19352 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19353 return !(BitSize == 0 || BitSize > 64);
19354}
19355
19357 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19358 return false;
19359 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19360 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19361 return NumBits1 == 64 && NumBits2 == 32;
19362}
19363
19365 if (!VT1.isInteger() || !VT2.isInteger())
19366 return false;
19367 unsigned NumBits1 = VT1.getSizeInBits();
19368 unsigned NumBits2 = VT2.getSizeInBits();
19369 return NumBits1 == 64 && NumBits2 == 32;
19370}
19371
19373 // Generally speaking, zexts are not free, but they are free when they can be
19374 // folded with other operations.
19375 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19376 EVT MemVT = LD->getMemoryVT();
19377 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19378 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19379 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19380 LD->getExtensionType() == ISD::ZEXTLOAD))
19381 return true;
19382 }
19383
19384 // FIXME: Add other cases...
19385 // - 32-bit shifts with a zext to i64
19386 // - zext after ctlz, bswap, etc.
19387 // - zext after and by a constant mask
19388
19389 return TargetLowering::isZExtFree(Val, VT2);
19390}
19391
19392bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19393 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19394 "invalid fpext types");
19395 // Extending to float128 is not free.
19396 if (DestVT == MVT::f128)
19397 return false;
19398 return true;
19399}
19400
19402 return isInt<16>(Imm) || isUInt<16>(Imm);
19403}
19404
19406 return isInt<16>(Imm) || isUInt<16>(Imm);
19407}
19408
19411 unsigned *Fast) const {
19413 return false;
19414
19415 // PowerPC supports unaligned memory access for simple non-vector types.
19416 // Although accessing unaligned addresses is not as efficient as accessing
19417 // aligned addresses, it is generally more efficient than manual expansion,
19418 // and generally only traps for software emulation when crossing page
19419 // boundaries.
19420
19421 if (!VT.isSimple())
19422 return false;
19423
19424 if (VT.isFloatingPoint() && !VT.isVector() &&
19425 !Subtarget.allowsUnalignedFPAccess())
19426 return false;
19427
19428 if (VT.getSimpleVT().isVector()) {
19429 if (Subtarget.hasVSX()) {
19430 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19431 VT != MVT::v4f32 && VT != MVT::v4i32)
19432 return false;
19433 } else {
19434 return false;
19435 }
19436 }
19437
19438 if (VT == MVT::ppcf128)
19439 return false;
19440
19441 if (Fast)
19442 *Fast = 1;
19443
19444 return true;
19445}
19446
19448 SDValue C) const {
19449 // Check integral scalar types.
19450 if (!VT.isScalarInteger())
19451 return false;
19452 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19453 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19454 return false;
19455 // This transformation will generate >= 2 operations. But the following
19456 // cases will generate <= 2 instructions during ISEL. So exclude them.
19457 // 1. If the constant multiplier fits 16 bits, it can be handled by one
19458 // HW instruction, ie. MULLI
19459 // 2. If the multiplier after shifted fits 16 bits, an extra shift
19460 // instruction is needed than case 1, ie. MULLI and RLDICR
19461 int64_t Imm = ConstNode->getSExtValue();
19462 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19463 Imm >>= Shift;
19464 if (isInt<16>(Imm))
19465 return false;
19466 uint64_t UImm = static_cast<uint64_t>(Imm);
19467 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19468 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19469 return true;
19470 }
19471 return false;
19472}
19473
19479
19481 Type *Ty) const {
19482 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19483 return false;
19484 switch (Ty->getScalarType()->getTypeID()) {
19485 case Type::FloatTyID:
19486 case Type::DoubleTyID:
19487 return true;
19488 case Type::FP128TyID:
19489 return Subtarget.hasP9Vector();
19490 default:
19491 return false;
19492 }
19493}
19494
19495// FIXME: add more patterns which are not profitable to hoist.
19497 if (!I->hasOneUse())
19498 return true;
19499
19500 Instruction *User = I->user_back();
19501 assert(User && "A single use instruction with no uses.");
19502
19503 switch (I->getOpcode()) {
19504 case Instruction::FMul: {
19505 // Don't break FMA, PowerPC prefers FMA.
19506 if (User->getOpcode() != Instruction::FSub &&
19507 User->getOpcode() != Instruction::FAdd)
19508 return true;
19509
19511 const Function *F = I->getFunction();
19512 const DataLayout &DL = F->getDataLayout();
19513 Type *Ty = User->getOperand(0)->getType();
19514 bool AllowContract = I->getFastMathFlags().allowContract() &&
19515 User->getFastMathFlags().allowContract();
19516
19517 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19519 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19520 }
19521 case Instruction::Load: {
19522 // Don't break "store (load float*)" pattern, this pattern will be combined
19523 // to "store (load int32)" in later InstCombine pass. See function
19524 // combineLoadToOperationType. On PowerPC, loading a float point takes more
19525 // cycles than loading a 32 bit integer.
19526 LoadInst *LI = cast<LoadInst>(I);
19527 // For the loads that combineLoadToOperationType does nothing, like
19528 // ordered load, it should be profitable to hoist them.
19529 // For swifterror load, it can only be used for pointer to pointer type, so
19530 // later type check should get rid of this case.
19531 if (!LI->isUnordered())
19532 return true;
19533
19534 if (User->getOpcode() != Instruction::Store)
19535 return true;
19536
19537 if (I->getType()->getTypeID() != Type::FloatTyID)
19538 return true;
19539
19540 return false;
19541 }
19542 default:
19543 return true;
19544 }
19545 return true;
19546}
19547
19548const MCPhysReg *
19550 // LR is a callee-save register, but we must treat it as clobbered by any call
19551 // site. Hence we include LR in the scratch registers, which are in turn added
19552 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19553 // to CTR, which is used by any indirect call.
19554 static const MCPhysReg ScratchRegs[] = {
19555 PPC::X12, PPC::LR8, PPC::CTR8, 0
19556 };
19557
19558 return ScratchRegs;
19559}
19560
19562 const Constant *PersonalityFn) const {
19563 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19564}
19565
19567 const Constant *PersonalityFn) const {
19568 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19569}
19570
19571bool
19573 EVT VT , unsigned DefinedValues) const {
19574 if (VT == MVT::v2i64)
19575 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19576
19577 if (Subtarget.hasVSX())
19578 return true;
19579
19581}
19582
19584 if (DisableILPPref || Subtarget.enableMachineScheduler())
19586
19587 return Sched::ILP;
19588}
19589
19590// Create a fast isel object.
19592 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19593 const LibcallLoweringInfo *LibcallLowering) const {
19594 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19595}
19596
19597// 'Inverted' means the FMA opcode after negating one multiplicand.
19598// For example, (fma -a b c) = (fnmsub a b c)
19599static unsigned invertFMAOpcode(unsigned Opc) {
19600 switch (Opc) {
19601 default:
19602 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19603 case ISD::FMA:
19604 return PPCISD::FNMSUB;
19605 case PPCISD::FNMSUB:
19606 return ISD::FMA;
19607 }
19608}
19609
19611 bool LegalOps, bool OptForSize,
19613 unsigned Depth) const {
19615 return SDValue();
19616
19617 unsigned Opc = Op.getOpcode();
19618 EVT VT = Op.getValueType();
19619 SDNodeFlags Flags = Op.getNode()->getFlags();
19620
19621 switch (Opc) {
19622 case PPCISD::FNMSUB:
19623 if (!Op.hasOneUse() || !isTypeLegal(VT))
19624 break;
19625
19626 SDValue N0 = Op.getOperand(0);
19627 SDValue N1 = Op.getOperand(1);
19628 SDValue N2 = Op.getOperand(2);
19629 SDLoc Loc(Op);
19630
19632 SDValue NegN2 =
19633 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19634
19635 if (!NegN2)
19636 return SDValue();
19637
19638 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19639 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19640 // These transformations may change sign of zeroes. For example,
19641 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19642 if (Flags.hasNoSignedZeros()) {
19643 // Try and choose the cheaper one to negate.
19645 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19646 N0Cost, Depth + 1);
19647
19649 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19650 N1Cost, Depth + 1);
19651
19652 if (NegN0 && N0Cost <= N1Cost) {
19653 Cost = std::min(N0Cost, N2Cost);
19654 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19655 } else if (NegN1) {
19656 Cost = std::min(N1Cost, N2Cost);
19657 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19658 }
19659 }
19660
19661 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19662 if (isOperationLegal(ISD::FMA, VT)) {
19663 Cost = N2Cost;
19664 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19665 }
19666
19667 break;
19668 }
19669
19670 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19671 Cost, Depth);
19672}
19673
19674// Override to enable LOAD_STACK_GUARD lowering on Linux.
19676 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19677 return true;
19679}
19680
19682 bool ForCodeSize) const {
19683 if (!VT.isSimple() || !Subtarget.hasVSX())
19684 return false;
19685
19686 switch(VT.getSimpleVT().SimpleTy) {
19687 default:
19688 // For FP types that are currently not supported by PPC backend, return
19689 // false. Examples: f16, f80.
19690 return false;
19691 case MVT::f32:
19692 case MVT::f64: {
19693 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19694 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19695 return true;
19696 }
19697 bool IsExact;
19698 APSInt IntResult(16, false);
19699 // The rounding mode doesn't really matter because we only care about floats
19700 // that can be converted to integers exactly.
19701 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19702 // For exact values in the range [-16, 15] we can materialize the float.
19703 if (IsExact && IntResult <= 15 && IntResult >= -16)
19704 return true;
19705 return Imm.isZero();
19706 }
19707 case MVT::ppcf128:
19708 return Imm.isPosZero();
19709 }
19710}
19711
19712// For vector shift operation op, fold
19713// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19715 SelectionDAG &DAG) {
19716 SDValue N0 = N->getOperand(0);
19717 SDValue N1 = N->getOperand(1);
19718 EVT VT = N0.getValueType();
19719 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19720 unsigned Opcode = N->getOpcode();
19721 unsigned TargetOpcode;
19722
19723 switch (Opcode) {
19724 default:
19725 llvm_unreachable("Unexpected shift operation");
19726 case ISD::SHL:
19727 TargetOpcode = PPCISD::SHL;
19728 break;
19729 case ISD::SRL:
19730 TargetOpcode = PPCISD::SRL;
19731 break;
19732 case ISD::SRA:
19733 TargetOpcode = PPCISD::SRA;
19734 break;
19735 }
19736
19737 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19738 N1->getOpcode() == ISD::AND)
19739 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19740 if (Mask->getZExtValue() == OpSizeInBits - 1)
19741 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19742
19743 return SDValue();
19744}
19745
19746SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19747 DAGCombinerInfo &DCI) const {
19748 EVT VT = N->getValueType(0);
19749 assert(VT.isVector() && "Vector type expected.");
19750
19751 unsigned Opc = N->getOpcode();
19752 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19753 "Unexpected opcode.");
19754
19755 if (!isOperationLegal(Opc, VT))
19756 return SDValue();
19757
19758 EVT EltTy = VT.getScalarType();
19759 unsigned EltBits = EltTy.getSizeInBits();
19760 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19761 return SDValue();
19762
19763 SDValue N1 = N->getOperand(1);
19764 uint64_t SplatBits = 0;
19765 bool AddSplatCase = false;
19766 unsigned OpcN1 = N1.getOpcode();
19767 if (OpcN1 == PPCISD::VADD_SPLAT &&
19769 AddSplatCase = true;
19770 SplatBits = N1.getConstantOperandVal(0);
19771 }
19772
19773 if (!AddSplatCase) {
19774 if (OpcN1 != ISD::BUILD_VECTOR)
19775 return SDValue();
19776
19777 unsigned SplatBitSize;
19778 bool HasAnyUndefs;
19779 APInt APSplatBits, APSplatUndef;
19780 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19781 bool BVNIsConstantSplat =
19782 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19783 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19784 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19785 return SDValue();
19786 SplatBits = APSplatBits.getZExtValue();
19787 }
19788
19789 SDLoc DL(N);
19790 SDValue N0 = N->getOperand(0);
19791 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19792 // shift vector, which means the max value is 31/63. A shift vector of all
19793 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19794 // -16 to 15 range.
19795 if (SplatBits == (EltBits - 1)) {
19796 unsigned NewOpc;
19797 switch (Opc) {
19798 case ISD::SHL:
19799 NewOpc = PPCISD::SHL;
19800 break;
19801 case ISD::SRL:
19802 NewOpc = PPCISD::SRL;
19803 break;
19804 case ISD::SRA:
19805 NewOpc = PPCISD::SRA;
19806 break;
19807 }
19808 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19809 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19810 }
19811
19812 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19813 return SDValue();
19814
19815 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19816 // before the BUILD_VECTOR is replaced by a load.
19817 if (EltTy != MVT::i64 || SplatBits != 1)
19818 return SDValue();
19819
19820 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19821}
19822
19823SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19824 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19825 return Value;
19826
19827 if (N->getValueType(0).isVector())
19828 return combineVectorShift(N, DCI);
19829
19830 SDValue N0 = N->getOperand(0);
19831 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19832 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19833 N0.getOpcode() != ISD::SIGN_EXTEND ||
19834 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19835 N->getValueType(0) != MVT::i64)
19836 return SDValue();
19837
19838 // We can't save an operation here if the value is already extended, and
19839 // the existing shift is easier to combine.
19840 SDValue ExtsSrc = N0.getOperand(0);
19841 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19842 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19843 return SDValue();
19844
19845 SDLoc DL(N0);
19846 SDValue ShiftBy = SDValue(CN1, 0);
19847 // We want the shift amount to be i32 on the extswli, but the shift could
19848 // have an i64.
19849 if (ShiftBy.getValueType() == MVT::i64)
19850 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19851
19852 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19853 ShiftBy);
19854}
19855
19856SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19857 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19858 return Value;
19859
19860 if (N->getValueType(0).isVector())
19861 return combineVectorShift(N, DCI);
19862
19863 return SDValue();
19864}
19865
19866SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19867 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19868 return Value;
19869
19870 if (N->getValueType(0).isVector())
19871 return combineVectorShift(N, DCI);
19872
19873 return SDValue();
19874}
19875
19876// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19877// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19878// When C is zero, the equation (addi Z, -C) can be simplified to Z
19879// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19881 const PPCSubtarget &Subtarget) {
19882 if (!Subtarget.isPPC64())
19883 return SDValue();
19884
19885 SDValue LHS = N->getOperand(0);
19886 SDValue RHS = N->getOperand(1);
19887
19888 auto isZextOfCompareWithConstant = [](SDValue Op) {
19889 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19890 Op.getValueType() != MVT::i64)
19891 return false;
19892
19893 SDValue Cmp = Op.getOperand(0);
19894 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19895 Cmp.getOperand(0).getValueType() != MVT::i64)
19896 return false;
19897
19898 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19899 int64_t NegConstant = 0 - Constant->getSExtValue();
19900 // Due to the limitations of the addi instruction,
19901 // -C is required to be [-32768, 32767].
19902 return isInt<16>(NegConstant);
19903 }
19904
19905 return false;
19906 };
19907
19908 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19909 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19910
19911 // If there is a pattern, canonicalize a zext operand to the RHS.
19912 if (LHSHasPattern && !RHSHasPattern)
19913 std::swap(LHS, RHS);
19914 else if (!LHSHasPattern && !RHSHasPattern)
19915 return SDValue();
19916
19917 SDLoc DL(N);
19918 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19919 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19920 SDValue Cmp = RHS.getOperand(0);
19921 SDValue Z = Cmp.getOperand(0);
19922 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19923 int64_t NegConstant = 0 - Constant->getSExtValue();
19924
19925 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19926 default: break;
19927 case ISD::SETNE: {
19928 // when C == 0
19929 // --> addze X, (addic Z, -1).carry
19930 // /
19931 // add X, (zext(setne Z, C))--
19932 // \ when -32768 <= -C <= 32767 && C != 0
19933 // --> addze X, (addic (addi Z, -C), -1).carry
19934 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19935 DAG.getConstant(NegConstant, DL, MVT::i64));
19936 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19937 SDValue Addc =
19938 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19939 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19940 DAG.getConstant(0, DL, CarryType));
19941 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19942 DAG.getConstant(0, DL, MVT::i64),
19943 SDValue(Addc.getNode(), 1));
19944 }
19945 case ISD::SETEQ: {
19946 // when C == 0
19947 // --> addze X, (subfic Z, 0).carry
19948 // /
19949 // add X, (zext(sete Z, C))--
19950 // \ when -32768 <= -C <= 32767 && C != 0
19951 // --> addze X, (subfic (addi Z, -C), 0).carry
19952 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19953 DAG.getConstant(NegConstant, DL, MVT::i64));
19954 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19955 SDValue Subc =
19956 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19957 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19958 DAG.getConstant(0, DL, CarryType));
19959 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19960 DAG.getConstant(1UL, DL, CarryType));
19961 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19962 DAG.getConstant(0, DL, MVT::i64), Invert);
19963 }
19964 }
19965
19966 return SDValue();
19967}
19968
19969// Transform
19970// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19971// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19972// In this case both C1 and C2 must be known constants.
19973// C1+C2 must fit into a 34 bit signed integer.
19975 const PPCSubtarget &Subtarget) {
19976 if (!Subtarget.isUsingPCRelativeCalls())
19977 return SDValue();
19978
19979 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19980 // If we find that node try to cast the Global Address and the Constant.
19981 SDValue LHS = N->getOperand(0);
19982 SDValue RHS = N->getOperand(1);
19983
19984 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19985 std::swap(LHS, RHS);
19986
19987 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19988 return SDValue();
19989
19990 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19993
19994 // Check that both casts succeeded.
19995 if (!GSDN || !ConstNode)
19996 return SDValue();
19997
19998 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19999 SDLoc DL(GSDN);
20000
20001 // The signed int offset needs to fit in 34 bits.
20002 if (!isInt<34>(NewOffset))
20003 return SDValue();
20004
20005 // The new global address is a copy of the old global address except
20006 // that it has the updated Offset.
20007 SDValue GA =
20008 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
20009 NewOffset, GSDN->getTargetFlags());
20010 SDValue MatPCRel =
20011 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
20012 return MatPCRel;
20013}
20014
20015// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
20016// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
20017// Mathematical identity: X + 1 = X - (-1)
20018// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
20019// Requirement: VSX feature for efficient xxleqv generation
20021 const PPCSubtarget &Subtarget) {
20022
20023 EVT VT = N->getValueType(0);
20024 if (!Subtarget.hasVSX())
20025 return SDValue();
20026
20027 // Handle v2i64, v4i32, v8i16 and v16i8 types
20028 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
20029 VT == MVT::v2i64))
20030 return SDValue();
20031
20032 SDValue LHS = N->getOperand(0);
20033 SDValue RHS = N->getOperand(1);
20034
20035 // Check if RHS is BUILD_VECTOR
20036 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20037 return SDValue();
20038
20039 // Check if all the elements are 1
20040 unsigned NumOfEles = RHS.getNumOperands();
20041 for (unsigned i = 0; i < NumOfEles; ++i) {
20042 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
20043 if (!CN || CN->getSExtValue() != 1)
20044 return SDValue();
20045 }
20046 SDLoc DL(N);
20047
20048 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
20049 SmallVector<SDValue, 4> Ops(4, MinusOne);
20050 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
20051
20052 // Bitcast to the target vector type
20053 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
20054
20055 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
20056}
20057
20058SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
20059 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
20060 return Value;
20061
20062 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
20063 return Value;
20064
20065 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
20066 return Value;
20067 return SDValue();
20068}
20069
20070// Detect TRUNCATE operations on bitcasts of float128 values.
20071// What we are looking for here is the situtation where we extract a subset
20072// of bits from a 128 bit float.
20073// This can be of two forms:
20074// 1) BITCAST of f128 feeding TRUNCATE
20075// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
20076// The reason this is required is because we do not have a legal i128 type
20077// and so we want to prevent having to store the f128 and then reload part
20078// of it.
20079SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
20080 DAGCombinerInfo &DCI) const {
20081 // If we are using CRBits then try that first.
20082 if (Subtarget.useCRBits()) {
20083 // Check if CRBits did anything and return that if it did.
20084 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
20085 return CRTruncValue;
20086 }
20087
20088 SDLoc dl(N);
20089 SDValue Op0 = N->getOperand(0);
20090
20091 // Looking for a truncate of i128 to i64.
20092 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
20093 return SDValue();
20094
20095 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
20096
20097 // SRL feeding TRUNCATE.
20098 if (Op0.getOpcode() == ISD::SRL) {
20099 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
20100 // The right shift has to be by 64 bits.
20101 if (!ConstNode || ConstNode->getZExtValue() != 64)
20102 return SDValue();
20103
20104 // Switch the element number to extract.
20105 EltToExtract = EltToExtract ? 0 : 1;
20106 // Update Op0 past the SRL.
20107 Op0 = Op0.getOperand(0);
20108 }
20109
20110 // BITCAST feeding a TRUNCATE possibly via SRL.
20111 if (Op0.getOpcode() == ISD::BITCAST &&
20112 Op0.getValueType() == MVT::i128 &&
20113 Op0.getOperand(0).getValueType() == MVT::f128) {
20114 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
20115 return DCI.DAG.getNode(
20116 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
20117 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
20118 }
20119 return SDValue();
20120}
20121
20122SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
20123 SelectionDAG &DAG = DCI.DAG;
20124
20125 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
20126 if (!ConstOpOrElement)
20127 return SDValue();
20128
20129 // An imul is usually smaller than the alternative sequence for legal type.
20131 isOperationLegal(ISD::MUL, N->getValueType(0)))
20132 return SDValue();
20133
20134 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
20135 switch (this->Subtarget.getCPUDirective()) {
20136 default:
20137 // TODO: enhance the condition for subtarget before pwr8
20138 return false;
20139 case PPC::DIR_PWR8:
20140 // type mul add shl
20141 // scalar 4 1 1
20142 // vector 7 2 2
20143 return true;
20144 case PPC::DIR_PWR9:
20145 case PPC::DIR_PWR10:
20146 case PPC::DIR_PWR11:
20148 // type mul add shl
20149 // scalar 5 2 2
20150 // vector 7 2 2
20151
20152 // The cycle RATIO of the related operations is shown in the table above.
20153 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
20154 // scalar and vector type. For 2 instrs patterns, add/sub + shl
20155 // are 4, it is always profitable; but for 3 instrs patterns
20156 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
20157 // So we should only do it for vector type.
20158 return IsAddOne && IsNeg ? VT.isVector() : true;
20159 }
20160 };
20161
20162 EVT VT = N->getValueType(0);
20163 SDLoc DL(N);
20164
20165 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
20166 bool IsNeg = MulAmt.isNegative();
20167 APInt MulAmtAbs = MulAmt.abs();
20168
20169 if ((MulAmtAbs - 1).isPowerOf2()) {
20170 // (mul x, 2^N + 1) => (add (shl x, N), x)
20171 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
20172
20173 if (!IsProfitable(IsNeg, true, VT))
20174 return SDValue();
20175
20176 SDValue Op0 = N->getOperand(0);
20177 SDValue Op1 =
20178 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20179 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
20180 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
20181
20182 if (!IsNeg)
20183 return Res;
20184
20185 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
20186 } else if ((MulAmtAbs + 1).isPowerOf2()) {
20187 // (mul x, 2^N - 1) => (sub (shl x, N), x)
20188 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
20189
20190 if (!IsProfitable(IsNeg, false, VT))
20191 return SDValue();
20192
20193 SDValue Op0 = N->getOperand(0);
20194 SDValue Op1 =
20195 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20196 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
20197
20198 if (!IsNeg)
20199 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
20200 else
20201 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
20202
20203 } else {
20204 return SDValue();
20205 }
20206}
20207
20208// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
20209// in combiner since we need to check SD flags and other subtarget features.
20210SDValue PPCTargetLowering::combineFMALike(SDNode *N,
20211 DAGCombinerInfo &DCI) const {
20212 SDValue N0 = N->getOperand(0);
20213 SDValue N1 = N->getOperand(1);
20214 SDValue N2 = N->getOperand(2);
20215 SDNodeFlags Flags = N->getFlags();
20216 EVT VT = N->getValueType(0);
20217 SelectionDAG &DAG = DCI.DAG;
20218 unsigned Opc = N->getOpcode();
20220 bool LegalOps = !DCI.isBeforeLegalizeOps();
20221 SDLoc Loc(N);
20222
20223 if (!isOperationLegal(ISD::FMA, VT))
20224 return SDValue();
20225
20226 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
20227 // since (fnmsub a b c)=-0 while c-ab=+0.
20228 if (!Flags.hasNoSignedZeros())
20229 return SDValue();
20230
20231 // (fma (fneg a) b c) => (fnmsub a b c)
20232 // (fnmsub (fneg a) b c) => (fma a b c)
20233 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
20234 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
20235
20236 // (fma a (fneg b) c) => (fnmsub a b c)
20237 // (fnmsub a (fneg b) c) => (fma a b c)
20238 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
20239 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
20240
20241 return SDValue();
20242}
20243
20244bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20245 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
20246 if (!Subtarget.is64BitELFABI())
20247 return false;
20248
20249 // If not a tail call then no need to proceed.
20250 if (!CI->isTailCall())
20251 return false;
20252
20253 // If sibling calls have been disabled and tail-calls aren't guaranteed
20254 // there is no reason to duplicate.
20255 auto &TM = getTargetMachine();
20256 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
20257 return false;
20258
20259 // Can't tail call a function called indirectly, or if it has variadic args.
20260 const Function *Callee = CI->getCalledFunction();
20261 if (!Callee || Callee->isVarArg())
20262 return false;
20263
20264 // Make sure the callee and caller calling conventions are eligible for tco.
20265 const Function *Caller = CI->getParent()->getParent();
20266 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
20267 CI->getCallingConv()))
20268 return false;
20269
20270 // If the function is local then we have a good chance at tail-calling it
20271 return getTargetMachine().shouldAssumeDSOLocal(Callee);
20272}
20273
20274bool PPCTargetLowering::
20275isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
20276 const Value *Mask = AndI.getOperand(1);
20277 // If the mask is suitable for andi. or andis. we should sink the and.
20278 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
20279 // Can't handle constants wider than 64-bits.
20280 if (CI->getBitWidth() > 64)
20281 return false;
20282 int64_t ConstVal = CI->getZExtValue();
20283 return isUInt<16>(ConstVal) ||
20284 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20285 }
20286
20287 // For non-constant masks, we can always use the record-form and.
20288 return true;
20289}
20290
20291/// getAddrModeForFlags - Based on the set of address flags, select the most
20292/// optimal instruction format to match by.
20293PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20294 // This is not a node we should be handling here.
20295 if (Flags == PPC::MOF_None)
20296 return PPC::AM_None;
20297 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20298 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20299 if ((Flags & FlagSet) == FlagSet)
20300 return PPC::AM_DForm;
20301 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20302 if ((Flags & FlagSet) == FlagSet)
20303 return PPC::AM_DSForm;
20304 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20305 if ((Flags & FlagSet) == FlagSet)
20306 return PPC::AM_DQForm;
20307 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20308 if ((Flags & FlagSet) == FlagSet)
20309 return PPC::AM_PrefixDForm;
20310 // If no other forms are selected, return an X-Form as it is the most
20311 // general addressing mode.
20312 return PPC::AM_XForm;
20313}
20314
20315/// Set alignment flags based on whether or not the Frame Index is aligned.
20316/// Utilized when computing flags for address computation when selecting
20317/// load and store instructions.
20318static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20319 SelectionDAG &DAG) {
20320 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20321 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
20322 if (!FI)
20323 return;
20325 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
20326 // If this is (add $FI, $S16Imm), the alignment flags are already set
20327 // based on the immediate. We just need to clear the alignment flags
20328 // if the FI alignment is weaker.
20329 if ((FrameIndexAlign % 4) != 0)
20330 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20331 if ((FrameIndexAlign % 16) != 0)
20332 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20333 // If the address is a plain FrameIndex, set alignment flags based on
20334 // FI alignment.
20335 if (!IsAdd) {
20336 if ((FrameIndexAlign % 4) == 0)
20337 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20338 if ((FrameIndexAlign % 16) == 0)
20339 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20340 }
20341}
20342
20343/// Given a node, compute flags that are used for address computation when
20344/// selecting load and store instructions. The flags computed are stored in
20345/// FlagSet. This function takes into account whether the node is a constant,
20346/// an ADD, a disjoint OR, or neither, and computes the flags accordingly.
20347static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
20348 SelectionDAG &DAG) {
20349 // Set the alignment flags for the node depending on if the node is
20350 // 4-byte or 16-byte aligned.
20351 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
20352 if ((Imm & 0x3) == 0)
20353 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20354 if ((Imm & 0xf) == 0)
20355 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20356 };
20357
20359 // All 32-bit constants can be computed as LIS + Disp.
20360 const APInt &ConstImm = CN->getAPIntValue();
20361 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
20362 FlagSet |= PPC::MOF_AddrIsSImm32;
20363 SetAlignFlagsForImm(ConstImm.getZExtValue());
20364 setAlignFlagsForFI(N, FlagSet, DAG);
20365 }
20366 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
20367 FlagSet |= PPC::MOF_RPlusSImm34;
20368 else // Let constant materialization handle large constants.
20369 FlagSet |= PPC::MOF_NotAddNorCst;
20370 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
20371 // This address can be represented as an addition of:
20372 // - Register + Imm16 (possibly a multiple of 4/16)
20373 // - Register + Imm34
20374 // - Register + PPCISD::Lo
20375 // - Register + Register
20376 // In any case, we won't have to match this as Base + Zero.
20377 SDValue RHS = N.getOperand(1);
20379 const APInt &ConstImm = CN->getAPIntValue();
20380 if (ConstImm.isSignedIntN(16)) {
20381 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
20382 SetAlignFlagsForImm(ConstImm.getZExtValue());
20383 setAlignFlagsForFI(N, FlagSet, DAG);
20384 }
20385 if (ConstImm.isSignedIntN(34))
20386 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
20387 else
20388 FlagSet |= PPC::MOF_RPlusR; // Register.
20389 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
20390 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
20391 else
20392 FlagSet |= PPC::MOF_RPlusR;
20393 } else { // The address computation is not a constant or an addition.
20394 setAlignFlagsForFI(N, FlagSet, DAG);
20395 FlagSet |= PPC::MOF_NotAddNorCst;
20396 }
20397}
20398
20399static bool isPCRelNode(SDValue N) {
20400 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20405}
20406
20407/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
20408/// the address flags of the load/store instruction that is to be matched.
20409unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20410 SelectionDAG &DAG) const {
20411 unsigned FlagSet = PPC::MOF_None;
20412
20413 // Compute subtarget flags.
20414 if (!Subtarget.hasP9Vector())
20415 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20416 else
20417 FlagSet |= PPC::MOF_SubtargetP9;
20418
20419 if (Subtarget.hasPrefixInstrs())
20420 FlagSet |= PPC::MOF_SubtargetP10;
20421
20422 if (Subtarget.hasSPE())
20423 FlagSet |= PPC::MOF_SubtargetSPE;
20424
20425 // Check if we have a PCRel node and return early.
20426 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20427 return FlagSet;
20428
20429 // If the node is the paired load/store intrinsics, compute flags for
20430 // address computation and return early.
20431 unsigned ParentOp = Parent->getOpcode();
20432 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20433 (ParentOp == ISD::INTRINSIC_VOID))) {
20434 unsigned ID = Parent->getConstantOperandVal(1);
20435 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20436 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20437 ? Parent->getOperand(2)
20438 : Parent->getOperand(3);
20439 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20440 FlagSet |= PPC::MOF_Vector;
20441 return FlagSet;
20442 }
20443 }
20444
20445 // Mark this as something we don't want to handle here if it is atomic
20446 // or pre-increment instruction.
20447 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20448 if (LSB->isIndexed())
20449 return PPC::MOF_None;
20450
20451 // Compute in-memory type flags. This is based on if there are scalars,
20452 // floats or vectors.
20453 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20454 assert(MN && "Parent should be a MemSDNode!");
20455 EVT MemVT = MN->getMemoryVT();
20456 unsigned Size = MemVT.getSizeInBits();
20457 if (MemVT.isScalarInteger()) {
20458 assert(Size <= 128 &&
20459 "Not expecting scalar integers larger than 16 bytes!");
20460 if (Size < 32)
20461 FlagSet |= PPC::MOF_SubWordInt;
20462 else if (Size == 32)
20463 FlagSet |= PPC::MOF_WordInt;
20464 else
20465 FlagSet |= PPC::MOF_DoubleWordInt;
20466 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20467 if (Size == 128)
20468 FlagSet |= PPC::MOF_Vector;
20469 else if (Size == 256) {
20470 assert(Subtarget.pairedVectorMemops() &&
20471 "256-bit vectors are only available when paired vector memops is "
20472 "enabled!");
20473 FlagSet |= PPC::MOF_Vector;
20474 } else
20475 llvm_unreachable("Not expecting illegal vectors!");
20476 } else { // Floating point type: can be scalar, f128 or vector types.
20477 if (Size == 32 || Size == 64)
20478 FlagSet |= PPC::MOF_ScalarFloat;
20479 else if (MemVT == MVT::f128 || MemVT.isVector())
20480 FlagSet |= PPC::MOF_Vector;
20481 else
20482 llvm_unreachable("Not expecting illegal scalar floats!");
20483 }
20484
20485 // Compute flags for address computation.
20486 computeFlagsForAddressComputation(N, FlagSet, DAG);
20487
20488 // Compute type extension flags.
20489 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20490 switch (LN->getExtensionType()) {
20491 case ISD::SEXTLOAD:
20492 FlagSet |= PPC::MOF_SExt;
20493 break;
20494 case ISD::EXTLOAD:
20495 case ISD::ZEXTLOAD:
20496 FlagSet |= PPC::MOF_ZExt;
20497 break;
20498 case ISD::NON_EXTLOAD:
20499 FlagSet |= PPC::MOF_NoExt;
20500 break;
20501 }
20502 } else
20503 FlagSet |= PPC::MOF_NoExt;
20504
20505 // For integers, no extension is the same as zero extension.
20506 // We set the extension mode to zero extension so we don't have
20507 // to add separate entries in AddrModesMap for loads and stores.
20508 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20509 FlagSet |= PPC::MOF_ZExt;
20510 FlagSet &= ~PPC::MOF_NoExt;
20511 }
20512
20513 // If we don't have prefixed instructions, 34-bit constants should be
20514 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20515 bool IsNonP1034BitConst =
20517 FlagSet) == PPC::MOF_RPlusSImm34;
20518 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20519 IsNonP1034BitConst)
20520 FlagSet |= PPC::MOF_NotAddNorCst;
20521
20522 return FlagSet;
20523}
20524
20525/// SelectForceXFormMode - Given the specified address, force it to be
20526/// represented as an indexed [r+r] operation (an XForm instruction).
20528 SDValue &Base,
20529 SelectionDAG &DAG) const {
20530
20532 int16_t ForceXFormImm = 0;
20533 if (provablyDisjointOr(DAG, N) &&
20534 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20535 Disp = N.getOperand(0);
20536 Base = N.getOperand(1);
20537 return Mode;
20538 }
20539
20540 // If the address is the result of an add, we will utilize the fact that the
20541 // address calculation includes an implicit add. However, we can reduce
20542 // register pressure if we do not materialize a constant just for use as the
20543 // index register. We only get rid of the add if it is not an add of a
20544 // value and a 16-bit signed constant and both have a single use.
20545 if (N.getOpcode() == ISD::ADD &&
20546 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20547 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20548 Disp = N.getOperand(0);
20549 Base = N.getOperand(1);
20550 return Mode;
20551 }
20552
20553 // Otherwise, use R0 as the base register.
20554 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20555 N.getValueType());
20556 Base = N;
20557
20558 return Mode;
20559}
20560
20562 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20563 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20564 EVT ValVT = Val.getValueType();
20565 // If we are splitting a scalar integer into f64 parts (i.e. so they
20566 // can be placed into VFRC registers), we need to zero extend and
20567 // bitcast the values. This will ensure the value is placed into a
20568 // VSR using direct moves or stack operations as needed.
20569 if (PartVT == MVT::f64 &&
20570 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20571 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20572 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20573 Parts[0] = Val;
20574 return true;
20575 }
20576 return false;
20577}
20578
20579SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20580 SelectionDAG &DAG) const {
20581 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20583 EVT RetVT = Op.getValueType();
20584 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20585 SDValue Callee =
20586 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20587 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20589 for (const SDValue &N : Op->op_values()) {
20590 EVT ArgVT = N.getValueType();
20591 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20592 TargetLowering::ArgListEntry Entry(N, ArgTy);
20593 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20594 Entry.IsZExt = !Entry.IsSExt;
20595 Args.push_back(Entry);
20596 }
20597
20598 SDValue InChain = DAG.getEntryNode();
20599 SDValue TCChain = InChain;
20600 const Function &F = DAG.getMachineFunction().getFunction();
20601 bool isTailCall =
20602 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20603 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20604 if (isTailCall)
20605 InChain = TCChain;
20606 CLI.setDebugLoc(SDLoc(Op))
20607 .setChain(InChain)
20608 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20609 .setTailCall(isTailCall)
20610 .setSExtResult(SignExtend)
20611 .setZExtResult(!SignExtend)
20613 return TLI.LowerCallTo(CLI).first;
20614}
20615
20616SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20617 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20618 SelectionDAG &DAG) const {
20619 if (Op.getValueType() == MVT::f32)
20620 return lowerToLibCall(LibCallFloatName, Op, DAG);
20621
20622 if (Op.getValueType() == MVT::f64)
20623 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20624
20625 return SDValue();
20626}
20627
20628bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20629 SDNodeFlags Flags = Op.getNode()->getFlags();
20630 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20631 Flags.hasNoNaNs() && Flags.hasNoInfs();
20632}
20633
20634bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20635 return Op.getNode()->getFlags().hasApproximateFuncs();
20636}
20637
20638bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20640}
20641
20642SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20643 const char *LibCallFloatName,
20644 const char *LibCallDoubleNameFinite,
20645 const char *LibCallFloatNameFinite,
20646 SDValue Op,
20647 SelectionDAG &DAG) const {
20648 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20649 return SDValue();
20650
20651 if (!isLowringToMASSFiniteSafe(Op))
20652 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20653 DAG);
20654
20655 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20656 LibCallDoubleNameFinite, Op, DAG);
20657}
20658
20659SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20660 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20661 "__xl_powf_finite", Op, DAG);
20662}
20663
20664SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20665 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20666 "__xl_sinf_finite", Op, DAG);
20667}
20668
20669SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20670 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20671 "__xl_cosf_finite", Op, DAG);
20672}
20673
20674SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20675 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20676 "__xl_logf_finite", Op, DAG);
20677}
20678
20679SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20680 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20681 "__xl_log10f_finite", Op, DAG);
20682}
20683
20684SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20685 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20686 "__xl_expf_finite", Op, DAG);
20687}
20688
20689// If we happen to match to an aligned D-Form, check if the Frame Index is
20690// adequately aligned. If it is not, reset the mode to match to X-Form.
20691static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20694 return;
20695 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20698}
20699
20700/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20701/// compute the address flags of the node, get the optimal address mode based
20702/// on the flags, and set the Base and Disp based on the address mode.
20704 SDValue N, SDValue &Disp,
20705 SDValue &Base,
20706 SelectionDAG &DAG,
20707 MaybeAlign Align) const {
20708 SDLoc DL(Parent);
20709
20710 // Compute the address flags.
20711 unsigned Flags = computeMOFlags(Parent, N, DAG);
20712
20713 // Get the optimal address mode based on the Flags.
20714 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20715
20716 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20717 // Select an X-Form load if it is not.
20718 setXFormForUnalignedFI(N, Flags, Mode);
20719
20720 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20721 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20722 assert(Subtarget.isUsingPCRelativeCalls() &&
20723 "Must be using PC-Relative calls when a valid PC-Relative node is "
20724 "present!");
20725 Mode = PPC::AM_PCRel;
20726 }
20727
20728 // Set Base and Disp accordingly depending on the address mode.
20729 switch (Mode) {
20730 case PPC::AM_DForm:
20731 case PPC::AM_DSForm:
20732 case PPC::AM_DQForm: {
20733 // This is a register plus a 16-bit immediate. The base will be the
20734 // register and the displacement will be the immediate unless it
20735 // isn't sufficiently aligned.
20736 if (Flags & PPC::MOF_RPlusSImm16) {
20737 SDValue Op0 = N.getOperand(0);
20738 SDValue Op1 = N.getOperand(1);
20739 int16_t Imm = Op1->getAsZExtVal();
20740 if (!Align || isAligned(*Align, Imm)) {
20741 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20742 Base = Op0;
20744 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20745 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20746 }
20747 break;
20748 }
20749 }
20750 // This is a register plus the @lo relocation. The base is the register
20751 // and the displacement is the global address.
20752 else if (Flags & PPC::MOF_RPlusLo) {
20753 Disp = N.getOperand(1).getOperand(0); // The global address.
20758 Base = N.getOperand(0);
20759 break;
20760 }
20761 // This is a constant address at most 32 bits. The base will be
20762 // zero or load-immediate-shifted and the displacement will be
20763 // the low 16 bits of the address.
20764 else if (Flags & PPC::MOF_AddrIsSImm32) {
20765 auto *CN = cast<ConstantSDNode>(N);
20766 EVT CNType = CN->getValueType(0);
20767 uint64_t CNImm = CN->getZExtValue();
20768 // If this address fits entirely in a 16-bit sext immediate field, codegen
20769 // this as "d, 0".
20770 int16_t Imm;
20771 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20772 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20773 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20774 CNType);
20775 break;
20776 }
20777 // Handle 32-bit sext immediate with LIS + Addr mode.
20778 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20779 (!Align || isAligned(*Align, CNImm))) {
20780 int32_t Addr = (int32_t)CNImm;
20781 // Otherwise, break this down into LIS + Disp.
20782 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20783 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20784 MVT::i32);
20785 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20786 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20787 break;
20788 }
20789 }
20790 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
20791 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20793 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20794 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20795 } else
20796 Base = N;
20797 break;
20798 }
20799 case PPC::AM_PrefixDForm: {
20800 int64_t Imm34 = 0;
20801 unsigned Opcode = N.getOpcode();
20802 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20803 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20804 // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
20805 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20806 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20807 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20808 else
20809 Base = N.getOperand(0);
20810 } else if (isIntS34Immediate(N, Imm34)) {
20811 // The address is a 34-bit signed immediate.
20812 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20813 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20814 }
20815 break;
20816 }
20817 case PPC::AM_PCRel: {
20818 // When selecting PC-Relative instructions, "Base" is not utilized as
20819 // we select the address as [PC+imm].
20820 Disp = N;
20821 break;
20822 }
20823 case PPC::AM_None:
20824 break;
20825 default: { // By default, X-Form is always available to be selected.
20826 // When a frame index is not aligned, we also match by XForm.
20828 Base = FI ? N : N.getOperand(1);
20829 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20830 N.getValueType())
20831 : N.getOperand(0);
20832 break;
20833 }
20834 }
20835 return Mode;
20836}
20837
20839 bool Return,
20840 bool IsVarArg) const {
20841 switch (CC) {
20842 case CallingConv::Cold:
20843 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20844 default:
20845 return CC_PPC64_ELF;
20846 }
20847}
20848
20850 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20851}
20852
20855 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20856 if (shouldInlineQuadwordAtomics() && Size == 128)
20858
20859 switch (AI->getOperation()) {
20865 default:
20867 }
20868
20869 llvm_unreachable("unreachable atomicrmw operation");
20870}
20871
20880
20881static Intrinsic::ID
20883 switch (BinOp) {
20884 default:
20885 llvm_unreachable("Unexpected AtomicRMW BinOp");
20887 return Intrinsic::ppc_atomicrmw_xchg_i128;
20888 case AtomicRMWInst::Add:
20889 return Intrinsic::ppc_atomicrmw_add_i128;
20890 case AtomicRMWInst::Sub:
20891 return Intrinsic::ppc_atomicrmw_sub_i128;
20892 case AtomicRMWInst::And:
20893 return Intrinsic::ppc_atomicrmw_and_i128;
20894 case AtomicRMWInst::Or:
20895 return Intrinsic::ppc_atomicrmw_or_i128;
20896 case AtomicRMWInst::Xor:
20897 return Intrinsic::ppc_atomicrmw_xor_i128;
20899 return Intrinsic::ppc_atomicrmw_nand_i128;
20900 }
20901}
20902
20904 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20905 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20906 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20907 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20908 Type *ValTy = Incr->getType();
20909 assert(ValTy->getPrimitiveSizeInBits() == 128);
20910 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20911 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20912 Value *IncrHi =
20913 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20914 Value *LoHi = Builder.CreateIntrinsic(
20916 {AlignedAddr, IncrLo, IncrHi});
20917 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20918 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20919 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20920 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20921 return Builder.CreateOr(
20922 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20923}
20924
20926 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20927 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20928 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20929 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20930 Type *ValTy = CmpVal->getType();
20931 assert(ValTy->getPrimitiveSizeInBits() == 128);
20932 Function *IntCmpXchg =
20933 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20934 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20935 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20936 Value *CmpHi =
20937 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20938 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20939 Value *NewHi =
20940 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20941 emitLeadingFence(Builder, CI, Ord);
20942 Value *LoHi =
20943 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20944 emitTrailingFence(Builder, CI, Ord);
20945 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20946 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20947 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20948 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20949 return Builder.CreateOr(
20950 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20951}
20952
20954 return Subtarget.useCRBits();
20955}
20956
20957/// Shuffle masks for vectors of bits are not legal as such vectors are
20958/// reserved for MMA/DM.
20959bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
20960 if (VT.getScalarType() == MVT::i1)
20961 return false;
20962 return TargetLowering::isShuffleMaskLegal(Mask, VT);
20963}
20964
20965// Optimize the following patterns using vbpermq/vbpermd:
20966// i16 = bitcast(v16i1 truncate(v16i8))
20967// i8 = bitcast(v8i1 truncate(v8i16))
20968// i8 = bitcast(v8i1 truncate(v8i8))
20969SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
20970 DAGCombinerInfo &DCI) const {
20971 SDValue Op0 = N->getOperand(0);
20972 if (Op0.getOpcode() != ISD::TRUNCATE)
20973 return SDValue();
20974 SDValue Src = Op0.getOperand(0);
20975 EVT ResVT = N->getValueType(0);
20976 EVT TruncResVT = Op0.getValueType();
20977 EVT SrcVT = Src.getValueType();
20978 SDLoc dl(N);
20979 SelectionDAG &DAG = DCI.DAG;
20980 bool IsLittleEndian = Subtarget.isLittleEndian();
20981
20982 if (ResVT != MVT::i16 && ResVT != MVT::i8)
20983 return SDValue();
20984 SDValue VBPerm =
20985 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
20986 if (!VBPerm)
20987 return SDValue();
20988 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
20989 SDValue Extracted =
20990 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
20991 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
20992 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
20993}
20994
20995SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
20996 SDValue Src, EVT SrcVT, EVT ResVT,
20997 bool IsLE) const {
20998 bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
20999 bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
21000 bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);
21001
21002 if (!IsV16i8 && !IsV8i16 && !IsV8i8)
21003 return SDValue();
21004
21005 if (IsV8i8) {
21006 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
21007 DAG.getUNDEF(MVT::v16i8), Src,
21008 DAG.getIntPtrConstant(0, dl));
21009 }
21010 SmallVector<int, 16> BitIndices(16, 128);
21011 unsigned NumElts = SrcVT.getVectorNumElements();
21012 unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
21013 for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
21014 BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
21015 if (IsV8i8 && IsLE)
21016 BitIndices[Idx] += 64;
21017 }
21018 if (!IsLE)
21019 std::reverse(BitIndices.begin(), BitIndices.end());
21021 for (auto Idx : BitIndices)
21022 BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
21023 SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
21024 return DAG.getNode(
21025 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
21026 DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
21027 DAG.getBitcast(MVT::v16i8, Src), VRB);
21028}
21029
21030// For Power8/9, optimize vec splats of small FP values that can be
21031// represented as integers. Use vspltisw + xvcvsxwdp/xvcvsxwsp instead of
21032// loading from constant pool.
21033SDValue PPCTargetLowering::LowerVecSplatSmallFP(SDValue Op, SelectionDAG &DAG,
21034 bool BVNIsConstantSplat,
21035 unsigned SplatBitSize) const {
21036
21037 if (!BVNIsConstantSplat || !Subtarget.hasVSX() || !Subtarget.hasP8Vector() ||
21038 Subtarget.hasP10Vector())
21039 return SDValue();
21040
21041 EVT VT = Op->getValueType(0);
21042 if (!((SplatBitSize == 64 && VT == MVT::v2f64) ||
21043 (SplatBitSize == 32 && VT == MVT::v4f32)))
21044 return SDValue();
21045
21046 auto *CN = dyn_cast<ConstantFPSDNode>(Op.getOperand(0));
21047 if (!CN)
21048 return SDValue();
21049
21050 APFloat APFloatVal = CN->getValueAPF();
21051 bool IsExact;
21052 APSInt IntResult(16, false);
21053 APFloatVal.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
21054
21055 if (!(IsExact && IntResult <= 15 && IntResult >= -16 && !APFloatVal.isZero()))
21056 return SDValue();
21057
21058 int64_t IntVal = IntResult.getSExtValue();
21059
21060 SDLoc dl(Op);
21061 SDValue IntSplat = getCanonicalConstSplat(IntVal, 4, MVT::v4i32, DAG, dl);
21062
21063 if (SplatBitSize == 64)
21064 return DAG.getNode(
21065 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v2f64,
21066 DAG.getConstant(Intrinsic::ppc_vsx_xvcvsxwdp, dl, MVT::i32), IntSplat);
21067
21068 return DAG.getNode(PPCISD::XVCVSXWSP, dl, MVT::v4f32, IntSplat);
21069}
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
MachineInstr unsigned OpIdx
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static MachineBasicBlock * emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit software-emulated atomic compare-and-swap for I8/I16 without hardware partword atomic support.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool IsSelectCC(unsigned Opcode)
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS, bool IsPPC64)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG)
Optimize the bitfloor(X) pattern for PowerPC.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static MachineBasicBlock * emitSelect(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit SELECT instruction, using ISEL if available, otherwise use branch-based control flow.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB, MachineBasicBlock *&loop1MBB, MachineBasicBlock *&loop2MBB, MachineBasicBlock *&exitMBB, MachineInstr &MI, MachineFunction::iterator It)
Helper function to create basic blocks for atomic compare-and-swap.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static MachineBasicBlock * emitAtomicCmpSwapHardware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16 with partword atomic support.
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB, unsigned OpIdx, bool IsByte, const PPCInstrInfo *TII)
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
bool isDenormal() const
Definition APFloat.h:1539
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1391
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, along with methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1075
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID for this memory operation.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
MCRegister getGlueCodeDescriptorRegister() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: `sub y, (xor x, -1)` and `add (add x, 1), y`. The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType.
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of largest of number of comparisons to generate BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:273
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:979
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns current rounding mode: -1 = Undefined; 0 = Round to 0; 1 = Round to nearest, ties to even; 2 = Round to ...
Definition ISDOpcodes.h:974
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:155
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:204
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
LLVM_ABI bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.