LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
42#include "llvm/IR/LLVMContext.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Metadata.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/Value.h"
47#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/Regex.h"
56#include <cstdint>
57#include <cstring>
58#include <numeric>
59
60using namespace llvm;
61
62static cl::opt<bool>
63 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
64 cl::desc("Disable autoupgrade of debug info"));
65
66static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
67
68// Report a fatal error along with the
69// Call Instruction which caused the error
70[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
71 CallBase *CI) {
72 CI->print(llvm::errs());
73 llvm::errs() << "\n";
75}
76
77// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
78// changed their type from v4f32 to v2i64.
80 Function *&NewFn) {
81 // Check whether this is an old version of the function, which received
82 // v4f32 arguments.
83 Type *Arg0Type = F->getFunctionType()->getParamType(0);
84 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
85 return false;
86
87 // Yes, it's old, replace it with new version.
88 rename(F);
89 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
90 return true;
91}
92
93// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
94// arguments have changed their type from i32 to i8.
96 Function *&NewFn) {
97 // Check that the last argument is an i32.
98 Type *LastArgType = F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
100 if (!LastArgType->isIntegerTy(32))
101 return false;
102
103 // Move this function aside and map down.
104 rename(F);
105 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106 return true;
107}
108
109// Upgrade the declaration of fp compare intrinsics that change return type
110// from scalar to vXi1 mask.
112 Function *&NewFn) {
113 // Check if the return type is a vector.
114 if (F->getReturnType()->isVectorTy())
115 return false;
116
117 rename(F);
118 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
119 return true;
120}
121
122// Upgrade the declaration of multiply and add bytes intrinsics whose input
123// arguments' types have changed from vectors of i32 to vectors of i8
125 Function *&NewFn) {
126 // check if input argument type is a vector of i8
127 Type *Arg1Type = F->getFunctionType()->getParamType(1);
128 Type *Arg2Type = F->getFunctionType()->getParamType(2);
129 if (Arg1Type->isVectorTy() &&
130 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
131 Arg2Type->isVectorTy() &&
132 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
133 return false;
134
135 rename(F);
136 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
137 return true;
138}
139
140// Upgrade the declaration of multipy and add words intrinsics whose input
141// arguments' types have changed to vectors of i32 to vectors of i16
143 Function *&NewFn) {
144 // check if input argument type is a vector of i16
145 Type *Arg1Type = F->getFunctionType()->getParamType(1);
146 Type *Arg2Type = F->getFunctionType()->getParamType(2);
147 if (Arg1Type->isVectorTy() &&
148 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
149 Arg2Type->isVectorTy() &&
150 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
151 return false;
152
153 rename(F);
154 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
155 return true;
156}
157
159 Function *&NewFn) {
160 if (F->getReturnType()->getScalarType()->isBFloatTy())
161 return false;
162
163 rename(F);
164 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
165 return true;
166}
167
169 Function *&NewFn) {
170 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
171 return false;
172
173 rename(F);
174 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
175 return true;
176}
177
179 // All of the intrinsics matches below should be marked with which llvm
180 // version started autoupgrading them. At some point in the future we would
181 // like to use this information to remove upgrade code for some older
182 // intrinsics. It is currently undecided how we will determine that future
183 // point.
184 if (Name.consume_front("avx."))
185 return (Name.starts_with("blend.p") || // Added in 3.7
186 Name == "cvt.ps2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.pd.256" || // Added in 3.9
188 Name == "cvtdq2.ps.256" || // Added in 7.0
189 Name.starts_with("movnt.") || // Added in 3.2
190 Name.starts_with("sqrt.p") || // Added in 7.0
191 Name.starts_with("storeu.") || // Added in 3.9
192 Name.starts_with("vbroadcast.s") || // Added in 3.5
193 Name.starts_with("vbroadcastf128") || // Added in 4.0
194 Name.starts_with("vextractf128.") || // Added in 3.7
195 Name.starts_with("vinsertf128.") || // Added in 3.7
196 Name.starts_with("vperm2f128.") || // Added in 6.0
197 Name.starts_with("vpermil.")); // Added in 3.1
198
199 if (Name.consume_front("avx2."))
200 return (Name == "movntdqa" || // Added in 5.0
201 Name.starts_with("pabs.") || // Added in 6.0
202 Name.starts_with("padds.") || // Added in 8.0
203 Name.starts_with("paddus.") || // Added in 8.0
204 Name.starts_with("pblendd.") || // Added in 3.7
205 Name == "pblendw" || // Added in 3.7
206 Name.starts_with("pbroadcast") || // Added in 3.8
207 Name.starts_with("pcmpeq.") || // Added in 3.1
208 Name.starts_with("pcmpgt.") || // Added in 3.1
209 Name.starts_with("pmax") || // Added in 3.9
210 Name.starts_with("pmin") || // Added in 3.9
211 Name.starts_with("pmovsx") || // Added in 3.9
212 Name.starts_with("pmovzx") || // Added in 3.9
213 Name == "pmul.dq" || // Added in 7.0
214 Name == "pmulu.dq" || // Added in 7.0
215 Name.starts_with("psll.dq") || // Added in 3.7
216 Name.starts_with("psrl.dq") || // Added in 3.7
217 Name.starts_with("psubs.") || // Added in 8.0
218 Name.starts_with("psubus.") || // Added in 8.0
219 Name.starts_with("vbroadcast") || // Added in 3.8
220 Name == "vbroadcasti128" || // Added in 3.7
221 Name == "vextracti128" || // Added in 3.7
222 Name == "vinserti128" || // Added in 3.7
223 Name == "vperm2i128"); // Added in 6.0
224
225 if (Name.consume_front("avx512.")) {
226 if (Name.consume_front("mask."))
227 // 'avx512.mask.*'
228 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("and.") || // Added in 3.9
230 Name.starts_with("andn.") || // Added in 3.9
231 Name.starts_with("broadcast.s") || // Added in 3.9
232 Name.starts_with("broadcastf32x4.") || // Added in 6.0
233 Name.starts_with("broadcastf32x8.") || // Added in 6.0
234 Name.starts_with("broadcastf64x2.") || // Added in 6.0
235 Name.starts_with("broadcastf64x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x4.") || // Added in 6.0
237 Name.starts_with("broadcasti32x8.") || // Added in 6.0
238 Name.starts_with("broadcasti64x2.") || // Added in 6.0
239 Name.starts_with("broadcasti64x4.") || // Added in 6.0
240 Name.starts_with("cmp.b") || // Added in 5.0
241 Name.starts_with("cmp.d") || // Added in 5.0
242 Name.starts_with("cmp.q") || // Added in 5.0
243 Name.starts_with("cmp.w") || // Added in 5.0
244 Name.starts_with("compress.b") || // Added in 9.0
245 Name.starts_with("compress.d") || // Added in 9.0
246 Name.starts_with("compress.p") || // Added in 9.0
247 Name.starts_with("compress.q") || // Added in 9.0
248 Name.starts_with("compress.store.") || // Added in 7.0
249 Name.starts_with("compress.w") || // Added in 9.0
250 Name.starts_with("conflict.") || // Added in 9.0
251 Name.starts_with("cvtdq2pd.") || // Added in 4.0
252 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
253 Name == "cvtpd2dq.256" || // Added in 7.0
254 Name == "cvtpd2ps.256" || // Added in 7.0
255 Name == "cvtps2pd.128" || // Added in 7.0
256 Name == "cvtps2pd.256" || // Added in 7.0
257 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
258 Name == "cvtqq2ps.256" || // Added in 9.0
259 Name == "cvtqq2ps.512" || // Added in 9.0
260 Name == "cvttpd2dq.256" || // Added in 7.0
261 Name == "cvttps2dq.128" || // Added in 7.0
262 Name == "cvttps2dq.256" || // Added in 7.0
263 Name.starts_with("cvtudq2pd.") || // Added in 4.0
264 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
265 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
266 Name == "cvtuqq2ps.256" || // Added in 9.0
267 Name == "cvtuqq2ps.512" || // Added in 9.0
268 Name.starts_with("dbpsadbw.") || // Added in 7.0
269 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
270 Name.starts_with("expand.b") || // Added in 9.0
271 Name.starts_with("expand.d") || // Added in 9.0
272 Name.starts_with("expand.load.") || // Added in 7.0
273 Name.starts_with("expand.p") || // Added in 9.0
274 Name.starts_with("expand.q") || // Added in 9.0
275 Name.starts_with("expand.w") || // Added in 9.0
276 Name.starts_with("fpclass.p") || // Added in 7.0
277 Name.starts_with("insert") || // Added in 4.0
278 Name.starts_with("load.") || // Added in 3.9
279 Name.starts_with("loadu.") || // Added in 3.9
280 Name.starts_with("lzcnt.") || // Added in 5.0
281 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
283 Name.starts_with("movddup") || // Added in 3.9
284 Name.starts_with("move.s") || // Added in 4.0
285 Name.starts_with("movshdup") || // Added in 3.9
286 Name.starts_with("movsldup") || // Added in 3.9
287 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
288 Name.starts_with("or.") || // Added in 3.9
289 Name.starts_with("pabs.") || // Added in 6.0
290 Name.starts_with("packssdw.") || // Added in 5.0
291 Name.starts_with("packsswb.") || // Added in 5.0
292 Name.starts_with("packusdw.") || // Added in 5.0
293 Name.starts_with("packuswb.") || // Added in 5.0
294 Name.starts_with("padd.") || // Added in 4.0
295 Name.starts_with("padds.") || // Added in 8.0
296 Name.starts_with("paddus.") || // Added in 8.0
297 Name.starts_with("palignr.") || // Added in 3.9
298 Name.starts_with("pand.") || // Added in 3.9
299 Name.starts_with("pandn.") || // Added in 3.9
300 Name.starts_with("pavg") || // Added in 6.0
301 Name.starts_with("pbroadcast") || // Added in 6.0
302 Name.starts_with("pcmpeq.") || // Added in 3.9
303 Name.starts_with("pcmpgt.") || // Added in 3.9
304 Name.starts_with("perm.df.") || // Added in 3.9
305 Name.starts_with("perm.di.") || // Added in 3.9
306 Name.starts_with("permvar.") || // Added in 7.0
307 Name.starts_with("pmaddubs.w.") || // Added in 7.0
308 Name.starts_with("pmaddw.d.") || // Added in 7.0
309 Name.starts_with("pmax") || // Added in 4.0
310 Name.starts_with("pmin") || // Added in 4.0
311 Name == "pmov.qd.256" || // Added in 9.0
312 Name == "pmov.qd.512" || // Added in 9.0
313 Name == "pmov.wb.256" || // Added in 9.0
314 Name == "pmov.wb.512" || // Added in 9.0
315 Name.starts_with("pmovsx") || // Added in 4.0
316 Name.starts_with("pmovzx") || // Added in 4.0
317 Name.starts_with("pmul.dq.") || // Added in 4.0
318 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
319 Name.starts_with("pmulh.w.") || // Added in 7.0
320 Name.starts_with("pmulhu.w.") || // Added in 7.0
321 Name.starts_with("pmull.") || // Added in 4.0
322 Name.starts_with("pmultishift.qb.") || // Added in 8.0
323 Name.starts_with("pmulu.dq.") || // Added in 4.0
324 Name.starts_with("por.") || // Added in 3.9
325 Name.starts_with("prol.") || // Added in 8.0
326 Name.starts_with("prolv.") || // Added in 8.0
327 Name.starts_with("pror.") || // Added in 8.0
328 Name.starts_with("prorv.") || // Added in 8.0
329 Name.starts_with("pshuf.b.") || // Added in 4.0
330 Name.starts_with("pshuf.d.") || // Added in 3.9
331 Name.starts_with("pshufh.w.") || // Added in 3.9
332 Name.starts_with("pshufl.w.") || // Added in 3.9
333 Name.starts_with("psll.d") || // Added in 4.0
334 Name.starts_with("psll.q") || // Added in 4.0
335 Name.starts_with("psll.w") || // Added in 4.0
336 Name.starts_with("pslli") || // Added in 4.0
337 Name.starts_with("psllv") || // Added in 4.0
338 Name.starts_with("psra.d") || // Added in 4.0
339 Name.starts_with("psra.q") || // Added in 4.0
340 Name.starts_with("psra.w") || // Added in 4.0
341 Name.starts_with("psrai") || // Added in 4.0
342 Name.starts_with("psrav") || // Added in 4.0
343 Name.starts_with("psrl.d") || // Added in 4.0
344 Name.starts_with("psrl.q") || // Added in 4.0
345 Name.starts_with("psrl.w") || // Added in 4.0
346 Name.starts_with("psrli") || // Added in 4.0
347 Name.starts_with("psrlv") || // Added in 4.0
348 Name.starts_with("psub.") || // Added in 4.0
349 Name.starts_with("psubs.") || // Added in 8.0
350 Name.starts_with("psubus.") || // Added in 8.0
351 Name.starts_with("pternlog.") || // Added in 7.0
352 Name.starts_with("punpckh") || // Added in 3.9
353 Name.starts_with("punpckl") || // Added in 3.9
354 Name.starts_with("pxor.") || // Added in 3.9
355 Name.starts_with("shuf.f") || // Added in 6.0
356 Name.starts_with("shuf.i") || // Added in 6.0
357 Name.starts_with("shuf.p") || // Added in 4.0
358 Name.starts_with("sqrt.p") || // Added in 7.0
359 Name.starts_with("store.b.") || // Added in 3.9
360 Name.starts_with("store.d.") || // Added in 3.9
361 Name.starts_with("store.p") || // Added in 3.9
362 Name.starts_with("store.q.") || // Added in 3.9
363 Name.starts_with("store.w.") || // Added in 3.9
364 Name == "store.ss" || // Added in 7.0
365 Name.starts_with("storeu.") || // Added in 3.9
366 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
367 Name.starts_with("ucmp.") || // Added in 5.0
368 Name.starts_with("unpckh.") || // Added in 3.9
369 Name.starts_with("unpckl.") || // Added in 3.9
370 Name.starts_with("valign.") || // Added in 4.0
371 Name == "vcvtph2ps.128" || // Added in 11.0
372 Name == "vcvtph2ps.256" || // Added in 11.0
373 Name.starts_with("vextract") || // Added in 4.0
374 Name.starts_with("vfmadd.") || // Added in 7.0
375 Name.starts_with("vfmaddsub.") || // Added in 7.0
376 Name.starts_with("vfnmadd.") || // Added in 7.0
377 Name.starts_with("vfnmsub.") || // Added in 7.0
378 Name.starts_with("vpdpbusd.") || // Added in 7.0
379 Name.starts_with("vpdpbusds.") || // Added in 7.0
380 Name.starts_with("vpdpwssd.") || // Added in 7.0
381 Name.starts_with("vpdpwssds.") || // Added in 7.0
382 Name.starts_with("vpermi2var.") || // Added in 7.0
383 Name.starts_with("vpermil.p") || // Added in 3.9
384 Name.starts_with("vpermilvar.") || // Added in 4.0
385 Name.starts_with("vpermt2var.") || // Added in 7.0
386 Name.starts_with("vpmadd52") || // Added in 7.0
387 Name.starts_with("vpshld.") || // Added in 7.0
388 Name.starts_with("vpshldv.") || // Added in 8.0
389 Name.starts_with("vpshrd.") || // Added in 7.0
390 Name.starts_with("vpshrdv.") || // Added in 8.0
391 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
392 Name.starts_with("xor.")); // Added in 3.9
393
394 if (Name.consume_front("mask3."))
395 // 'avx512.mask3.*'
396 return (Name.starts_with("vfmadd.") || // Added in 7.0
397 Name.starts_with("vfmaddsub.") || // Added in 7.0
398 Name.starts_with("vfmsub.") || // Added in 7.0
399 Name.starts_with("vfmsubadd.") || // Added in 7.0
400 Name.starts_with("vfnmsub.")); // Added in 7.0
401
402 if (Name.consume_front("maskz."))
403 // 'avx512.maskz.*'
404 return (Name.starts_with("pternlog.") || // Added in 7.0
405 Name.starts_with("vfmadd.") || // Added in 7.0
406 Name.starts_with("vfmaddsub.") || // Added in 7.0
407 Name.starts_with("vpdpbusd.") || // Added in 7.0
408 Name.starts_with("vpdpbusds.") || // Added in 7.0
409 Name.starts_with("vpdpwssd.") || // Added in 7.0
410 Name.starts_with("vpdpwssds.") || // Added in 7.0
411 Name.starts_with("vpermt2var.") || // Added in 7.0
412 Name.starts_with("vpmadd52") || // Added in 7.0
413 Name.starts_with("vpshldv.") || // Added in 8.0
414 Name.starts_with("vpshrdv.")); // Added in 8.0
415
416 // 'avx512.*'
417 return (Name == "movntdqa" || // Added in 5.0
418 Name == "pmul.dq.512" || // Added in 7.0
419 Name == "pmulu.dq.512" || // Added in 7.0
420 Name.starts_with("broadcastm") || // Added in 6.0
421 Name.starts_with("cmp.p") || // Added in 12.0
422 Name.starts_with("cvtb2mask.") || // Added in 7.0
423 Name.starts_with("cvtd2mask.") || // Added in 7.0
424 Name.starts_with("cvtmask2") || // Added in 5.0
425 Name.starts_with("cvtq2mask.") || // Added in 7.0
426 Name == "cvtusi2sd" || // Added in 7.0
427 Name.starts_with("cvtw2mask.") || // Added in 7.0
428 Name == "kand.w" || // Added in 7.0
429 Name == "kandn.w" || // Added in 7.0
430 Name == "knot.w" || // Added in 7.0
431 Name == "kor.w" || // Added in 7.0
432 Name == "kortestc.w" || // Added in 7.0
433 Name == "kortestz.w" || // Added in 7.0
434 Name.starts_with("kunpck") || // added in 6.0
435 Name == "kxnor.w" || // Added in 7.0
436 Name == "kxor.w" || // Added in 7.0
437 Name.starts_with("padds.") || // Added in 8.0
438 Name.starts_with("pbroadcast") || // Added in 3.9
439 Name.starts_with("prol") || // Added in 8.0
440 Name.starts_with("pror") || // Added in 8.0
441 Name.starts_with("psll.dq") || // Added in 3.9
442 Name.starts_with("psrl.dq") || // Added in 3.9
443 Name.starts_with("psubs.") || // Added in 8.0
444 Name.starts_with("ptestm") || // Added in 6.0
445 Name.starts_with("ptestnm") || // Added in 6.0
446 Name.starts_with("storent.") || // Added in 3.9
447 Name.starts_with("vbroadcast.s") || // Added in 7.0
448 Name.starts_with("vpshld.") || // Added in 8.0
449 Name.starts_with("vpshrd.")); // Added in 8.0
450 }
451
452 if (Name.consume_front("fma."))
453 return (Name.starts_with("vfmadd.") || // Added in 7.0
454 Name.starts_with("vfmsub.") || // Added in 7.0
455 Name.starts_with("vfmsubadd.") || // Added in 7.0
456 Name.starts_with("vfnmadd.") || // Added in 7.0
457 Name.starts_with("vfnmsub.")); // Added in 7.0
458
459 if (Name.consume_front("fma4."))
460 return Name.starts_with("vfmadd.s"); // Added in 7.0
461
462 if (Name.consume_front("sse."))
463 return (Name == "add.ss" || // Added in 4.0
464 Name == "cvtsi2ss" || // Added in 7.0
465 Name == "cvtsi642ss" || // Added in 7.0
466 Name == "div.ss" || // Added in 4.0
467 Name == "mul.ss" || // Added in 4.0
468 Name.starts_with("sqrt.p") || // Added in 7.0
469 Name == "sqrt.ss" || // Added in 7.0
470 Name.starts_with("storeu.") || // Added in 3.9
471 Name == "sub.ss"); // Added in 4.0
472
473 if (Name.consume_front("sse2."))
474 return (Name == "add.sd" || // Added in 4.0
475 Name == "cvtdq2pd" || // Added in 3.9
476 Name == "cvtdq2ps" || // Added in 7.0
477 Name == "cvtps2pd" || // Added in 3.9
478 Name == "cvtsi2sd" || // Added in 7.0
479 Name == "cvtsi642sd" || // Added in 7.0
480 Name == "cvtss2sd" || // Added in 7.0
481 Name == "div.sd" || // Added in 4.0
482 Name == "mul.sd" || // Added in 4.0
483 Name.starts_with("padds.") || // Added in 8.0
484 Name.starts_with("paddus.") || // Added in 8.0
485 Name.starts_with("pcmpeq.") || // Added in 3.1
486 Name.starts_with("pcmpgt.") || // Added in 3.1
487 Name == "pmaxs.w" || // Added in 3.9
488 Name == "pmaxu.b" || // Added in 3.9
489 Name == "pmins.w" || // Added in 3.9
490 Name == "pminu.b" || // Added in 3.9
491 Name == "pmulu.dq" || // Added in 7.0
492 Name.starts_with("pshuf") || // Added in 3.9
493 Name.starts_with("psll.dq") || // Added in 3.7
494 Name.starts_with("psrl.dq") || // Added in 3.7
495 Name.starts_with("psubs.") || // Added in 8.0
496 Name.starts_with("psubus.") || // Added in 8.0
497 Name.starts_with("sqrt.p") || // Added in 7.0
498 Name == "sqrt.sd" || // Added in 7.0
499 Name == "storel.dq" || // Added in 3.9
500 Name.starts_with("storeu.") || // Added in 3.9
501 Name == "sub.sd"); // Added in 4.0
502
503 if (Name.consume_front("sse41."))
504 return (Name.starts_with("blendp") || // Added in 3.7
505 Name == "movntdqa" || // Added in 5.0
506 Name == "pblendw" || // Added in 3.7
507 Name == "pmaxsb" || // Added in 3.9
508 Name == "pmaxsd" || // Added in 3.9
509 Name == "pmaxud" || // Added in 3.9
510 Name == "pmaxuw" || // Added in 3.9
511 Name == "pminsb" || // Added in 3.9
512 Name == "pminsd" || // Added in 3.9
513 Name == "pminud" || // Added in 3.9
514 Name == "pminuw" || // Added in 3.9
515 Name.starts_with("pmovsx") || // Added in 3.8
516 Name.starts_with("pmovzx") || // Added in 3.9
517 Name == "pmuldq"); // Added in 7.0
518
519 if (Name.consume_front("sse42."))
520 return Name == "crc32.64.8"; // Added in 3.4
521
522 if (Name.consume_front("sse4a."))
523 return Name.starts_with("movnt."); // Added in 3.9
524
525 if (Name.consume_front("ssse3."))
526 return (Name == "pabs.b.128" || // Added in 6.0
527 Name == "pabs.d.128" || // Added in 6.0
528 Name == "pabs.w.128"); // Added in 6.0
529
530 if (Name.consume_front("xop."))
531 return (Name == "vpcmov" || // Added in 3.8
532 Name == "vpcmov.256" || // Added in 5.0
533 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
534 Name.starts_with("vprot")); // Added in 8.0
535
536 return (Name == "addcarry.u32" || // Added in 8.0
537 Name == "addcarry.u64" || // Added in 8.0
538 Name == "addcarryx.u32" || // Added in 8.0
539 Name == "addcarryx.u64" || // Added in 8.0
540 Name == "subborrow.u32" || // Added in 8.0
541 Name == "subborrow.u64" || // Added in 8.0
542 Name.starts_with("vcvtph2ps.")); // Added in 11.0
543}
544
546 Function *&NewFn) {
547 // Only handle intrinsics that start with "x86.".
548 if (!Name.consume_front("x86."))
549 return false;
550
551 if (shouldUpgradeX86Intrinsic(F, Name)) {
552 NewFn = nullptr;
553 return true;
554 }
555
556 if (Name == "rdtscp") { // Added in 8.0
557 // If this intrinsic has 0 operands, it's the new version.
558 if (F->getFunctionType()->getNumParams() == 0)
559 return false;
560
561 rename(F);
562 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
563 Intrinsic::x86_rdtscp);
564 return true;
565 }
566
568
569 // SSE4.1 ptest functions may have an old signature.
570 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
572 .Case("c", Intrinsic::x86_sse41_ptestc)
573 .Case("z", Intrinsic::x86_sse41_ptestz)
574 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
577 return upgradePTESTIntrinsic(F, ID, NewFn);
578
579 return false;
580 }
581
582 // Several blend and other instructions with masks used the wrong number of
583 // bits.
584
585 // Added in 3.6
587 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
588 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
589 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
590 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
591 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
592 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
595 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
596
597 if (Name.consume_front("avx512.")) {
598 if (Name.consume_front("mask.cmp.")) {
599 // Added in 7.0
601 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
602 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
603 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
604 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
605 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
606 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
609 return upgradeX86MaskedFPCompare(F, ID, NewFn);
610 } else if (Name.starts_with("vpdpbusd.") ||
611 Name.starts_with("vpdpbusds.")) {
612 // Added in 21.1
614 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
615 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
616 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
617 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
618 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
619 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
622 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
623 } else if (Name.starts_with("vpdpwssd.") ||
624 Name.starts_with("vpdpwssds.")) {
625 // Added in 21.1
627 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
628 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
629 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
630 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
631 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
632 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
635 return upgradeX86MultiplyAddWords(F, ID, NewFn);
636 }
637 return false; // No other 'x86.avx512.*'.
638 }
639
640 if (Name.consume_front("avx2.")) {
641 if (Name.consume_front("vpdpb")) {
642 // Added in 21.1
644 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
645 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
646 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
647 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
648 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
649 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
650 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
651 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
652 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
653 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
654 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
655 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
658 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
659 } else if (Name.consume_front("vpdpw")) {
660 // Added in 21.1
662 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
663 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
664 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
665 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
666 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
667 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
668 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
669 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
670 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
671 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
672 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
673 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
676 return upgradeX86MultiplyAddWords(F, ID, NewFn);
677 }
678 return false; // No other 'x86.avx2.*'
679 }
680
681 if (Name.consume_front("avx10.")) {
682 if (Name.consume_front("vpdpb")) {
683 // Added in 21.1
685 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
686 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
687 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
688 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
689 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
690 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
693 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
694 } else if (Name.consume_front("vpdpw")) {
696 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
697 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
698 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
699 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
700 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
701 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
704 return upgradeX86MultiplyAddWords(F, ID, NewFn);
705 }
706 return false; // No other 'x86.avx10.*'
707 }
708
709 if (Name.consume_front("avx512bf16.")) {
710 // Added in 9.0
712 .Case("cvtne2ps2bf16.128",
713 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
714 .Case("cvtne2ps2bf16.256",
715 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
716 .Case("cvtne2ps2bf16.512",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
718 .Case("mask.cvtneps2bf16.128",
719 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
720 .Case("cvtneps2bf16.256",
721 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
722 .Case("cvtneps2bf16.512",
723 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
726 return upgradeX86BF16Intrinsic(F, ID, NewFn);
727
728 // Added in 9.0
730 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
731 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
732 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
735 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
736 return false; // No other 'x86.avx512bf16.*'.
737 }
738
739 if (Name.consume_front("xop.")) {
741 if (Name.starts_with("vpermil2")) { // Added in 3.9
742 // Upgrade any XOP PERMIL2 index operand still using a float/double
743 // vector.
744 auto Idx = F->getFunctionType()->getParamType(2);
745 if (Idx->isFPOrFPVectorTy()) {
746 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
747 unsigned EltSize = Idx->getScalarSizeInBits();
748 if (EltSize == 64 && IdxSize == 128)
749 ID = Intrinsic::x86_xop_vpermil2pd;
750 else if (EltSize == 32 && IdxSize == 128)
751 ID = Intrinsic::x86_xop_vpermil2ps;
752 else if (EltSize == 64 && IdxSize == 256)
753 ID = Intrinsic::x86_xop_vpermil2pd_256;
754 else
755 ID = Intrinsic::x86_xop_vpermil2ps_256;
756 }
757 } else if (F->arg_size() == 2)
758 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
760 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
761 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
763
765 rename(F);
766 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
767 return true;
768 }
769 return false; // No other 'x86.xop.*'
770 }
771
772 if (Name == "seh.recoverfp") {
773 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
774 Intrinsic::eh_recoverfp);
775 return true;
776 }
777
778 return false;
779}
780
// Classifies an 'arm.*' or 'aarch64.*' intrinsic declaration as outdated or
// current. Name is the intrinsic name after the caller has removed the
// 'llvm.' and target prefixes. When the result is true, NewFn has either been
// pointed at a replacement declaration, set to nullptr, or left as passed in,
// depending on the branch taken; the vctp64 branch also renames F.
// NOTE(review): this listing appears to have lost some lines, including the
// one with the function name and leading parameters (the call site passes
// IsArm, F, Name, NewFn) and the opening lines of several lookup chains and
// `NewFn = ...` assignments. The comments here cover only what is visible.
781// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
782// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
784 StringRef Name,
785 Function *&NewFn) {
786 if (Name.starts_with("rbit")) {
787 // '(arm|aarch64).rbit'.
789 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
790 return true;
791 }
792
793 if (Name == "thread.pointer") {
794 // '(arm|aarch64).thread.pointer'.
796 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
797 return true;
798 }
799
 // Neon records whether a 'neon.' prefix was present; it is consulted again
 // in the per-target sections further down.
800 bool Neon = Name.consume_front("neon.");
801 if (Neon) {
802 // '(arm|aarch64).neon.*'.
803 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
804 // v16i8 respectively.
805 if (Name.consume_front("bfdot.")) {
806 // '(arm|aarch64).neon.bfdot.*'.
809 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
810 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
811 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
 // The return type is 64 or 128 bits wide (asserted below); dividing by 16
 // gives the lane count of the bfloat vector used as the second overload
 // type, i.e. 4 or 8 lanes.
814 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
815 assert((OperandWidth == 64 || OperandWidth == 128) &&
816 "Unexpected operand width");
817 LLVMContext &Ctx = F->getParent()->getContext();
818 std::array<Type *, 2> Tys{
819 {F->getReturnType(),
820 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
821 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
822 return true;
823 }
824 return false; // No other '(arm|aarch64).neon.bfdot.*'.
825 }
826
827 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
828 // anymore and accept v8bf16 instead of v16i8.
829 if (Name.consume_front("bfm")) {
830 // '(arm|aarch64).neon.bfm*'.
831 if (Name.consume_back(".v4f32.v16i8")) {
832 // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
835 .Case("mla",
836 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
837 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
838 .Case("lalb",
839 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
840 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
841 .Case("lalt",
842 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
843 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
846 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
847 return true;
848 }
849 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
850 }
851 return false; // No other '(arm|aarch64).neon.bfm*'.
852 }
853 // Continue on to Aarch64 Neon or Arm Neon.
854 }
855 // Continue on to Arm or Aarch64.
856
857 if (IsArm) {
858 // 'arm.*'.
859 if (Neon) {
860 // 'arm.neon.*'.
 // Prefix table from target-specific NEON names to generic intrinsics; the
 // replacement is overloaded on the type of the first argument.
862 .StartsWith("vclz.", Intrinsic::ctlz)
863 .StartsWith("vcnt.", Intrinsic::ctpop)
864 .StartsWith("vqadds.", Intrinsic::sadd_sat)
865 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
866 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
867 .StartsWith("vqsubu.", Intrinsic::usub_sat)
868 .StartsWith("vrinta.", Intrinsic::round)
869 .StartsWith("vrintn.", Intrinsic::roundeven)
870 .StartsWith("vrintm.", Intrinsic::floor)
871 .StartsWith("vrintp.", Intrinsic::ceil)
872 .StartsWith("vrintx.", Intrinsic::rint)
873 .StartsWith("vrintz.", Intrinsic::trunc)
876 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
877 F->arg_begin()->getType());
878 return true;
879 }
880
881 if (Name.consume_front("vst")) {
882 // 'arm.neon.vst*'.
883 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
885 if (vstRegex.match(Name, &Groups)) {
886 static const Intrinsic::ID StoreInts[] = {
887 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
888 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
889
890 static const Intrinsic::ID StoreLaneInts[] = {
891 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
892 Intrinsic::arm_neon_vst4lane};
893
894 auto fArgs = F->getFunctionType()->params();
895 Type *Tys[] = {fArgs[0], fArgs[1]};
 // A one-character regex capture means a plain 'vstN' store, selected by
 // parameter count minus 3; otherwise it is a 'vstNlane' store, selected
 // by parameter count minus 5.
896 if (Groups[1].size() == 1)
898 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
899 else
901 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
902 return true;
903 }
904 return false; // No other 'arm.neon.vst*'.
905 }
906
907 return false; // No other 'arm.neon.*'.
908 }
909
910 if (Name.consume_front("mve.")) {
911 // 'arm.mve.*'.
912 if (Name == "vctp64") {
913 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
914 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
915 // the function and deal with it below in UpgradeIntrinsicCall.
916 rename(F);
917 return true;
918 }
919 return false; // 'arm.mve.vctp64' whose result is not a 4-element vector.
920 }
921
922 if (Name.starts_with("vrintn.v")) {
924 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
925 return true;
926 }
927
928 // These too are changed to accept a v2i1 instead of the old v4i1.
 // Every `return Name == ...` below reports a match without assigning
 // NewFn in this function.
929 if (Name.consume_back(".v4i1")) {
930 // 'arm.mve.*.v4i1'.
931 if (Name.consume_back(".predicated.v2i64.v4i32"))
932 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
933 return Name == "mull.int" || Name == "vqdmull";
934
935 if (Name.consume_back(".v2i64")) {
936 // 'arm.mve.*.v2i64.v4i1'
937 bool IsGather = Name.consume_front("vldr.gather.");
938 if (IsGather || Name.consume_front("vstr.scatter.")) {
939 if (Name.consume_front("base.")) {
940 // Optional 'wb.' prefix.
941 Name.consume_front("wb.");
942 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
943 // predicated.v2i64.v2i64.v4i1'.
944 return Name == "predicated.v2i64";
945 }
946
 // Both the typed-pointer ('p0i64') and plain ('p0') spellings are
 // accepted; gather and scatter list the two types in opposite order.
947 if (Name.consume_front("offset.predicated."))
948 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
949 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
950
951 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
952 return false;
953 }
954
955 return false; // No other 'arm.mve.*.v2i64.v4i1'.
956 }
957 return false; // No other 'arm.mve.*.v4i1'.
958 }
959 return false; // No other 'arm.mve.*'.
960 }
961
962 if (Name.consume_front("cde.vcx")) {
963 // 'arm.cde.vcx*'.
964 if (Name.consume_back(".predicated.v2i64.v4i1"))
965 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
966 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
967 Name == "3q" || Name == "3qa";
968
969 return false; // No other 'arm.cde.vcx*'.
970 }
971 } else {
972 // 'aarch64.*'.
973 if (Neon) {
974 // 'aarch64.neon.*'.
976 .StartsWith("frintn", Intrinsic::roundeven)
977 .StartsWith("rbit", Intrinsic::bitreverse)
980 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
981 F->arg_begin()->getType());
982 return true;
983 }
984
985 if (Name.starts_with("addp")) {
986 // 'aarch64.neon.addp*'.
987 if (F->arg_size() != 2)
988 return false; // Invalid IR.
 // Only a vector return type with floating-point elements is redirected
 // to faddp; anything else falls through to the checks below.
989 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
990 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
992 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
993 return true;
994 }
995 }
996
997 // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
998 if (Name.starts_with("bfcvt")) {
999 NewFn = nullptr;
1000 return true;
1001 }
1002
1003 return false; // No other 'aarch64.neon.*'.
1004 }
1005 if (Name.consume_front("sve.")) {
1006 // 'aarch64.sve.*'.
1007 if (Name.consume_front("bf")) {
1008 if (Name == "mmla") {
 // Overload types: the return type and the type of the second argument.
1009 Type *Tys[] = {F->getReturnType(),
1010 std::next(F->arg_begin())->getType()};
1012 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1013 return true;
1014 }
1015 if (Name.consume_back(".lane")) {
1016 // 'aarch64.sve.bf*.lane'.
1019 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1020 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1021 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1024 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1025 return true;
1026 }
1027 return false; // No other 'aarch64.sve.bf*.lane'.
1028 }
1029 return false; // No other 'aarch64.sve.bf*'.
1030 }
1031
1032 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1033 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1034 NewFn = nullptr;
1035 return true;
1036 }
1037
1038 if (Name.consume_front("addqv")) {
1039 // 'aarch64.sve.addqv'.
 // Only declarations with a floating-point (or FP vector) return type are
 // redirected to faddqv.
1040 if (!F->getReturnType()->isFPOrFPVectorTy())
1041 return false;
1042
1043 auto Args = F->getFunctionType()->params();
1044 Type *Tys[] = {F->getReturnType(), Args[1]};
1046 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1047 return true;
1048 }
1049
1050 if (Name.consume_front("ld")) {
1051 // 'aarch64.sve.ld*'.
1052 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1053 if (LdRegex.match(Name)) {
 // The new overload vector type combines the element type of the return
 // type with the element count of the first argument's type.
1054 Type *ScalarTy =
1055 cast<VectorType>(F->getReturnType())->getElementType();
1056 ElementCount EC =
1057 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1058 assert(F->arg_size() == 2 &&
1059 "Expected 2 arguments for ld* intrinsic.");
1060 Type *PtrTy = F->getArg(1)->getType();
1061 Type *Ty = VectorType::get(ScalarTy, EC);
 // LdRegex only matches when Name starts with '2', '3' or '4', so
 // Name[0] - '2' indexes this table at 0..2.
1062 static const Intrinsic::ID LoadIDs[] = {
1063 Intrinsic::aarch64_sve_ld2_sret,
1064 Intrinsic::aarch64_sve_ld3_sret,
1065 Intrinsic::aarch64_sve_ld4_sret,
1066 };
1068 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1069 return true;
1070 }
1071 return false; // No other 'aarch64.sve.ld*'.
1072 }
1073
1074 if (Name.consume_front("tuple.")) {
1075 // 'aarch64.sve.tuple.*'.
1076 if (Name.starts_with("get")) {
1077 // 'aarch64.sve.tuple.get*'.
1078 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1080 F->getParent(), Intrinsic::vector_extract, Tys);
1081 return true;
1082 }
1083
1084 if (Name.starts_with("set")) {
1085 // 'aarch64.sve.tuple.set*'.
 // Overload types are taken from parameters 0, 2 and 1, in that order.
1086 auto Args = F->getFunctionType()->params();
1087 Type *Tys[] = {Args[0], Args[2], Args[1]};
1089 F->getParent(), Intrinsic::vector_insert, Tys);
1090 return true;
1091 }
1092
1093 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1094 if (CreateTupleRegex.match(Name)) {
1095 // 'aarch64.sve.tuple.create*'.
1096 auto Args = F->getFunctionType()->params();
1097 Type *Tys[] = {F->getReturnType(), Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1100 return true;
1101 }
1102 return false; // No other 'aarch64.sve.tuple.*'.
1103 }
1104
1105 if (Name.starts_with("rev.nxv")) {
1106 // 'aarch64.sve.rev.<Ty>'
1108 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1109 return true;
1110 }
1111
1112 return false; // No other 'aarch64.sve.*'.
1113 }
1114 }
1115 return false; // No other 'arm.*', 'aarch64.*'.
1116}
1117
1119 StringRef Name) {
 // Matches 'cp.async.bulk.tensor.g2s.{im2col,tile}.<N>d' names against the
 // nvvm_cp_async_bulk_tensor_g2s_* IDs and hands back the matched ID on the
 // `return ID;` paths below; the two documented upgrade reasons inspect the
 // first argument's pointer address space and the type of the third-from-last
 // parameter.
 // NOTE(review): this listing appears to be missing several lines of the
 // function (its name and first parameter, the declaration of ID, the
 // condition guarding the first `return ID;`, the right-hand side of the
 // address-space comparison and the final return) - confirm against the
 // original file before relying on the exact conditions.
1120 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1123 .Case("im2col.3d",
1124 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1125 .Case("im2col.4d",
1126 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1127 .Case("im2col.5d",
1128 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1129 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1130 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1131 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1132 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1133 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1135
1137 return ID;
1138
1139 // These intrinsics may need upgrade for two reasons:
1140 // (1) When the address-space of the first argument is shared[AS=3]
1141 // (and we upgrade it to use shared_cluster address-space[AS=7])
1142 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1144 return ID;
1145
1146 // (2) When there are only two boolean flag arguments at the end:
1147 //
1148 // The last three parameters of the older version of these
1149 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1150 //
1151 // The newer version reads as:
1152 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1153 //
1154 // So, when the type of the [N-3]rd argument is "not i1", then
1155 // it is the older version and we need to upgrade.
 // FlagStartIndex is the position of the third-from-last parameter.
1156 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1157 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1158 if (!ArgType->isIntegerTy(1))
1159 return ID;
1160 }
1161
1163}
1164
1166 StringRef Name) {
 // Recognises two groups of nvvm names: 'mapa.shared.cluster', tested via the
 // pointer address space of the return type, and the 'cp.async.bulk.' copies
 // 'global.to.shared.cluster' / 'shared.cta.to.cluster', tested via the
 // pointer address space of the first argument. A match yields the
 // corresponding intrinsic ID.
 // NOTE(review): this listing appears to be missing several lines (the
 // function name and first parameter, the right-hand sides of both
 // address-space comparisons, the declaration of ID and the final return) -
 // confirm the exact conditions against the original file.
1167 if (Name.consume_front("mapa.shared.cluster"))
1168 if (F->getReturnType()->getPointerAddressSpace() ==
1170 return Intrinsic::nvvm_mapa_shared_cluster;
1171
1172 if (Name.consume_front("cp.async.bulk.")) {
1175 .Case("global.to.shared.cluster",
1176 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1177 .Case("shared.cta.to.cluster",
1178 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1180
1182 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1184 return ID;
1185 }
1186
1188}
1189
// Maps the remainder of an nvvm intrinsic name to a bf16 intrinsic ID for the
// 'fma.rn.', 'fmax.', 'fmin.' and 'neg.' families. Each family consumes its
// prefix from Name and then matches the rest exactly with .Case().
// NOTE(review): this listing appears to be missing the signature line, the
// tail of each lookup chain and the final return, so the value produced for
// an unmatched name is not visible here.
1191 if (Name.consume_front("fma.rn."))
1192 return StringSwitch<Intrinsic::ID>(Name)
1193 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1194 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1195 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1196 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1198
 // fmax: every listed combination of the ftz / nan / xorsign.abs modifiers,
 // each in scalar (bf16) and packed (bf16x2) form.
1199 if (Name.consume_front("fmax."))
1200 return StringSwitch<Intrinsic::ID>(Name)
1201 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1202 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1203 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1204 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1205 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1206 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1207 .Case("ftz.nan.xorsign.abs.bf16",
1208 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1209 .Case("ftz.nan.xorsign.abs.bf16x2",
1210 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1211 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1212 .Case("ftz.xorsign.abs.bf16x2",
1213 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1214 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1215 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1216 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1217 .Case("nan.xorsign.abs.bf16x2",
1218 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1219 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1220 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1222
 // fmin: same set of suffixes as the fmax table above.
1223 if (Name.consume_front("fmin."))
1224 return StringSwitch<Intrinsic::ID>(Name)
1225 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1226 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1227 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1228 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1229 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1230 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1231 .Case("ftz.nan.xorsign.abs.bf16",
1232 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1233 .Case("ftz.nan.xorsign.abs.bf16x2",
1234 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1235 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1236 .Case("ftz.xorsign.abs.bf16x2",
1237 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1238 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1239 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1240 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1241 .Case("nan.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1243 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1244 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1246
1247 if (Name.consume_front("neg."))
1248 return StringSwitch<Intrinsic::ID>(Name)
1249 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1250 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1252
1254}
1255
// True when Name begins with one of the address-space keywords 'local',
// 'shared', 'global', 'constant' or 'param'. consume_front also drops the
// matched keyword from Name, and `||` stops at the first match.
// NOTE(review): the signature line is not visible in this listing; presumably
// Name is taken by reference, since the call site goes on to test the
// remainder with starts_with(".to.gen") - confirm.
1257 return Name.consume_front("local") || Name.consume_front("shared") ||
1258 Name.consume_front("global") || Name.consume_front("constant") ||
1259 Name.consume_front("param");
1260}
1261
1263 const FunctionType *FuncTy) {
 // Decides whether a legacy fp16 conversion intrinsic has operand and result
 // types that the replacement cast pair accepts. The caller passes Name with
 // the 'convert.' prefix already consumed.
 //   to.fp16*:   FPTrunc(param 0 -> half), then BitCast(half -> return type)
 //   from.fp16*: BitCast(param 0 -> half), then FPExt(half -> return type)
 // Any other name yields false.
 // NOTE(review): the line with the function name and first parameter is not
 // visible in this listing.
1264 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1265 if (Name.starts_with("to.fp16")) {
1266 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1267 HalfTy) &&
1268 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1269 FuncTy->getReturnType());
1270 }
1271
1272 if (Name.starts_with("from.fp16")) {
1273 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1274 HalfTy) &&
1275 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1276 FuncTy->getReturnType());
1277 }
1278
1279 return false;
1280}
1281
1283 bool CanUpgradeDebugIntrinsicsToRecords) {
1284 assert(F && "Illegal to upgrade a non-existent Function.");
1285
1286 StringRef Name = F->getName();
1287
1288 // Quickly eliminate it, if it's not a candidate.
1289 if (!Name.consume_front("llvm.") || Name.empty())
1290 return false;
1291
1292 switch (Name[0]) {
1293 default: break;
1294 case 'a': {
1295 bool IsArm = Name.consume_front("arm.");
1296 if (IsArm || Name.consume_front("aarch64.")) {
1297 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1298 return true;
1299 break;
1300 }
1301
1302 if (Name.consume_front("amdgcn.")) {
1303 if (Name == "alignbit") {
1304 // Target specific intrinsic became redundant
1306 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1307 return true;
1308 }
1309
1310 if (Name.consume_front("atomic.")) {
1311 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1312 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1313 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1314 // and usub_sat so there's no new declaration.
1315 NewFn = nullptr;
1316 return true;
1317 }
1318 break; // No other 'amdgcn.atomic.*'
1319 }
1320
1321 switch (F->getIntrinsicID()) {
1322 default:
1323 break;
1324 // Legacy wmma iu intrinsics without the optional clamp operand.
1325 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1326 if (F->arg_size() == 7) {
1327 NewFn = nullptr;
1328 return true;
1329 }
1330 break;
1331 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1334 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1336 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1337 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1338 if (F->arg_size() == 8) {
1339 NewFn = nullptr;
1340 return true;
1341 }
1342 break;
1343 }
1344
1345 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1346 Name.consume_front("flat.atomic.")) {
1347 if (Name.starts_with("fadd") ||
1348 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1349 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1350 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1351 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1352 // declaration.
1353 NewFn = nullptr;
1354 return true;
1355 }
1356 }
1357
1358 if (Name.starts_with("ldexp.")) {
1359 // Target specific intrinsic became redundant
1361 F->getParent(), Intrinsic::ldexp,
1362 {F->getReturnType(), F->getArg(1)->getType()});
1363 return true;
1364 }
1365 break; // No other 'amdgcn.*'
1366 }
1367
1368 break;
1369 }
1370 case 'c': {
1371 if (F->arg_size() == 1) {
1372 if (Name.consume_front("convert.")) {
1373 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1374 NewFn = nullptr;
1375 return true;
1376 }
1377 }
1378
1380 .StartsWith("ctlz.", Intrinsic::ctlz)
1381 .StartsWith("cttz.", Intrinsic::cttz)
1384 rename(F);
1385 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1386 F->arg_begin()->getType());
1387 return true;
1388 }
1389 }
1390
1391 if (F->arg_size() == 2 && Name == "coro.end") {
1392 rename(F);
1393 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1394 Intrinsic::coro_end);
1395 return true;
1396 }
1397
1398 break;
1399 }
1400 case 'd':
1401 if (Name.consume_front("dbg.")) {
1402 // Mark debug intrinsics for upgrade to new debug format.
1403 if (CanUpgradeDebugIntrinsicsToRecords) {
1404 if (Name == "addr" || Name == "value" || Name == "assign" ||
1405 Name == "declare" || Name == "label") {
1406 // There's no function to replace these with.
1407 NewFn = nullptr;
1408 // But we do want these to get upgraded.
1409 return true;
1410 }
1411 }
1412 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1413 // converted to DbgVariableRecords later.
1414 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1415 rename(F);
1416 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1417 Intrinsic::dbg_value);
1418 return true;
1419 }
1420 break; // No other 'dbg.*'.
1421 }
1422 break;
1423 case 'e':
1424 if (Name.consume_front("experimental.vector.")) {
1427 // Skip over extract.last.active, otherwise it will be 'upgraded'
1428 // to a regular vector extract which is a different operation.
1429 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1430 .StartsWith("extract.", Intrinsic::vector_extract)
1431 .StartsWith("insert.", Intrinsic::vector_insert)
1432 .StartsWith("reverse.", Intrinsic::vector_reverse)
1433 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1434 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1435 .StartsWith("partial.reduce.add",
1436 Intrinsic::vector_partial_reduce_add)
1439 const auto *FT = F->getFunctionType();
1441 if (ID == Intrinsic::vector_extract ||
1442 ID == Intrinsic::vector_interleave2)
1443 // Extracting overloads the return type.
1444 Tys.push_back(FT->getReturnType());
1445 if (ID != Intrinsic::vector_interleave2)
1446 Tys.push_back(FT->getParamType(0));
1447 if (ID == Intrinsic::vector_insert ||
1448 ID == Intrinsic::vector_partial_reduce_add)
1449 // Inserting overloads the inserted type.
1450 Tys.push_back(FT->getParamType(1));
1451 rename(F);
1452 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1453 return true;
1454 }
1455
1456 if (Name.consume_front("reduce.")) {
1458 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1459 if (R.match(Name, &Groups))
1461 .Case("add", Intrinsic::vector_reduce_add)
1462 .Case("mul", Intrinsic::vector_reduce_mul)
1463 .Case("and", Intrinsic::vector_reduce_and)
1464 .Case("or", Intrinsic::vector_reduce_or)
1465 .Case("xor", Intrinsic::vector_reduce_xor)
1466 .Case("smax", Intrinsic::vector_reduce_smax)
1467 .Case("smin", Intrinsic::vector_reduce_smin)
1468 .Case("umax", Intrinsic::vector_reduce_umax)
1469 .Case("umin", Intrinsic::vector_reduce_umin)
1470 .Case("fmax", Intrinsic::vector_reduce_fmax)
1471 .Case("fmin", Intrinsic::vector_reduce_fmin)
1473
1474 bool V2 = false;
1476 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1477 Groups.clear();
1478 V2 = true;
1479 if (R2.match(Name, &Groups))
1481 .Case("fadd", Intrinsic::vector_reduce_fadd)
1482 .Case("fmul", Intrinsic::vector_reduce_fmul)
1484 }
1486 rename(F);
1487 auto Args = F->getFunctionType()->params();
1488 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1489 {Args[V2 ? 1 : 0]});
1490 return true;
1491 }
1492 break; // No other 'expermental.vector.reduce.*'.
1493 }
1494
1495 if (Name.consume_front("splice"))
1496 return true;
1497 break; // No other 'experimental.vector.*'.
1498 }
1499 if (Name.consume_front("experimental.stepvector.")) {
1500 Intrinsic::ID ID = Intrinsic::stepvector;
1501 rename(F);
1503 F->getParent(), ID, F->getFunctionType()->getReturnType());
1504 return true;
1505 }
1506 break; // No other 'e*'.
1507 case 'f':
1508 if (Name.starts_with("flt.rounds")) {
1509 rename(F);
1510 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1511 Intrinsic::get_rounding);
1512 return true;
1513 }
1514 break;
1515 case 'i':
1516 if (Name.starts_with("invariant.group.barrier")) {
1517 // Rename invariant.group.barrier to launder.invariant.group
1518 auto Args = F->getFunctionType()->params();
1519 Type* ObjectPtr[1] = {Args[0]};
1520 rename(F);
1522 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1523 return true;
1524 }
1525 break;
1526 case 'l':
1527 if ((Name.starts_with("lifetime.start") ||
1528 Name.starts_with("lifetime.end")) &&
1529 F->arg_size() == 2) {
1530 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1531 ? Intrinsic::lifetime_start
1532 : Intrinsic::lifetime_end;
1533 rename(F);
1534 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1535 F->getArg(0)->getType());
1536 return true;
1537 }
1538 break;
1539 case 'm': {
1540 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1541 // alignment parameter to embedding the alignment as an attribute of
1542 // the pointer args.
1543 if (unsigned ID = StringSwitch<unsigned>(Name)
1544 .StartsWith("memcpy.", Intrinsic::memcpy)
1545 .StartsWith("memmove.", Intrinsic::memmove)
1546 .Default(0)) {
1547 if (F->arg_size() == 5) {
1548 rename(F);
1549 // Get the types of dest, src, and len
1550 ArrayRef<Type *> ParamTypes =
1551 F->getFunctionType()->params().slice(0, 3);
1552 NewFn =
1553 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1554 return true;
1555 }
1556 }
1557 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1558 rename(F);
1559 // Get the types of dest, and len
1560 const auto *FT = F->getFunctionType();
1561 Type *ParamTypes[2] = {
1562 FT->getParamType(0), // Dest
1563 FT->getParamType(2) // len
1564 };
1565 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1566 Intrinsic::memset, ParamTypes);
1567 return true;
1568 }
1569
1570 unsigned MaskedID =
1572 .StartsWith("masked.load", Intrinsic::masked_load)
1573 .StartsWith("masked.gather", Intrinsic::masked_gather)
1574 .StartsWith("masked.store", Intrinsic::masked_store)
1575 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1576 .Default(0);
1577 if (MaskedID && F->arg_size() == 4) {
1578 rename(F);
1579 if (MaskedID == Intrinsic::masked_load ||
1580 MaskedID == Intrinsic::masked_gather) {
1582 F->getParent(), MaskedID,
1583 {F->getReturnType(), F->getArg(0)->getType()});
1584 return true;
1585 }
1587 F->getParent(), MaskedID,
1588 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1589 return true;
1590 }
1591 break;
1592 }
1593 case 'n': {
1594 if (Name.consume_front("nvvm.")) {
1595 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1596 if (F->arg_size() == 1) {
1597 Intrinsic::ID IID =
1599 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1600 .Case("clz.i", Intrinsic::ctlz)
1601 .Case("popc.i", Intrinsic::ctpop)
1603 if (IID != Intrinsic::not_intrinsic) {
1604 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1605 {F->getReturnType()});
1606 return true;
1607 }
1608 } else if (F->arg_size() == 2) {
1609 Intrinsic::ID IID =
1611 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1612 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1613 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1614 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1616 if (IID != Intrinsic::not_intrinsic) {
1617 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1618 {F->getReturnType()});
1619 return true;
1620 }
1621 }
1622
1623 // Check for nvvm intrinsics that need a return type adjustment.
1624 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1626 if (IID != Intrinsic::not_intrinsic) {
1627 NewFn = nullptr;
1628 return true;
1629 }
1630 }
1631
1632 // Upgrade Distributed Shared Memory Intrinsics
1634 if (IID != Intrinsic::not_intrinsic) {
1635 rename(F);
1636 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1637 return true;
1638 }
1639
1640 // Upgrade TMA copy G2S Intrinsics
1642 if (IID != Intrinsic::not_intrinsic) {
1643 rename(F);
1644 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1645 return true;
1646 }
1647
1648 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1649 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1650 //
1651 // TODO: We could add lohi.i2d.
1652 bool Expand = false;
1653 if (Name.consume_front("abs."))
1654 // nvvm.abs.{i,ii}
1655 Expand =
1656 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1657 else if (Name.consume_front("fabs."))
1658 // nvvm.fabs.{f,ftz.f,d}
1659 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1660 else if (Name.consume_front("ex2.approx."))
1661 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1662 Expand =
1663 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1664 else if (Name.consume_front("atomic.load."))
1665 // nvvm.atomic.load.add.{f32,f64}.p
1666 // nvvm.atomic.load.{inc,dec}.32.p
1667 Expand = StringSwitch<bool>(Name)
1668 .StartsWith("add.f32.p", true)
1669 .StartsWith("add.f64.p", true)
1670 .StartsWith("inc.32.p", true)
1671 .StartsWith("dec.32.p", true)
1672 .Default(false);
1673 else if (Name.consume_front("atomic."))
1674 // nvvm.atomic.{add,exch,max,min,inc,dec,and,or,xor}.gen.{i,f}.{cta,sys}
1675 // nvvm.atomic.cas.gen.i.{cta,sys}
1676 Expand = StringSwitch<bool>(Name)
1677 .StartsWith("add.gen.", true)
1678 .StartsWith("exch.gen.", true)
1679 .StartsWith("max.gen.", true)
1680 .StartsWith("min.gen.", true)
1681 .StartsWith("inc.gen.", true)
1682 .StartsWith("dec.gen.", true)
1683 .StartsWith("and.gen.", true)
1684 .StartsWith("or.gen.", true)
1685 .StartsWith("xor.gen.", true)
1686 .StartsWith("cas.gen.", true)
1687 .Default(false);
1688 else if (Name.consume_front("bitcast."))
1689 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1690 Expand =
1691 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1692 else if (Name.consume_front("rotate."))
1693 // nvvm.rotate.{b32,b64,right.b64}
1694 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1695 else if (Name.consume_front("ptr.gen.to."))
1696 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1697 Expand = consumeNVVMPtrAddrSpace(Name);
1698 else if (Name.consume_front("ptr."))
1699 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1700 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1701 else if (Name.consume_front("ldg.global."))
1702 // nvvm.ldg.global.{i,p,f}
1703 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1704 Name.starts_with("p."));
1705 else
1706 Expand = StringSwitch<bool>(Name)
1707 .Case("barrier0", true)
1708 .Case("barrier.n", true)
1709 .Case("barrier.sync.cnt", true)
1710 .Case("barrier.sync", true)
1711 .Case("barrier", true)
1712 .Case("bar.sync", true)
1713 .Case("barrier0.popc", true)
1714 .Case("barrier0.and", true)
1715 .Case("barrier0.or", true)
1716 .Case("clz.ll", true)
1717 .Case("popc.ll", true)
1718 .Case("h2f", true)
1719 .Case("swap.lo.hi.b64", true)
1720 .Case("tanh.approx.f32", true)
1721 .Default(false);
1722
1723 if (Expand) {
1724 NewFn = nullptr;
1725 return true;
1726 }
1727 break; // No other 'nvvm.*'.
1728 }
1729 break;
1730 }
1731 case 'o':
1732 if (Name.starts_with("objectsize.")) {
1733 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1734 if (F->arg_size() == 2 || F->arg_size() == 3) {
1735 rename(F);
1736 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1737 Intrinsic::objectsize, Tys);
1738 return true;
1739 }
1740 }
1741 break;
1742
1743 case 'p':
1744 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1745 rename(F);
1747 F->getParent(), Intrinsic::ptr_annotation,
1748 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1749 return true;
1750 }
1751 break;
1752
1753 case 'r': {
1754 if (Name.consume_front("riscv.")) {
1757 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1758 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1759 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1760 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1763 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1764 rename(F);
1765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1766 return true;
1767 }
1768 break; // No other applicable upgrades.
1769 }
1770
1772 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1773 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1776 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1777 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1778 rename(F);
1779 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1780 return true;
1781 }
1782 break; // No other applicable upgrades.
1783 }
1784
1786 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1787 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1788 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1789 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1790 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1791 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1794 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1795 rename(F);
1796 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1797 return true;
1798 }
1799 break; // No other applicable upgrades.
1800 }
1801
1802 // Replace llvm.riscv.clmul with llvm.clmul.
1803 if (Name == "clmul.i32" || Name == "clmul.i64") {
1805 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1806 return true;
1807 }
1808
1809 break; // No other 'riscv.*' intrinsics
1810 }
1811 } break;
1812
1813 case 's':
1814 if (Name == "stackprotectorcheck") {
1815 NewFn = nullptr;
1816 return true;
1817 }
1818 break;
1819
1820 case 't':
1821 if (Name == "thread.pointer") {
1823 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1824 return true;
1825 }
1826 break;
1827
1828 case 'v': {
1829 if (Name == "var.annotation" && F->arg_size() == 4) {
1830 rename(F);
1832 F->getParent(), Intrinsic::var_annotation,
1833 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1834 return true;
1835 }
1836 if (Name.consume_front("vector.splice")) {
1837 if (Name.starts_with(".left") || Name.starts_with(".right"))
1838 break;
1839 return true;
1840 }
1841 break;
1842 }
1843
1844 case 'w':
1845 if (Name.consume_front("wasm.")) {
1848 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1849 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1850 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1853 rename(F);
1854 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1855 F->getReturnType());
1856 return true;
1857 }
1858
1859 if (Name.consume_front("dot.i8x16.i7x16.")) {
1861 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1862 .Case("add.signed",
1863 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1866 rename(F);
1867 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1868 return true;
1869 }
1870 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1871 }
1872 break; // No other 'wasm.*'.
1873 }
1874 break;
1875
1876 case 'x':
1877 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1878 return true;
1879 }
1880
1881 auto *ST = dyn_cast<StructType>(F->getReturnType());
1882 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1883 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1884 // Replace return type with literal non-packed struct. Only do this for
1885 // intrinsics declared to return a struct, not for intrinsics with
1886 // overloaded return type, in which case the exact struct type will be
1887 // mangled into the name.
1888 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1889 FunctionType *FT = F->getFunctionType();
1890 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1891 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1892 std::string Name = F->getName().str();
1893 rename(F);
1894 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1895 Name, F->getParent());
1896
1897 // The new function may also need remangling.
1898 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1899 NewFn = *Result;
1900 return true;
1901 }
1902 }
1903
1904 // Remangle our intrinsic since we upgrade the mangling
1906 if (Result != std::nullopt) {
1907 NewFn = *Result;
1908 return true;
1909 }
1910
1911 // This may not belong here. This function is effectively being overloaded
1912 // to both detect an intrinsic which needs upgrading, and to provide the
1913 // upgraded form of the intrinsic. We should perhaps have two separate
1914 // functions for this.
1915 return false;
1916}
1917
// NOTE(review): the first line of this definition (return type, name and the
// leading parameters) is not present in this listing; only the final parameter
// survives on the next line. Restore the header from the upstream file.
//
// Runs upgradeIntrinsicFunction1 on F, then refreshes the intrinsic attributes
// of whichever function F names afterwards. The helper's boolean result is
// returned unchanged.
1919 bool CanUpgradeDebugIntrinsicsToRecords) {
// Clear the out-parameter first; it stays null unless the helper assigns a
// replacement declaration.
1920 NewFn = nullptr;
1921 bool Upgraded =
1922 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1923
1924 // Upgrade intrinsic attributes. This does not change the function.
// From here on F names the replacement when one was created, otherwise the
// function that was passed in.
1925 if (NewFn)
1926 F = NewFn;
// A zero intrinsic ID converts to false, so non-intrinsics are left alone.
1927 if (Intrinsic::ID id = F->getIntrinsicID()) {
1928 // Only do this if the intrinsic signature is valid.
1929 SmallVector<Type *> OverloadTys;
1930 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
1931 F->setAttributes(
1932 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1933 }
1934 return Upgraded;
1935}
1936
// NOTE(review): several lines of this definition are missing from this
// listing: the opening line with the signature, the statements that declare
// ATy and STy, and the final argument of the ConstantStruct::get call inside
// the loop. Restore them from the upstream file before relying on this text.
//
// Applies to globals named "llvm.global_ctors" or "llvm.global_dtors" that
// have an initializer. When STy has exactly two elements, a new global is
// built whose entries use a three-element struct: the two original element
// types followed by a pointer type. Returns nullptr when GV does not match.
1938 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1939 GV->getName() == "llvm.global_dtors")) ||
1940 !GV->hasInitializer())
1941 return nullptr;
// NOTE(review): ATy is declared on a line absent from this listing.
1943 if (!ATy)
1944 return nullptr;
// NOTE(review): STy is declared on a line absent from this listing.
1946 if (!STy || STy->getNumElements() != 2)
1947 return nullptr;
1948
1949 LLVMContext &C = GV->getContext();
1950 IRBuilder<> IRB(C);
// New entry type: both existing element types plus a trailing pointer type.
1951 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1952 IRB.getPtrTy());
1953 Constant *Init = GV->getInitializer();
1954 unsigned N = Init->getNumOperands();
1955 std::vector<Constant *> NewCtors(N);
// Rebuild every entry, carrying over aggregate elements 0 and 1.
1956 for (unsigned i = 0; i != N; ++i) {
1957 auto Ctor = cast<Constant>(Init->getOperand(i));
1958 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1959 Ctor->getAggregateElement(1),
// NOTE(review): the third ConstantStruct::get operand is missing here.
1961 }
1962 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1963
// The replacement reuses GV's linkage and name and is created with the
// second constructor argument set to false.
1964 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1965 NewInit, GV->getName());
1966}
1967
1968// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1969// to byte shuffles.
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing Shift
// parameter survives. Restore the header from the upstream file.
1971 unsigned Shift) {
1972 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each element of the incoming vector counts as 8 bytes, so NumElts is the
// number of i8 lanes after the bitcast below.
1973 unsigned NumElts = ResultTy->getNumElements() * 8;
1974
1975 // Bitcast from a 64-bit element type to a byte element type.
1976 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1977 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1978
1979 // We'll be shuffling in zeroes.
1980 Value *Res = Constant::getNullValue(VecTy);
1981
1982 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1983 // we'll just return the zero vector.
1984 if (Shift < 16) {
// Holds one index per byte lane; sized for at most 64 lanes.
1985 int Idxs[64];
1986 // 256/512-bit version is split into 2/4 16-byte lanes.
1987 for (unsigned l = 0; l != NumElts; l += 16)
1988 for (unsigned i = 0; i != 16; ++i) {
// Indices of NumElts and above address the second shuffle operand (Op);
// smaller indices address the first operand (the zero vector).
1989 unsigned Idx = NumElts + i - Shift;
1990 if (Idx < NumElts)
1991 Idx -= NumElts - 16; // end of lane, switch operand.
1992 Idxs[l + i] = Idx + l;
1993 }
1994
1995 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1996 }
1997
1998 // Bitcast back to a 64-bit element type.
1999 return Builder.CreateBitCast(Res, ResultTy, "cast");
2000}
2001
2002// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
2003// to byte shuffles.
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing Shift
// parameter survives. Restore the header from the upstream file.
2005 unsigned Shift) {
2006 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each element of the incoming vector counts as 8 bytes, so NumElts is the
// number of i8 lanes after the bitcast below.
2007 unsigned NumElts = ResultTy->getNumElements() * 8;
2008
2009 // Bitcast from a 64-bit element type to a byte element type.
2010 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2011 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2012
2013 // We'll be shuffling in zeroes.
2014 Value *Res = Constant::getNullValue(VecTy);
2015
2016 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2017 // we'll just return the zero vector.
2018 if (Shift < 16) {
// Holds one index per byte lane; sized for at most 64 lanes.
2019 int Idxs[64];
2020 // 256/512-bit version is split into 2/4 16-byte lanes.
2021 for (unsigned l = 0; l != NumElts; l += 16)
2022 for (unsigned i = 0; i != 16; ++i) {
// Here the operand order is (Op, zero vector): indices below NumElts read
// from Op, and adding NumElts - 16 moves an index into the zero vector.
2023 unsigned Idx = i + Shift;
2024 if (Idx >= 16)
2025 Idx += NumElts - 16; // end of lane, switch operand.
2026 Idxs[l + i] = Idx + l;
2027 }
2028
2029 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2030 }
2031
2032 // Bitcast back to a 64-bit element type.
2033 return Builder.CreateBitCast(Res, ResultTy, "cast");
2034}
2035
// Reinterprets the integer Mask as a vector with one i1 lane per mask bit and,
// for NumElts of 4 or fewer, keeps only the first NumElts lanes.
2036static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2037 unsigned NumElts) {
2038 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// NOTE(review): the line that begins the MaskTy declaration is absent from
// this listing; only its argument list survives on the next line. The lane
// count passed there is the bit width of Mask's integer type.
2040 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2041 Mask = Builder.CreateBitCast(Mask, MaskTy);
2042
2043 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2044 // i8 and we need to extract down to the right number of elements.
2045 if (NumElts <= 4) {
2046 int Indices[4];
2047 for (unsigned i = 0; i != NumElts; ++i)
2048 Indices[i] = i;
// Identity indices 0..NumElts-1 select the leading lanes of Mask.
2049 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2050 "extract");
2051 }
2052
2053 return Mask;
2054}
2055
2056static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2057 Value *Op1) {
2058 // If the mask is all ones just emit the first operation.
2059 if (const auto *C = dyn_cast<Constant>(Mask))
2060 if (C->isAllOnesValue())
2061 return Op0;
2062
2063 Mask = getX86MaskVec(Builder, Mask,
2064 cast<FixedVectorType>(Op0->getType())->getNumElements());
2065 return Builder.CreateSelect(Mask, Op0, Op1);
2066}
2067
2068static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2069 Value *Op1) {
2070 // If the mask is all ones just emit the first operation.
2071 if (const auto *C = dyn_cast<Constant>(Mask))
2072 if (C->isAllOnesValue())
2073 return Op0;
2074
2075 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2076 Mask->getType()->getIntegerBitWidth());
2077 Mask = Builder.CreateBitCast(Mask, MaskTy);
2078 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2079 return Builder.CreateSelect(Mask, Op0, Op1);
2080}
2081
2082// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2083// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2084// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): three lines of this definition are absent from this listing:
// the opening line with the return type, name and leading parameters, the
// statement controlled by `if (ShiftVal >= 32)`, and one statement inside the
// `if (ShiftVal > 16)` block. Restore them from the upstream file.
2086 Value *Op1, Value *Shift,
2087 Value *Passthru, Value *Mask,
2088 bool IsVALIGN) {
// Shift must be a ConstantInt; the cast asserts otherwise.
2089 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2090
2091 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2092 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2093 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2094 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2095
2096 // Mask the immediate for VALIGN.
2097 if (IsVALIGN)
2098 ShiftVal &= (NumElts - 1);
2099
2100 // If palignr is shifting the pair of vectors more than the size of two
2101 // lanes, emit zero.
2102 if (ShiftVal >= 32)
// NOTE(review): the statement controlled by the `if` above is missing here.
2104
2105 // If palignr is shifting the pair of input vectors more than one lane,
2106 // but less than two lanes, convert to shifting in zeroes.
2107 if (ShiftVal > 16) {
2108 ShiftVal -= 16;
2109 Op1 = Op0;
// NOTE(review): one statement of this block is missing here.
2111 }
2112
// Holds one shuffle index per element; sized for at most 64 elements.
2113 int Indices[64];
2114 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2115 for (unsigned l = 0; l < NumElts; l += 16) {
2116 for (unsigned i = 0; i != 16; ++i) {
2117 unsigned Idx = ShiftVal + i;
2118 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2119 Idx += NumElts - 16; // End of lane, switch operand.
2120 Indices[l + i] = Idx + l;
2121 }
2122 }
2123
// Only the first NumElts entries of Indices are handed to the shuffle.
2124 Value *Align = Builder.CreateShuffleVector(
2125 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2126
// Apply the write mask: masked-off lanes take their value from Passthru.
2127 return emitX86Select(Builder, Mask, Align, Passthru);
2128}
2129
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing
// parameters survive. Restore the header from the upstream file.
//
// Picks the x86_avx512_vpermi2var_* intrinsic that matches the call's result
// type, calls it on the first three call operands, and selects between that
// result and a pass-through value using call operand 3 as the mask.
2131 bool ZeroMask, bool IndexForm) {
2132 Type *Ty = CI.getType();
2133 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2134 unsigned EltWidth = Ty->getScalarSizeInBits();
2135 bool IsFloat = Ty->isFPOrFPVectorTy();
2136 Intrinsic::ID IID;
// 32- and 64-bit elements are split by float vs. integer; 16- and 8-bit
// elements are matched on widths alone.
2137 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2139 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2141 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2143 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2145 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2173 else
2174 llvm_unreachable("Unexpected intrinsic");
2175
2176 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2177 CI.getArgOperand(2) };
2178
2179 // If this isn't index form we need to swap operand 0 and 1.
2180 if (!IndexForm)
2181 std::swap(Args[0], Args[1]);
2182
2183 Value *V = Builder.CreateIntrinsic(IID, Args);
// The pass-through is all zeros when ZeroMask is set, otherwise the original
// (unswapped) call operand 1 bitcast to the result type.
2184 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2185 : Builder.CreateBitCast(CI.getArgOperand(1),
2186 Ty);
2187 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2188}
2189
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing IID
// parameter survives. Restore the header from the upstream file.
//
// Calls intrinsic IID on operands 0 and 1 of CI, passing the call's type to
// CreateIntrinsic. For the four-argument form the result is blended with
// operand 2 under the mask in operand 3.
2191 Intrinsic::ID IID) {
2192 Type *Ty = CI.getType();
2193 Value *Op0 = CI.getOperand(0);
2194 Value *Op1 = CI.getOperand(1);
2195 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2196
2197 if (CI.arg_size() == 4) { // For masked intrinsics.
2198 Value *VecSrc = CI.getOperand(2);
2199 Value *Mask = CI.getOperand(3);
2200 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2201 }
2202 return Res;
2203}
2204
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing
// IsRotateRight parameter survives. Restore the header from the upstream file.
//
// Emits a funnel shift (fshr when IsRotateRight, otherwise fshl) with call
// operand 0 supplied as both data inputs and operand 1 as the amount. For the
// four-argument form the result is blended with operand 2 under the mask in
// operand 3.
2206 bool IsRotateRight) {
2207 Type *Ty = CI.getType();
2208 Value *Src = CI.getArgOperand(0);
2209 Value *Amt = CI.getArgOperand(1);
2210
2211 // Amount may be scalar immediate, in which case create a splat vector.
2212 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2213 // we only care about the lowest log2 bits anyway.
2214 if (Amt->getType() != Ty) {
2215 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// The final `false` requests an unsigned conversion to the element type.
2216 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2217 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2218 }
2219
2220 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2221 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2222
2223 if (CI.arg_size() == 4) { // For masked intrinsics.
2224 Value *VecSrc = CI.getOperand(2);
2225 Value *Mask = CI.getOperand(3);
2226 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2227 }
2228 return Res;
2229}
2230
2231static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2232 bool IsSigned) {
2233 Type *Ty = CI.getType();
2234 Value *LHS = CI.getArgOperand(0);
2235 Value *RHS = CI.getArgOperand(1);
2236
2237 CmpInst::Predicate Pred;
2238 switch (Imm) {
2239 case 0x0:
2240 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2241 break;
2242 case 0x1:
2243 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2244 break;
2245 case 0x2:
2246 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2247 break;
2248 case 0x3:
2249 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2250 break;
2251 case 0x4:
2252 Pred = ICmpInst::ICMP_EQ;
2253 break;
2254 case 0x5:
2255 Pred = ICmpInst::ICMP_NE;
2256 break;
2257 case 0x6:
2258 return Constant::getNullValue(Ty); // FALSE
2259 case 0x7:
2260 return Constant::getAllOnesValue(Ty); // TRUE
2261 default:
2262 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2263 }
2264
2265 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2266 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2267 return Ext;
2268}
2269
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing
// parameters survive. Restore the header from the upstream file.
//
// Emits a funnel shift (fshr when IsShiftRight, otherwise fshl) on call
// operands 0 and 1 with operand 2 as the amount. For calls with four or more
// arguments the result is blended with a pass-through under the mask held in
// the last operand.
2271 bool IsShiftRight, bool ZeroMask) {
2272 Type *Ty = CI.getType();
2273 Value *Op0 = CI.getArgOperand(0);
2274 Value *Op1 = CI.getArgOperand(1);
2275 Value *Amt = CI.getArgOperand(2);
2276
// The right-shift form passes its two data operands in the opposite order.
2277 if (IsShiftRight)
2278 std::swap(Op0, Op1);
2279
2280 // Amount may be scalar immediate, in which case create a splat vector.
2281 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2282 // we only care about the lowest log2 bits anyway.
2283 if (Amt->getType() != Ty) {
2284 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2285 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2286 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2287 }
2288
2289 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2290 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2291
2292 unsigned NumArgs = CI.arg_size();
2293 if (NumArgs >= 4) { // For masked intrinsics.
// Pass-through choice: operand 3 for five-argument calls; otherwise all zeros
// when ZeroMask is set, else the original (unswapped) operand 0.
2294 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2295 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2296 CI.getArgOperand(0);
2297 Value *Mask = CI.getOperand(NumArgs - 1);
2298 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2299 }
2300 return Res;
2301}
2302
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing
// parameters survive. Restore the header from the upstream file.
//
// Stores Data to Ptr: an ordinary aligned store when Mask is a constant with
// all bits set, otherwise a masked store driven by Mask converted to a vector
// of i1.
2304 Value *Mask, bool Aligned) {
// Aligned requests alignment equal to the size of Data's type in bytes;
// otherwise an alignment of 1 is used.
2305 const Align Alignment =
2306 Aligned
2307 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2308 : Align(1);
2309
2310 // If the mask is all ones just emit a regular store.
2311 if (const auto *C = dyn_cast<Constant>(Mask))
2312 if (C->isAllOnesValue())
2313 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2314
2315 // Convert the mask from an integer type to a vector of i1.
2316 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2317 Mask = getX86MaskVec(Builder, Mask, NumElts);
2318 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2319}
2320
// NOTE(review): two lines of this definition are absent from this listing: the
// opening line with the return type, name and leading parameters, and the
// dividend of the `/ 8` expression inside the Align(...) call below. Restore
// them from the upstream file.
//
// Loads a value of Passthru's type from Ptr: an ordinary aligned load when
// Mask is a constant with all bits set, otherwise a masked load that takes
// masked-off lanes from Passthru.
2322 Value *Passthru, Value *Mask, bool Aligned) {
2323 Type *ValTy = Passthru->getType();
2324 const Align Alignment =
2325 Aligned
2326 ? Align(
// NOTE(review): the expression divided by 8 is missing here.
2328 8)
2329 : Align(1);
2330
2331 // If the mask is all ones just emit a regular load.
2332 if (const auto *C = dyn_cast<Constant>(Mask))
2333 if (C->isAllOnesValue())
2334 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2335
2336 // Convert the mask from an integer type to a vector of i1.
2337 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2338 Mask = getX86MaskVec(Builder, Mask, NumElts);
2339 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2340}
2341
2342static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2343 Type *Ty = CI.getType();
2344 Value *Op0 = CI.getArgOperand(0);
2345 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2346 {Op0, Builder.getInt1(false)});
2347 if (CI.arg_size() == 3)
2348 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2349 return Res;
2350}
2351
2352static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2353 Type *Ty = CI.getType();
2354
2355 // Arguments have a vXi32 type so cast to vXi64.
2356 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2357 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2358
2359 if (IsSigned) {
2360 // Shift left then arithmetic shift right.
2361 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2362 LHS = Builder.CreateShl(LHS, ShiftAmt);
2363 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2364 RHS = Builder.CreateShl(RHS, ShiftAmt);
2365 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2366 } else {
2367 // Clear the upper bits.
2368 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2369 LHS = Builder.CreateAnd(LHS, Mask);
2370 RHS = Builder.CreateAnd(RHS, Mask);
2371 }
2372
2373 Value *Res = Builder.CreateMul(LHS, RHS);
2374
2375 if (CI.arg_size() == 4)
2376 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2377
2378 return Res;
2379}
2380
2381// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// NOTE(review): two lines of this definition are absent from this listing: the
// opening line with the return type, name and leading parameters, and the
// second operand of the CreateShuffleVector call below. Restore them from the
// upstream file.
2383 Value *Mask) {
2384 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask, or a constant Mask with all bits set, leaves Vec as it is.
2385 if (Mask) {
2386 const auto *C = dyn_cast<Constant>(Mask);
2387 if (!C || !C->isAllOnesValue())
2388 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2389 }
2390
// Widen to eight lanes: the first NumElts indices keep Vec's own lanes, the
// remaining indices are NumElts or larger and so address the second shuffle
// operand.
2391 if (NumElts < 8) {
2392 int Indices[8];
2393 for (unsigned i = 0; i != NumElts; ++i)
2394 Indices[i] = i;
2395 for (unsigned i = NumElts; i != 8; ++i)
2396 Indices[i] = NumElts + i % NumElts;
2397 Vec = Builder.CreateShuffleVector(Vec,
// NOTE(review): the second shuffle operand is missing here.
2399 Indices);
2400 }
// The result is an integer with max(NumElts, 8) bits.
2401 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2402}
2403
// NOTE(review): four lines of this definition are absent from this listing:
// the opening line with the return type, name and leading parameters, the
// first line of each statement in the `CC == 3` and `CC == 7` branches, and
// the declaration of Pred ahead of the switch. Restore them from the upstream
// file.
//
// Builds an integer comparison of call operands 0 and 1 chosen by CC, then
// hands it to applyX86MaskOn1BitsVec together with the call's last operand as
// the mask.
2405 unsigned CC, bool Signed) {
2406 Value *Op0 = CI.getArgOperand(0);
2407 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2408
2409 Value *Cmp;
// CC values 3 and 7 do not emit a compare; each uses a value of type
// <NumElts x i1> whose construction starts on a missing line.
2410 if (CC == 3) {
2412 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2413 } else if (CC == 7) {
2415 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2416 } else {
// Remaining codes: 0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = GE, 6 = GT, where the
// ordering predicates are signed or unsigned according to Signed.
2418 switch (CC) {
2419 default: llvm_unreachable("Unknown condition code");
2420 case 0: Pred = ICmpInst::ICMP_EQ; break;
2421 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2422 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2423 case 4: Pred = ICmpInst::ICMP_NE; break;
2424 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2425 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2426 }
2427 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2428 }
2429
2430 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2431
2432 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2433}
2434
2435// Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing IID
// parameter survives. Restore the header from the upstream file.
2437 Intrinsic::ID IID) {
// Call IID on operands 0 and 1, then blend with operand 2 (pass-through)
// under the mask in operand 3.
2438 Value *Rep =
2439 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2440 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2441}
2442
// NOTE(review): the line that opens this definition (return type, name and
// parameters) is absent from this listing. Restore it from the upstream file.
//
// Returns call operand 0 with element 0 replaced: that element comes from
// element 0 of operand 1 when bit 0 of the mask (operand 3) is set, and from
// element 0 of operand 2 otherwise.
2444 Value* A = CI.getArgOperand(0);
2445 Value* B = CI.getArgOperand(1);
2446 Value* Src = CI.getArgOperand(2);
2447 Value* Mask = CI.getArgOperand(3);
2448
// Isolate bit 0 of the mask using an 8-bit constant 1 and test it for
// non-zero.
2449 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2450 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2451 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2452 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2453 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2454 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2455}
2456
// NOTE(review): the line that opens this definition (return type, name and
// parameters) is absent from this listing. Restore it from the upstream file.
//
// Converts call operand 0 into a vector of i1 with one lane per element of the
// call's result type, then sign-extends that vector to the result type.
2458 Value* Op = CI.getArgOperand(0);
2459 Type* ReturnOp = CI.getType();
2460 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2461 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2462 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2463}
2464
2465// Replace intrinsic with unmasked version and a select.
// NOTE(review): the line that opens this definition (return type, name and the
// leading parameters) is absent from this listing; only the trailing
// parameters survive. Restore the header from the upstream file.
//
// Matches Name (after its 12-character prefix is dropped) against a fixed list
// of operations and picks the unmasked intrinsic from the width of the call's
// type. On a match, Rep receives the unmasked call blended with the
// pass-through operand and the function returns true; on no match it returns
// false and leaves Rep untouched.
2467 CallBase &CI, Value *&Rep) {
2468 Name = Name.substr(12); // Remove avx512.mask.
2469
2470 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2471 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2472 Intrinsic::ID IID;
2473 if (Name.starts_with("max.p")) {
2474 if (VecWidth == 128 && EltWidth == 32)
2475 IID = Intrinsic::x86_sse_max_ps;
2476 else if (VecWidth == 128 && EltWidth == 64)
2477 IID = Intrinsic::x86_sse2_max_pd;
2478 else if (VecWidth == 256 && EltWidth == 32)
2479 IID = Intrinsic::x86_avx_max_ps_256;
2480 else if (VecWidth == 256 && EltWidth == 64)
2481 IID = Intrinsic::x86_avx_max_pd_256;
2482 else
2483 llvm_unreachable("Unexpected intrinsic");
2484 } else if (Name.starts_with("min.p")) {
2485 if (VecWidth == 128 && EltWidth == 32)
2486 IID = Intrinsic::x86_sse_min_ps;
2487 else if (VecWidth == 128 && EltWidth == 64)
2488 IID = Intrinsic::x86_sse2_min_pd;
2489 else if (VecWidth == 256 && EltWidth == 32)
2490 IID = Intrinsic::x86_avx_min_ps_256;
2491 else if (VecWidth == 256 && EltWidth == 64)
2492 IID = Intrinsic::x86_avx_min_pd_256;
2493 else
2494 llvm_unreachable("Unexpected intrinsic");
2495 } else if (Name.starts_with("pshuf.b.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_pshuf_b;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_pshuf_b_512;
2502 else
2503 llvm_unreachable("Unexpected intrinsic");
2504 } else if (Name.starts_with("pmul.hr.sw.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2511 else
2512 llvm_unreachable("Unexpected intrinsic");
2513 } else if (Name.starts_with("pmulh.w.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_pmulh_w;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_pmulh_w;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_pmulh_w_512;
2520 else
2521 llvm_unreachable("Unexpected intrinsic");
2522 } else if (Name.starts_with("pmulhu.w.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse2_pmulhu_w;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_pmulhu_w;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2529 else
2530 llvm_unreachable("Unexpected intrinsic");
2531 } else if (Name.starts_with("pmaddw.d.")) {
2532 if (VecWidth == 128)
2533 IID = Intrinsic::x86_sse2_pmadd_wd;
2534 else if (VecWidth == 256)
2535 IID = Intrinsic::x86_avx2_pmadd_wd;
2536 else if (VecWidth == 512)
2537 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2538 else
2539 llvm_unreachable("Unexpected intrinsic");
2540 } else if (Name.starts_with("pmaddubs.w.")) {
2541 if (VecWidth == 128)
2542 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2543 else if (VecWidth == 256)
2544 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2545 else if (VecWidth == 512)
2546 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2547 else
2548 llvm_unreachable("Unexpected intrinsic");
2549 } else if (Name.starts_with("packsswb.")) {
2550 if (VecWidth == 128)
2551 IID = Intrinsic::x86_sse2_packsswb_128;
2552 else if (VecWidth == 256)
2553 IID = Intrinsic::x86_avx2_packsswb;
2554 else if (VecWidth == 512)
2555 IID = Intrinsic::x86_avx512_packsswb_512;
2556 else
2557 llvm_unreachable("Unexpected intrinsic");
2558 } else if (Name.starts_with("packssdw.")) {
2559 if (VecWidth == 128)
2560 IID = Intrinsic::x86_sse2_packssdw_128;
2561 else if (VecWidth == 256)
2562 IID = Intrinsic::x86_avx2_packssdw;
2563 else if (VecWidth == 512)
2564 IID = Intrinsic::x86_avx512_packssdw_512;
2565 else
2566 llvm_unreachable("Unexpected intrinsic");
2567 } else if (Name.starts_with("packuswb.")) {
2568 if (VecWidth == 128)
2569 IID = Intrinsic::x86_sse2_packuswb_128;
2570 else if (VecWidth == 256)
2571 IID = Intrinsic::x86_avx2_packuswb;
2572 else if (VecWidth == 512)
2573 IID = Intrinsic::x86_avx512_packuswb_512;
2574 else
2575 llvm_unreachable("Unexpected intrinsic");
2576 } else if (Name.starts_with("packusdw.")) {
2577 if (VecWidth == 128)
2578 IID = Intrinsic::x86_sse41_packusdw;
2579 else if (VecWidth == 256)
2580 IID = Intrinsic::x86_avx2_packusdw;
2581 else if (VecWidth == 512)
2582 IID = Intrinsic::x86_avx512_packusdw_512;
2583 else
2584 llvm_unreachable("Unexpected intrinsic");
2585 } else if (Name.starts_with("vpermilvar.")) {
2586 if (VecWidth == 128 && EltWidth == 32)
2587 IID = Intrinsic::x86_avx_vpermilvar_ps;
2588 else if (VecWidth == 128 && EltWidth == 64)
2589 IID = Intrinsic::x86_avx_vpermilvar_pd;
2590 else if (VecWidth == 256 && EltWidth == 32)
2591 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2592 else if (VecWidth == 256 && EltWidth == 64)
2593 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2594 else if (VecWidth == 512 && EltWidth == 32)
2595 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2596 else if (VecWidth == 512 && EltWidth == 64)
2597 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2598 else
2599 llvm_unreachable("Unexpected intrinsic");
// The conversions below are matched on the exact remaining name.
2600 } else if (Name == "cvtpd2dq.256") {
2601 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2602 } else if (Name == "cvtpd2ps.256") {
2603 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2604 } else if (Name == "cvttpd2dq.256") {
2605 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2606 } else if (Name == "cvttps2dq.128") {
2607 IID = Intrinsic::x86_sse2_cvttps2dq;
2608 } else if (Name == "cvttps2dq.256") {
2609 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2610 } else if (Name.starts_with("permvar.")) {
2611 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2612 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2613 IID = Intrinsic::x86_avx2_permps;
2614 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2615 IID = Intrinsic::x86_avx2_permd;
2616 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2617 IID = Intrinsic::x86_avx512_permvar_df_256;
2618 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2619 IID = Intrinsic::x86_avx512_permvar_di_256;
2620 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2621 IID = Intrinsic::x86_avx512_permvar_sf_512;
2622 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2623 IID = Intrinsic::x86_avx512_permvar_si_512;
2624 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2625 IID = Intrinsic::x86_avx512_permvar_df_512;
2626 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2627 IID = Intrinsic::x86_avx512_permvar_di_512;
2628 else if (VecWidth == 128 && EltWidth == 16)
2629 IID = Intrinsic::x86_avx512_permvar_hi_128;
2630 else if (VecWidth == 256 && EltWidth == 16)
2631 IID = Intrinsic::x86_avx512_permvar_hi_256;
2632 else if (VecWidth == 512 && EltWidth == 16)
2633 IID = Intrinsic::x86_avx512_permvar_hi_512;
2634 else if (VecWidth == 128 && EltWidth == 8)
2635 IID = Intrinsic::x86_avx512_permvar_qi_128;
2636 else if (VecWidth == 256 && EltWidth == 8)
2637 IID = Intrinsic::x86_avx512_permvar_qi_256;
2638 else if (VecWidth == 512 && EltWidth == 8)
2639 IID = Intrinsic::x86_avx512_permvar_qi_512;
2640 else
2641 llvm_unreachable("Unexpected intrinsic");
2642 } else if (Name.starts_with("dbpsadbw.")) {
2643 if (VecWidth == 128)
2644 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2645 else if (VecWidth == 256)
2646 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2647 else if (VecWidth == 512)
2648 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2649 else
2650 llvm_unreachable("Unexpected intrinsic");
2651 } else if (Name.starts_with("pmultishift.qb.")) {
2652 if (VecWidth == 128)
2653 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2654 else if (VecWidth == 256)
2655 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2656 else if (VecWidth == 512)
2657 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2658 else
2659 llvm_unreachable("Unexpected intrinsic");
2660 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after the 9-character "conflict." prefix.
2661 if (Name[9] == 'd' && VecWidth == 128)
2662 IID = Intrinsic::x86_avx512_conflict_d_128;
2663 else if (Name[9] == 'd' && VecWidth == 256)
2664 IID = Intrinsic::x86_avx512_conflict_d_256;
2665 else if (Name[9] == 'd' && VecWidth == 512)
2666 IID = Intrinsic::x86_avx512_conflict_d_512;
2667 else if (Name[9] == 'q' && VecWidth == 128)
2668 IID = Intrinsic::x86_avx512_conflict_q_128;
2669 else if (Name[9] == 'q' && VecWidth == 256)
2670 IID = Intrinsic::x86_avx512_conflict_q_256;
2671 else if (Name[9] == 'q' && VecWidth == 512)
2672 IID = Intrinsic::x86_avx512_conflict_q_512;
2673 else
2674 llvm_unreachable("Unexpected intrinsic");
2675 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after the 5-character "pavg." prefix.
2676 if (Name[5] == 'b' && VecWidth == 128)
2677 IID = Intrinsic::x86_sse2_pavg_b;
2678 else if (Name[5] == 'b' && VecWidth == 256)
2679 IID = Intrinsic::x86_avx2_pavg_b;
2680 else if (Name[5] == 'b' && VecWidth == 512)
2681 IID = Intrinsic::x86_avx512_pavg_b_512;
2682 else if (Name[5] == 'w' && VecWidth == 128)
2683 IID = Intrinsic::x86_sse2_pavg_w;
2684 else if (Name[5] == 'w' && VecWidth == 256)
2685 IID = Intrinsic::x86_avx2_pavg_w;
2686 else if (Name[5] == 'w' && VecWidth == 512)
2687 IID = Intrinsic::x86_avx512_pavg_w_512;
2688 else
2689 llvm_unreachable("Unexpected intrinsic");
2690 } else
2691 return false;
2692
// The unmasked intrinsic takes every call operand except the last two, which
// are used below as the pass-through (second to last) and the mask (last).
2693 SmallVector<Value *, 4> Args(CI.args());
2694 Args.pop_back();
2695 Args.pop_back();
2696 Rep = Builder.CreateIntrinsic(IID, Args);
2697 unsigned NumArgs = CI.arg_size();
2698 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2699 CI.getArgOperand(NumArgs - 2));
2700 return true;
2701}
2702
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// When \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' that
/// opens the first "# marker" is replaced with ';'. Any other string is left
/// untouched.
///
/// \param AsmStr Inline-asm string to rewrite in place; must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Anchored prefix test: compare() looks only at the leading six characters,
  // whereas `find(...) == 0` scans the whole string whenever it is not a
  // prefix.
  if (AsmStr->compare(0, 6, "mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2713
2715 Function *F, IRBuilder<> &Builder) {
2716 Value *Rep = nullptr;
2717
2718 if (Name == "abs.i" || Name == "abs.ll") {
2719 Value *Arg = CI->getArgOperand(0);
2720 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->getType()},
2721 {Arg, Builder.getTrue()},
2722 /*FMFSource=*/nullptr, "abs");
2723 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2724 Type *Ty = (Name == "abs.bf16")
2725 ? Builder.getBFloatTy()
2726 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2727 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2728 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2729 Rep = Builder.CreateBitCast(Abs, CI->getType());
2730 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2731 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2732 : Intrinsic::nvvm_fabs;
2733 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2734 } else if (Name.consume_front("ex2.approx.")) {
2735 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2736 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2737 : Intrinsic::nvvm_ex2_approx;
2738 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2739 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2740 Name.starts_with("atomic.load.add.f64.p")) {
2741 Value *Ptr = CI->getArgOperand(0);
2742 Value *Val = CI->getArgOperand(1);
2743 Rep = Builder.CreateAtomicRMW(
2745 CI->getContext().getOrInsertSyncScopeID("device"));
2746 // The default scope for atomic.load.* intrinsics is device
2747 // (= gpu scope in ptx), but the default LLVM atomic scope is
2748 // "system"
2749 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2750 Name.starts_with("atomic.load.dec.32.p")) {
2751 Value *Ptr = CI->getArgOperand(0);
2752 Value *Val = CI->getArgOperand(1);
2753 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2755 Rep = Builder.CreateAtomicRMW(
2757 CI->getContext().getOrInsertSyncScopeID("device"));
2758 // See comment above.
2759 } else if (Name.starts_with("atomic.") && Name.contains(".gen.")) {
2760 // nvvm.atomic.{op}.gen.{i,f}.{cta,sys} -> atomicrmw / cmpxchg.
2761 StringRef Op = Name.substr(StringRef("atomic.").size());
2762 Value *Ptr = CI->getArgOperand(0);
2763 Value *Val = CI->getArgOperand(1);
2765 Op.contains(".cta.") ? "block" : "");
2766 if (Op.starts_with("cas.")) {
2767 Value *New = CI->getArgOperand(2);
2768 Value *Pair = Builder.CreateAtomicCmpXchg(
2769 Ptr, Val, New, MaybeAlign(), AtomicOrdering::Monotonic,
2771 Rep = Builder.CreateExtractValue(Pair, 0);
2772 } else {
2773 // Note we don't upgrade anything to AtomicRMWInst::UMin/UMax. This is
2774 // because we were actually missing those intrinsics!
2775 AtomicRMWInst::BinOp BinOp =
2777 .StartsWith("add.gen.f", AtomicRMWInst::FAdd)
2778 .StartsWith("add.gen.i", AtomicRMWInst::Add)
2789 "unexpected nvvm scoped atomic intrinsic");
2790 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val, MaybeAlign(),
2792 }
2793 } else if (Name == "clz.ll") {
2794 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2795 Value *Arg = CI->getArgOperand(0);
2796 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2797 {Arg, Builder.getFalse()},
2798 /*FMFSource=*/nullptr, "ctlz");
2799 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2800 } else if (Name == "popc.ll") {
2801 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2802 // i64.
2803 Value *Arg = CI->getArgOperand(0);
2804 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2805 Arg, /*FMFSource=*/nullptr, "ctpop");
2806 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2807 } else if (Name == "h2f") {
2808 Value *Cast =
2809 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2810 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2811 } else if (Name.consume_front("bitcast.") &&
2812 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2813 Name == "d2ll")) {
2814 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2815 } else if (Name == "rotate.b32") {
2816 Value *Arg = CI->getOperand(0);
2817 Value *ShiftAmt = CI->getOperand(1);
2818 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2819 {Arg, Arg, ShiftAmt});
2820 } else if (Name == "rotate.b64") {
2821 Type *Int64Ty = Builder.getInt64Ty();
2822 Value *Arg = CI->getOperand(0);
2823 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2824 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2825 {Arg, Arg, ZExtShiftAmt});
2826 } else if (Name == "rotate.right.b64") {
2827 Type *Int64Ty = Builder.getInt64Ty();
2828 Value *Arg = CI->getOperand(0);
2829 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2830 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2831 {Arg, Arg, ZExtShiftAmt});
2832 } else if (Name == "swap.lo.hi.b64") {
2833 Type *Int64Ty = Builder.getInt64Ty();
2834 Value *Arg = CI->getOperand(0);
2835 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2836 {Arg, Arg, Builder.getInt64(32)});
2837 } else if ((Name.consume_front("ptr.gen.to.") &&
2838 consumeNVVMPtrAddrSpace(Name)) ||
2839 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2840 Name.starts_with(".to.gen"))) {
2841 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2842 } else if (Name.consume_front("ldg.global")) {
2843 Value *Ptr = CI->getArgOperand(0);
2844 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2845 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2846 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2847 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2848 MDNode *MD = MDNode::get(Builder.getContext(), {});
2849 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2850 return LD;
2851 } else if (Name == "tanh.approx.f32") {
2852 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2853 FastMathFlags FMF;
2854 FMF.setApproxFunc();
2855 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2856 FMF);
2857 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2858 Value *Arg =
2859 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2860 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2861 {}, {Arg});
2862 } else if (Name == "barrier") {
2863 Rep = Builder.CreateIntrinsic(
2864 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2865 {CI->getArgOperand(0), CI->getArgOperand(1)});
2866 } else if (Name == "barrier.sync") {
2867 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2868 {CI->getArgOperand(0)});
2869 } else if (Name == "barrier.sync.cnt") {
2870 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2871 {CI->getArgOperand(0), CI->getArgOperand(1)});
2872 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2873 Name == "barrier0.or") {
2874 Value *C = CI->getArgOperand(0);
2875 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2876
2877 Intrinsic::ID IID =
2879 .Case("barrier0.popc",
2880 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2881 .Case("barrier0.and",
2882 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2883 .Case("barrier0.or",
2884 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2885 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2886 Rep = Builder.CreateZExt(Bar, CI->getType());
2887 } else {
2889 if (IID != Intrinsic::not_intrinsic &&
2890 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2891 rename(F);
2892 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2894 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2895 Value *Arg = CI->getArgOperand(I);
2896 Type *OldType = Arg->getType();
2897 Type *NewType = NewFn->getArg(I)->getType();
2898 Args.push_back(
2899 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2900 ? Builder.CreateBitCast(Arg, NewType)
2901 : Arg);
2902 }
2903 Rep = Builder.CreateCall(NewFn, Args);
2904 if (F->getReturnType()->isIntegerTy())
2905 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2906 }
2907 }
2908
2909 return Rep;
2910}
2911
2913 IRBuilder<> &Builder) {
2914 LLVMContext &C = F->getContext();
2915 Value *Rep = nullptr;
2916
2917 if (Name.starts_with("sse4a.movnt.")) {
2919 Elts.push_back(
2920 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2921 MDNode *Node = MDNode::get(C, Elts);
2922
2923 Value *Arg0 = CI->getArgOperand(0);
2924 Value *Arg1 = CI->getArgOperand(1);
2925
2926 // Nontemporal (unaligned) store of the 0'th element of the float/double
2927 // vector.
2928 Value *Extract =
2929 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2930
2931 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2932 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2933 } else if (Name.starts_with("avx.movnt.") ||
2934 Name.starts_with("avx512.storent.")) {
2936 Elts.push_back(
2937 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2938 MDNode *Node = MDNode::get(C, Elts);
2939
2940 Value *Arg0 = CI->getArgOperand(0);
2941 Value *Arg1 = CI->getArgOperand(1);
2942
2943 StoreInst *SI = Builder.CreateAlignedStore(
2944 Arg1, Arg0,
2946 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2947 } else if (Name == "sse2.storel.dq") {
2948 Value *Arg0 = CI->getArgOperand(0);
2949 Value *Arg1 = CI->getArgOperand(1);
2950
2951 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2952 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2953 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2954 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2955 } else if (Name.starts_with("sse.storeu.") ||
2956 Name.starts_with("sse2.storeu.") ||
2957 Name.starts_with("avx.storeu.")) {
2958 Value *Arg0 = CI->getArgOperand(0);
2959 Value *Arg1 = CI->getArgOperand(1);
2960 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2961 } else if (Name == "avx512.mask.store.ss") {
2962 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2963 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2964 Mask, false);
2965 } else if (Name.starts_with("avx512.mask.store")) {
2966 // "avx512.mask.storeu." or "avx512.mask.store."
2967 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2968 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2969 CI->getArgOperand(2), Aligned);
2970 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2971 // Upgrade packed integer vector compare intrinsics to compare instructions.
2972 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2973 bool CmpEq = Name[9] == 'e';
2974 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2975 CI->getArgOperand(0), CI->getArgOperand(1));
2976 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2977 } else if (Name.starts_with("avx512.broadcastm")) {
2978 Type *ExtTy = Type::getInt32Ty(C);
2979 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2980 ExtTy = Type::getInt64Ty(C);
2981 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2982 ExtTy->getPrimitiveSizeInBits();
2983 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2984 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2985 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2986 Value *Vec = CI->getArgOperand(0);
2987 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2988 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2989 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2990 } else if (Name.starts_with("avx.sqrt.p") ||
2991 Name.starts_with("sse2.sqrt.p") ||
2992 Name.starts_with("sse.sqrt.p")) {
2993 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2994 {CI->getArgOperand(0)});
2995 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2996 if (CI->arg_size() == 4 &&
2997 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2998 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2999 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
3000 : Intrinsic::x86_avx512_sqrt_pd_512;
3001
3002 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
3003 Rep = Builder.CreateIntrinsic(IID, Args);
3004 } else {
3005 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3006 {CI->getArgOperand(0)});
3007 }
3008 Rep =
3009 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3010 } else if (Name.starts_with("avx512.ptestm") ||
3011 Name.starts_with("avx512.ptestnm")) {
3012 Value *Op0 = CI->getArgOperand(0);
3013 Value *Op1 = CI->getArgOperand(1);
3014 Value *Mask = CI->getArgOperand(2);
3015 Rep = Builder.CreateAnd(Op0, Op1);
3016 llvm::Type *Ty = Op0->getType();
3018 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
3021 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3022 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
3023 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
3024 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
3025 ->getNumElements();
3026 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
3027 Rep =
3028 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3029 } else if (Name.starts_with("avx512.kunpck")) {
3030 unsigned NumElts = CI->getType()->getScalarSizeInBits();
3031 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
3032 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
3033 int Indices[64];
3034 for (unsigned i = 0; i != NumElts; ++i)
3035 Indices[i] = i;
3036
3037 // First extract half of each vector. This gives better codegen than
3038 // doing it in a single shuffle.
3039 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
3040 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
3041 // Concat the vectors.
3042 // NOTE: Operands have to be swapped to match intrinsic definition.
3043 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
3044 Rep = Builder.CreateBitCast(Rep, CI->getType());
3045 } else if (Name == "avx512.kand.w") {
3046 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3047 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3048 Rep = Builder.CreateAnd(LHS, RHS);
3049 Rep = Builder.CreateBitCast(Rep, CI->getType());
3050 } else if (Name == "avx512.kandn.w") {
3051 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3052 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3053 LHS = Builder.CreateNot(LHS);
3054 Rep = Builder.CreateAnd(LHS, RHS);
3055 Rep = Builder.CreateBitCast(Rep, CI->getType());
3056 } else if (Name == "avx512.kor.w") {
3057 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3058 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3059 Rep = Builder.CreateOr(LHS, RHS);
3060 Rep = Builder.CreateBitCast(Rep, CI->getType());
3061 } else if (Name == "avx512.kxor.w") {
3062 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3063 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3064 Rep = Builder.CreateXor(LHS, RHS);
3065 Rep = Builder.CreateBitCast(Rep, CI->getType());
3066 } else if (Name == "avx512.kxnor.w") {
3067 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3068 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3069 LHS = Builder.CreateNot(LHS);
3070 Rep = Builder.CreateXor(LHS, RHS);
3071 Rep = Builder.CreateBitCast(Rep, CI->getType());
3072 } else if (Name == "avx512.knot.w") {
3073 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3074 Rep = Builder.CreateNot(Rep);
3075 Rep = Builder.CreateBitCast(Rep, CI->getType());
3076 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3077 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3078 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3079 Rep = Builder.CreateOr(LHS, RHS);
3080 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3081 Value *C;
3082 if (Name[14] == 'c')
3083 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3084 else
3085 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3086 Rep = Builder.CreateICmpEQ(Rep, C);
3087 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3088 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3089 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3090 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3091 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3092 Type *I32Ty = Type::getInt32Ty(C);
3093 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3094 ConstantInt::get(I32Ty, 0));
3095 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3096 ConstantInt::get(I32Ty, 0));
3097 Value *EltOp;
3098 if (Name.contains(".add."))
3099 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3100 else if (Name.contains(".sub."))
3101 EltOp = Builder.CreateFSub(Elt0, Elt1);
3102 else if (Name.contains(".mul."))
3103 EltOp = Builder.CreateFMul(Elt0, Elt1);
3104 else
3105 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3106 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3107 ConstantInt::get(I32Ty, 0));
3108 } else if (Name.starts_with("avx512.mask.pcmp")) {
3109 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3110 bool CmpEq = Name[16] == 'e';
3111 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3112 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3113 Type *OpTy = CI->getArgOperand(0)->getType();
3114 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3115 Intrinsic::ID IID;
3116 switch (VecWidth) {
3117 default:
3118 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3119 break;
3120 case 128:
3121 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3122 break;
3123 case 256:
3124 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3125 break;
3126 case 512:
3127 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3128 break;
3129 }
3130
3131 Rep =
3132 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3133 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3134 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3135 Type *OpTy = CI->getArgOperand(0)->getType();
3136 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3137 unsigned EltWidth = OpTy->getScalarSizeInBits();
3138 Intrinsic::ID IID;
3139 if (VecWidth == 128 && EltWidth == 32)
3140 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3141 else if (VecWidth == 256 && EltWidth == 32)
3142 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3143 else if (VecWidth == 512 && EltWidth == 32)
3144 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3145 else if (VecWidth == 128 && EltWidth == 64)
3146 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3147 else if (VecWidth == 256 && EltWidth == 64)
3148 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3149 else if (VecWidth == 512 && EltWidth == 64)
3150 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3151 else
3152 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3153
3154 Rep =
3155 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3156 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3157 } else if (Name.starts_with("avx512.cmp.p")) {
3158 SmallVector<Value *, 4> Args(CI->args());
3159 Type *OpTy = Args[0]->getType();
3160 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3161 unsigned EltWidth = OpTy->getScalarSizeInBits();
3162 Intrinsic::ID IID;
3163 if (VecWidth == 128 && EltWidth == 32)
3164 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3165 else if (VecWidth == 256 && EltWidth == 32)
3166 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3167 else if (VecWidth == 512 && EltWidth == 32)
3168 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3169 else if (VecWidth == 128 && EltWidth == 64)
3170 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3171 else if (VecWidth == 256 && EltWidth == 64)
3172 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3173 else if (VecWidth == 512 && EltWidth == 64)
3174 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3175 else
3176 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3177
3179 if (VecWidth == 512)
3180 std::swap(Mask, Args.back());
3181 Args.push_back(Mask);
3182
3183 Rep = Builder.CreateIntrinsic(IID, Args);
3184 } else if (Name.starts_with("avx512.mask.cmp.")) {
3185 // Integer compare intrinsics.
3186 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3187 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3188 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3189 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3190 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3191 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3192 Name.starts_with("avx512.cvtw2mask.") ||
3193 Name.starts_with("avx512.cvtd2mask.") ||
3194 Name.starts_with("avx512.cvtq2mask.")) {
3195 Value *Op = CI->getArgOperand(0);
3196 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3197 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3198 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3199 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3200 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3201 Name.starts_with("avx512.mask.pabs")) {
3202 Rep = upgradeAbs(Builder, *CI);
3203 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3204 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3205 Name.starts_with("avx512.mask.pmaxs")) {
3206 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3207 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3208 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3209 Name.starts_with("avx512.mask.pmaxu")) {
3210 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3211 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3212 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3213 Name.starts_with("avx512.mask.pmins")) {
3214 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3215 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3216 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3217 Name.starts_with("avx512.mask.pminu")) {
3218 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3219 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3220 Name == "avx512.pmulu.dq.512" ||
3221 Name.starts_with("avx512.mask.pmulu.dq.")) {
3222 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3223 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3224 Name == "avx512.pmul.dq.512" ||
3225 Name.starts_with("avx512.mask.pmul.dq.")) {
3226 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3227 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3228 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3229 Rep =
3230 Builder.CreateSIToFP(CI->getArgOperand(1),
3231 cast<VectorType>(CI->getType())->getElementType());
3232 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3233 } else if (Name == "avx512.cvtusi2sd") {
3234 Rep =
3235 Builder.CreateUIToFP(CI->getArgOperand(1),
3236 cast<VectorType>(CI->getType())->getElementType());
3237 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3238 } else if (Name == "sse2.cvtss2sd") {
3239 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3240 Rep = Builder.CreateFPExt(
3241 Rep, cast<VectorType>(CI->getType())->getElementType());
3242 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3243 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3244 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3245 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3246 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3247 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3248 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3249 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3250 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3251 Name == "avx512.mask.cvtqq2ps.256" ||
3252 Name == "avx512.mask.cvtqq2ps.512" ||
3253 Name == "avx512.mask.cvtuqq2ps.256" ||
3254 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3255 Name == "avx.cvt.ps2.pd.256" ||
3256 Name == "avx512.mask.cvtps2pd.128" ||
3257 Name == "avx512.mask.cvtps2pd.256") {
3258 auto *DstTy = cast<FixedVectorType>(CI->getType());
3259 Rep = CI->getArgOperand(0);
3260 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3261
3262 unsigned NumDstElts = DstTy->getNumElements();
3263 if (NumDstElts < SrcTy->getNumElements()) {
3264 assert(NumDstElts == 2 && "Unexpected vector size");
3265 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3266 }
3267
3268 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3269 bool IsUnsigned = Name.contains("cvtu");
3270 if (IsPS2PD)
3271 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3272 else if (CI->arg_size() == 4 &&
3273 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3274 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3275 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3276 : Intrinsic::x86_avx512_sitofp_round;
3277 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3278 {Rep, CI->getArgOperand(3)});
3279 } else {
3280 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3281 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3282 }
3283
3284 if (CI->arg_size() >= 3)
3285 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3286 CI->getArgOperand(1));
3287 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3288 Name.starts_with("vcvtph2ps.")) {
3289 auto *DstTy = cast<FixedVectorType>(CI->getType());
3290 Rep = CI->getArgOperand(0);
3291 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3292 unsigned NumDstElts = DstTy->getNumElements();
3293 if (NumDstElts != SrcTy->getNumElements()) {
3294 assert(NumDstElts == 4 && "Unexpected vector size");
3295 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3296 }
3297 Rep = Builder.CreateBitCast(
3298 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3299 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3300 if (CI->arg_size() >= 3)
3301 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3302 CI->getArgOperand(1));
3303 } else if (Name.starts_with("avx512.mask.load")) {
3304 // "avx512.mask.loadu." or "avx512.mask.load."
3305 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3306 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3307 CI->getArgOperand(2), Aligned);
3308 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3309 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3310 auto *PtrTy = CI->getOperand(0)->getType();
3311 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3312 ResultTy->getNumElements());
3313 Rep = Builder.CreateIntrinsic(
3314 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3315 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3316 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3317 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3318 auto *PtrTy = CI->getArgOperand(0)->getType();
3319 Value *MaskVec =
3320 getX86MaskVec(Builder, CI->getArgOperand(2),
3321 cast<FixedVectorType>(ResultTy)->getNumElements());
3322 Rep = Builder.CreateIntrinsic(
3323 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3324 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3325 } else if (Name.starts_with("avx512.mask.compress.") ||
3326 Name.starts_with("avx512.mask.expand.")) {
3327 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3328
3329 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3330 ResultTy->getNumElements());
3331
3332 bool IsCompress = Name[12] == 'c';
3333 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3334 : Intrinsic::x86_avx512_mask_expand;
3335 Rep = Builder.CreateIntrinsic(
3336 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3337 } else if (Name.starts_with("xop.vpcom")) {
3338 bool IsSigned;
3339 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3340 Name.ends_with("uq"))
3341 IsSigned = false;
3342 else if (Name.ends_with("b") || Name.ends_with("w") ||
3343 Name.ends_with("d") || Name.ends_with("q"))
3344 IsSigned = true;
3345 else
3346 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3347
3348 unsigned Imm;
3349 if (CI->arg_size() == 3) {
3350 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3351 } else {
3352 Name = Name.substr(9); // strip off "xop.vpcom"
3353 if (Name.starts_with("lt"))
3354 Imm = 0;
3355 else if (Name.starts_with("le"))
3356 Imm = 1;
3357 else if (Name.starts_with("gt"))
3358 Imm = 2;
3359 else if (Name.starts_with("ge"))
3360 Imm = 3;
3361 else if (Name.starts_with("eq"))
3362 Imm = 4;
3363 else if (Name.starts_with("ne"))
3364 Imm = 5;
3365 else if (Name.starts_with("false"))
3366 Imm = 6;
3367 else if (Name.starts_with("true"))
3368 Imm = 7;
3369 else
3370 llvm_unreachable("Unknown condition");
3371 }
3372
3373 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3374 } else if (Name.starts_with("xop.vpcmov")) {
3375 Value *Sel = CI->getArgOperand(2);
3376 Value *NotSel = Builder.CreateNot(Sel);
3377 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3378 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3379 Rep = Builder.CreateOr(Sel0, Sel1);
3380 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3381 Name.starts_with("avx512.mask.prol")) {
3382 Rep = upgradeX86Rotate(Builder, *CI, false);
3383 } else if (Name.starts_with("avx512.pror") ||
3384 Name.starts_with("avx512.mask.pror")) {
3385 Rep = upgradeX86Rotate(Builder, *CI, true);
3386 } else if (Name.starts_with("avx512.vpshld.") ||
3387 Name.starts_with("avx512.mask.vpshld") ||
3388 Name.starts_with("avx512.maskz.vpshld")) {
3389 bool ZeroMask = Name[11] == 'z';
3390 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3391 } else if (Name.starts_with("avx512.vpshrd.") ||
3392 Name.starts_with("avx512.mask.vpshrd") ||
3393 Name.starts_with("avx512.maskz.vpshrd")) {
3394 bool ZeroMask = Name[11] == 'z';
3395 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3396 } else if (Name == "sse42.crc32.64.8") {
3397 Value *Trunc0 =
3398 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3399 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3400 {Trunc0, CI->getArgOperand(1)});
3401 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3402 } else if (Name.starts_with("avx.vbroadcast.s") ||
3403 Name.starts_with("avx512.vbroadcast.s")) {
3404 // Replace broadcasts with a series of insertelements.
3405 auto *VecTy = cast<FixedVectorType>(CI->getType());
3406 Type *EltTy = VecTy->getElementType();
3407 unsigned EltNum = VecTy->getNumElements();
3408 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3409 Type *I32Ty = Type::getInt32Ty(C);
3410 Rep = PoisonValue::get(VecTy);
3411 for (unsigned I = 0; I < EltNum; ++I)
3412 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3413 } else if (Name.starts_with("sse41.pmovsx") ||
3414 Name.starts_with("sse41.pmovzx") ||
3415 Name.starts_with("avx2.pmovsx") ||
3416 Name.starts_with("avx2.pmovzx") ||
3417 Name.starts_with("avx512.mask.pmovsx") ||
3418 Name.starts_with("avx512.mask.pmovzx")) {
3419 auto *DstTy = cast<FixedVectorType>(CI->getType());
3420 unsigned NumDstElts = DstTy->getNumElements();
3421
3422 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3423 SmallVector<int, 8> ShuffleMask(NumDstElts);
3424 for (unsigned i = 0; i != NumDstElts; ++i)
3425 ShuffleMask[i] = i;
3426
3427 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3428
3429 bool DoSext = Name.contains("pmovsx");
3430 Rep =
3431 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3432 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3433 if (CI->arg_size() == 3)
3434 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3435 CI->getArgOperand(1));
3436 } else if (Name == "avx512.mask.pmov.qd.256" ||
3437 Name == "avx512.mask.pmov.qd.512" ||
3438 Name == "avx512.mask.pmov.wb.256" ||
3439 Name == "avx512.mask.pmov.wb.512") {
3440 Type *Ty = CI->getArgOperand(1)->getType();
3441 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3442 Rep =
3443 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3444 } else if (Name.starts_with("avx.vbroadcastf128") ||
3445 Name == "avx2.vbroadcasti128") {
3446 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3447 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3448 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3449 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3450 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3451 if (NumSrcElts == 2)
3452 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3453 else
3454 Rep = Builder.CreateShuffleVector(Load,
3455 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3456 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3457 Name.starts_with("avx512.mask.shuf.f")) {
3458 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3459 Type *VT = CI->getType();
3460 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3461 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3462 unsigned ControlBitsMask = NumLanes - 1;
3463 unsigned NumControlBits = NumLanes / 2;
3464 SmallVector<int, 8> ShuffleMask(0);
3465
3466 for (unsigned l = 0; l != NumLanes; ++l) {
3467 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3468 // We actually need the other source.
3469 if (l >= NumLanes / 2)
3470 LaneMask += NumLanes;
3471 for (unsigned i = 0; i != NumElementsInLane; ++i)
3472 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3473 }
3474 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3475 CI->getArgOperand(1), ShuffleMask);
3476 Rep =
3477 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3478 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3479 Name.starts_with("avx512.mask.broadcasti")) {
3480 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3481 ->getNumElements();
3482 unsigned NumDstElts =
3483 cast<FixedVectorType>(CI->getType())->getNumElements();
3484
3485 SmallVector<int, 8> ShuffleMask(NumDstElts);
3486 for (unsigned i = 0; i != NumDstElts; ++i)
3487 ShuffleMask[i] = i % NumSrcElts;
3488
3489 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3490 CI->getArgOperand(0), ShuffleMask);
3491 Rep =
3492 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3493 } else if (Name.starts_with("avx2.pbroadcast") ||
3494 Name.starts_with("avx2.vbroadcast") ||
3495 Name.starts_with("avx512.pbroadcast") ||
3496 Name.starts_with("avx512.mask.broadcast.s")) {
3497 // Replace vp?broadcasts with a vector shuffle.
3498 Value *Op = CI->getArgOperand(0);
3499 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3500 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3503 Rep = Builder.CreateShuffleVector(Op, M);
3504
3505 if (CI->arg_size() == 3)
3506 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3507 CI->getArgOperand(1));
3508 } else if (Name.starts_with("sse2.padds.") ||
3509 Name.starts_with("avx2.padds.") ||
3510 Name.starts_with("avx512.padds.") ||
3511 Name.starts_with("avx512.mask.padds.")) {
3512 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3513 } else if (Name.starts_with("sse2.psubs.") ||
3514 Name.starts_with("avx2.psubs.") ||
3515 Name.starts_with("avx512.psubs.") ||
3516 Name.starts_with("avx512.mask.psubs.")) {
3517 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3518 } else if (Name.starts_with("sse2.paddus.") ||
3519 Name.starts_with("avx2.paddus.") ||
3520 Name.starts_with("avx512.mask.paddus.")) {
3521 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3522 } else if (Name.starts_with("sse2.psubus.") ||
3523 Name.starts_with("avx2.psubus.") ||
3524 Name.starts_with("avx512.mask.psubus.")) {
3525 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3526 } else if (Name.starts_with("avx512.mask.palignr.")) {
3527 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3528 CI->getArgOperand(1), CI->getArgOperand(2),
3529 CI->getArgOperand(3), CI->getArgOperand(4),
3530 false);
3531 } else if (Name.starts_with("avx512.mask.valign.")) {
3533 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3534 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3535 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3536 // 128/256-bit shift left specified in bits.
3537 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3538 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3539 Shift / 8); // Shift is in bits.
3540 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3541 // 128/256-bit shift right specified in bits.
3542 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3543 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3544 Shift / 8); // Shift is in bits.
3545 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3546 Name == "avx512.psll.dq.512") {
3547 // 128/256/512-bit shift left specified in bytes.
3548 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3549 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3550 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3551 Name == "avx512.psrl.dq.512") {
3552 // 128/256/512-bit shift right specified in bytes.
3553 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3554 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3555 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3556 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3557 Name.starts_with("avx2.pblendd.")) {
3558 Value *Op0 = CI->getArgOperand(0);
3559 Value *Op1 = CI->getArgOperand(1);
3560 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3561 auto *VecTy = cast<FixedVectorType>(CI->getType());
3562 unsigned NumElts = VecTy->getNumElements();
3563
3564 SmallVector<int, 16> Idxs(NumElts);
3565 for (unsigned i = 0; i != NumElts; ++i)
3566 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3567
3568 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3569 } else if (Name.starts_with("avx.vinsertf128.") ||
3570 Name == "avx2.vinserti128" ||
3571 Name.starts_with("avx512.mask.insert")) {
3572 Value *Op0 = CI->getArgOperand(0);
3573 Value *Op1 = CI->getArgOperand(1);
3574 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3575 unsigned DstNumElts =
3576 cast<FixedVectorType>(CI->getType())->getNumElements();
3577 unsigned SrcNumElts =
3578 cast<FixedVectorType>(Op1->getType())->getNumElements();
3579 unsigned Scale = DstNumElts / SrcNumElts;
3580
3581 // Mask off the high bits of the immediate value; hardware ignores those.
3582 Imm = Imm % Scale;
3583
3584 // Extend the second operand into a vector the size of the destination.
3585 SmallVector<int, 8> Idxs(DstNumElts);
3586 for (unsigned i = 0; i != SrcNumElts; ++i)
3587 Idxs[i] = i;
3588 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3589 Idxs[i] = SrcNumElts;
3590 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3591
3592 // Insert the second operand into the first operand.
3593
3594 // Note that there is no guarantee that instruction lowering will actually
3595 // produce a vinsertf128 instruction for the created shuffles. In
3596 // particular, the 0 immediate case involves no lane changes, so it can
3597 // be handled as a blend.
3598
3599 // Example of shuffle mask for 32-bit elements:
3600 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3601 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3602
3603 // First fill with identity mask.
3604 for (unsigned i = 0; i != DstNumElts; ++i)
3605 Idxs[i] = i;
3606 // Then replace the elements where we need to insert.
3607 for (unsigned i = 0; i != SrcNumElts; ++i)
3608 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3609 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3610
3611 // If the intrinsic has a mask operand, handle that.
3612 if (CI->arg_size() == 5)
3613 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3614 CI->getArgOperand(3));
3615 } else if (Name.starts_with("avx.vextractf128.") ||
3616 Name == "avx2.vextracti128" ||
3617 Name.starts_with("avx512.mask.vextract")) {
3618 Value *Op0 = CI->getArgOperand(0);
3619 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3620 unsigned DstNumElts =
3621 cast<FixedVectorType>(CI->getType())->getNumElements();
3622 unsigned SrcNumElts =
3623 cast<FixedVectorType>(Op0->getType())->getNumElements();
3624 unsigned Scale = SrcNumElts / DstNumElts;
3625
3626 // Mask off the high bits of the immediate value; hardware ignores those.
3627 Imm = Imm % Scale;
3628
3629 // Get indexes for the subvector of the input vector.
3630 SmallVector<int, 8> Idxs(DstNumElts);
3631 for (unsigned i = 0; i != DstNumElts; ++i) {
3632 Idxs[i] = i + (Imm * DstNumElts);
3633 }
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3635
3636 // If the intrinsic has a mask operand, handle that.
3637 if (CI->arg_size() == 4)
3638 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3639 CI->getArgOperand(2));
3640 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3641 Name.starts_with("avx512.mask.perm.di.")) {
3642 Value *Op0 = CI->getArgOperand(0);
3643 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3644 auto *VecTy = cast<FixedVectorType>(CI->getType());
3645 unsigned NumElts = VecTy->getNumElements();
3646
3647 SmallVector<int, 8> Idxs(NumElts);
3648 for (unsigned i = 0; i != NumElts; ++i)
3649 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3650
3651 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3652
3653 if (CI->arg_size() == 4)
3654 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3655 CI->getArgOperand(2));
3656 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3657 // The immediate permute control byte looks like this:
3658 // [1:0] - select 128 bits from sources for low half of destination
3659 // [2] - ignore
3660 // [3] - zero low half of destination
3661 // [5:4] - select 128 bits from sources for high half of destination
3662 // [6] - ignore
3663 // [7] - zero high half of destination
3664
3665 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3666
3667 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3668 unsigned HalfSize = NumElts / 2;
3669 SmallVector<int, 8> ShuffleMask(NumElts);
3670
3671 // Determine which operand(s) are actually in use for this instruction.
3672 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3673 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3674
3675 // If needed, replace operands based on zero mask.
3676 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3677 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3678
3679 // Permute low half of result.
3680 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3681 for (unsigned i = 0; i < HalfSize; ++i)
3682 ShuffleMask[i] = StartIndex + i;
3683
3684 // Permute high half of result.
3685 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3686 for (unsigned i = 0; i < HalfSize; ++i)
3687 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3688
3689 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3690
3691 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3692 Name.starts_with("avx512.mask.vpermil.p") ||
3693 Name.starts_with("avx512.mask.pshuf.d.")) {
3694 Value *Op0 = CI->getArgOperand(0);
3695 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3696 auto *VecTy = cast<FixedVectorType>(CI->getType());
3697 unsigned NumElts = VecTy->getNumElements();
3698 // Calculate the size of each index in the immediate.
3699 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3700 unsigned IdxMask = ((1 << IdxSize) - 1);
3701
3702 SmallVector<int, 8> Idxs(NumElts);
3703 // Lookup the bits for this element, wrapping around the immediate every
3704 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3705 // to offset by the first index of each group.
3706 for (unsigned i = 0; i != NumElts; ++i)
3707 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3708
3709 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3710
3711 if (CI->arg_size() == 4)
3712 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3713 CI->getArgOperand(2));
3714 } else if (Name == "sse2.pshufl.w" ||
3715 Name.starts_with("avx512.mask.pshufl.w.")) {
3716 Value *Op0 = CI->getArgOperand(0);
3717 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3718 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3719
3720 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3721 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3722
3723 SmallVector<int, 16> Idxs(NumElts);
3724 for (unsigned l = 0; l != NumElts; l += 8) {
3725 for (unsigned i = 0; i != 4; ++i)
3726 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3727 for (unsigned i = 4; i != 8; ++i)
3728 Idxs[i + l] = i + l;
3729 }
3730
3731 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3732
3733 if (CI->arg_size() == 4)
3734 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3735 CI->getArgOperand(2));
3736 } else if (Name == "sse2.pshufh.w" ||
3737 Name.starts_with("avx512.mask.pshufh.w.")) {
3738 Value *Op0 = CI->getArgOperand(0);
3739 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3740 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3741
3742 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3743 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3744
3745 SmallVector<int, 16> Idxs(NumElts);
3746 for (unsigned l = 0; l != NumElts; l += 8) {
3747 for (unsigned i = 0; i != 4; ++i)
3748 Idxs[i + l] = i + l;
3749 for (unsigned i = 0; i != 4; ++i)
3750 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3751 }
3752
3753 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3754
3755 if (CI->arg_size() == 4)
3756 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3757 CI->getArgOperand(2));
3758 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3759 Value *Op0 = CI->getArgOperand(0);
3760 Value *Op1 = CI->getArgOperand(1);
3761 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3762 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3763
3764 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3765 unsigned HalfLaneElts = NumLaneElts / 2;
3766
3767 SmallVector<int, 16> Idxs(NumElts);
3768 for (unsigned i = 0; i != NumElts; ++i) {
3769 // Base index is the starting element of the lane.
3770 Idxs[i] = i - (i % NumLaneElts);
3771 // If we are half way through the lane switch to the other source.
3772 if ((i % NumLaneElts) >= HalfLaneElts)
3773 Idxs[i] += NumElts;
3774 // Now select the specific element by adding HalfLaneElts bits from
3775 // the immediate, wrapping around the immediate every 8 bits.
3776 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3777 }
3778
3779 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3780
3781 Rep =
3782 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3783 } else if (Name.starts_with("avx512.mask.movddup") ||
3784 Name.starts_with("avx512.mask.movshdup") ||
3785 Name.starts_with("avx512.mask.movsldup")) {
3786 Value *Op0 = CI->getArgOperand(0);
3787 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3788 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3789
3790 unsigned Offset = 0;
3791 if (Name.starts_with("avx512.mask.movshdup."))
3792 Offset = 1;
3793
3794 SmallVector<int, 16> Idxs(NumElts);
3795 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3796 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3797 Idxs[i + l + 0] = i + l + Offset;
3798 Idxs[i + l + 1] = i + l + Offset;
3799 }
3800
3801 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3802
3803 Rep =
3804 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3805 } else if (Name.starts_with("avx512.mask.punpckl") ||
3806 Name.starts_with("avx512.mask.unpckl.")) {
3807 Value *Op0 = CI->getArgOperand(0);
3808 Value *Op1 = CI->getArgOperand(1);
3809 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3810 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3811
3812 SmallVector<int, 64> Idxs(NumElts);
3813 for (int l = 0; l != NumElts; l += NumLaneElts)
3814 for (int i = 0; i != NumLaneElts; ++i)
3815 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3816
3817 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3818
3819 Rep =
3820 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3821 } else if (Name.starts_with("avx512.mask.punpckh") ||
3822 Name.starts_with("avx512.mask.unpckh.")) {
3823 Value *Op0 = CI->getArgOperand(0);
3824 Value *Op1 = CI->getArgOperand(1);
3825 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3826 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3827
3828 SmallVector<int, 64> Idxs(NumElts);
3829 for (int l = 0; l != NumElts; l += NumLaneElts)
3830 for (int i = 0; i != NumLaneElts; ++i)
3831 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3832
3833 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3834
3835 Rep =
3836 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3837 } else if (Name.starts_with("avx512.mask.and.") ||
3838 Name.starts_with("avx512.mask.pand.")) {
3839 VectorType *FTy = cast<VectorType>(CI->getType());
3841 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3842 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3843 Rep = Builder.CreateBitCast(Rep, FTy);
3844 Rep =
3845 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3846 } else if (Name.starts_with("avx512.mask.andn.") ||
3847 Name.starts_with("avx512.mask.pandn.")) {
3848 VectorType *FTy = cast<VectorType>(CI->getType());
3850 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3851 Rep = Builder.CreateAnd(Rep,
3852 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3853 Rep = Builder.CreateBitCast(Rep, FTy);
3854 Rep =
3855 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3856 } else if (Name.starts_with("avx512.mask.or.") ||
3857 Name.starts_with("avx512.mask.por.")) {
3858 VectorType *FTy = cast<VectorType>(CI->getType());
3860 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3861 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3862 Rep = Builder.CreateBitCast(Rep, FTy);
3863 Rep =
3864 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3865 } else if (Name.starts_with("avx512.mask.xor.") ||
3866 Name.starts_with("avx512.mask.pxor.")) {
3867 VectorType *FTy = cast<VectorType>(CI->getType());
3869 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3870 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3871 Rep = Builder.CreateBitCast(Rep, FTy);
3872 Rep =
3873 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3874 } else if (Name.starts_with("avx512.mask.padd.")) {
3875 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3876 Rep =
3877 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3878 } else if (Name.starts_with("avx512.mask.psub.")) {
3879 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3880 Rep =
3881 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3882 } else if (Name.starts_with("avx512.mask.pmull.")) {
3883 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3884 Rep =
3885 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3886 } else if (Name.starts_with("avx512.mask.add.p")) {
3887 if (Name.ends_with(".512")) {
3888 Intrinsic::ID IID;
3889 if (Name[17] == 's')
3890 IID = Intrinsic::x86_avx512_add_ps_512;
3891 else
3892 IID = Intrinsic::x86_avx512_add_pd_512;
3893
3894 Rep = Builder.CreateIntrinsic(
3895 IID,
3896 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3897 } else {
3898 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3899 }
3900 Rep =
3901 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3902 } else if (Name.starts_with("avx512.mask.div.p")) {
3903 if (Name.ends_with(".512")) {
3904 Intrinsic::ID IID;
3905 if (Name[17] == 's')
3906 IID = Intrinsic::x86_avx512_div_ps_512;
3907 else
3908 IID = Intrinsic::x86_avx512_div_pd_512;
3909
3910 Rep = Builder.CreateIntrinsic(
3911 IID,
3912 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3913 } else {
3914 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3915 }
3916 Rep =
3917 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3918 } else if (Name.starts_with("avx512.mask.mul.p")) {
3919 if (Name.ends_with(".512")) {
3920 Intrinsic::ID IID;
3921 if (Name[17] == 's')
3922 IID = Intrinsic::x86_avx512_mul_ps_512;
3923 else
3924 IID = Intrinsic::x86_avx512_mul_pd_512;
3925
3926 Rep = Builder.CreateIntrinsic(
3927 IID,
3928 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3929 } else {
3930 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3931 }
3932 Rep =
3933 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3934 } else if (Name.starts_with("avx512.mask.sub.p")) {
3935 if (Name.ends_with(".512")) {
3936 Intrinsic::ID IID;
3937 if (Name[17] == 's')
3938 IID = Intrinsic::x86_avx512_sub_ps_512;
3939 else
3940 IID = Intrinsic::x86_avx512_sub_pd_512;
3941
3942 Rep = Builder.CreateIntrinsic(
3943 IID,
3944 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3945 } else {
3946 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3947 }
3948 Rep =
3949 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3950 } else if ((Name.starts_with("avx512.mask.max.p") ||
3951 Name.starts_with("avx512.mask.min.p")) &&
3952 Name.drop_front(18) == ".512") {
3953 bool IsDouble = Name[17] == 'd';
3954 bool IsMin = Name[13] == 'i';
3955 static const Intrinsic::ID MinMaxTbl[2][2] = {
3956 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3957 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3958 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3959
3960 Rep = Builder.CreateIntrinsic(
3961 IID,
3962 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3963 Rep =
3964 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3965 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3966 Rep =
3967 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3968 {CI->getArgOperand(0), Builder.getInt1(false)});
3969 Rep =
3970 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3971 } else if (Name.starts_with("avx512.mask.psll")) {
3972 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3973 bool IsVariable = Name[16] == 'v';
3974 char Size = Name[16] == '.' ? Name[17]
3975 : Name[17] == '.' ? Name[18]
3976 : Name[18] == '.' ? Name[19]
3977 : Name[20];
3978
3979 Intrinsic::ID IID;
3980 if (IsVariable && Name[17] != '.') {
3981 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3982 IID = Intrinsic::x86_avx2_psllv_q;
3983 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3984 IID = Intrinsic::x86_avx2_psllv_q_256;
3985 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3986 IID = Intrinsic::x86_avx2_psllv_d;
3987 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3988 IID = Intrinsic::x86_avx2_psllv_d_256;
3989 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3990 IID = Intrinsic::x86_avx512_psllv_w_128;
3991 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3992 IID = Intrinsic::x86_avx512_psllv_w_256;
3993 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3994 IID = Intrinsic::x86_avx512_psllv_w_512;
3995 else
3996 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3997 } else if (Name.ends_with(".128")) {
3998 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3999 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4000 : Intrinsic::x86_sse2_psll_d;
4001 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
4002 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4003 : Intrinsic::x86_sse2_psll_q;
4004 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
4005 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4006 : Intrinsic::x86_sse2_psll_w;
4007 else
4008 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4009 } else if (Name.ends_with(".256")) {
4010 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
4011 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4012 : Intrinsic::x86_avx2_psll_d;
4013 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
4014 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4015 : Intrinsic::x86_avx2_psll_q;
4016 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
4017 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4018 : Intrinsic::x86_avx2_psll_w;
4019 else
4020 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4021 } else {
4022 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
4023 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4024 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4025 : Intrinsic::x86_avx512_psll_d_512;
4026 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
4027 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4028 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4029 : Intrinsic::x86_avx512_psll_q_512;
4030 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
4031 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4032 : Intrinsic::x86_avx512_psll_w_512;
4033 else
4034 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4035 }
4036
4037 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4038 } else if (Name.starts_with("avx512.mask.psrl")) {
4039 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4040 bool IsVariable = Name[16] == 'v';
4041 char Size = Name[16] == '.' ? Name[17]
4042 : Name[17] == '.' ? Name[18]
4043 : Name[18] == '.' ? Name[19]
4044 : Name[20];
4045
4046 Intrinsic::ID IID;
4047 if (IsVariable && Name[17] != '.') {
4048 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4049 IID = Intrinsic::x86_avx2_psrlv_q;
4050 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4051 IID = Intrinsic::x86_avx2_psrlv_q_256;
4052 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4053 IID = Intrinsic::x86_avx2_psrlv_d;
4054 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4055 IID = Intrinsic::x86_avx2_psrlv_d_256;
4056 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4057 IID = Intrinsic::x86_avx512_psrlv_w_128;
4058 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4059 IID = Intrinsic::x86_avx512_psrlv_w_256;
4060 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4061 IID = Intrinsic::x86_avx512_psrlv_w_512;
4062 else
4063 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4064 } else if (Name.ends_with(".128")) {
4065 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4066 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4067 : Intrinsic::x86_sse2_psrl_d;
4068 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4069 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4070 : Intrinsic::x86_sse2_psrl_q;
4071 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4072 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4073 : Intrinsic::x86_sse2_psrl_w;
4074 else
4075 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4076 } else if (Name.ends_with(".256")) {
4077 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4078 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4079 : Intrinsic::x86_avx2_psrl_d;
4080 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4081 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4082 : Intrinsic::x86_avx2_psrl_q;
4083 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4084 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4085 : Intrinsic::x86_avx2_psrl_w;
4086 else
4087 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4088 } else {
4089 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4090 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4091 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4092 : Intrinsic::x86_avx512_psrl_d_512;
4093 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4094 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4095 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4096 : Intrinsic::x86_avx512_psrl_q_512;
4097 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4098 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4099 : Intrinsic::x86_avx512_psrl_w_512;
4100 else
4101 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4102 }
4103
4104 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4105 } else if (Name.starts_with("avx512.mask.psra")) {
4106 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4107 bool IsVariable = Name[16] == 'v';
4108 char Size = Name[16] == '.' ? Name[17]
4109 : Name[17] == '.' ? Name[18]
4110 : Name[18] == '.' ? Name[19]
4111 : Name[20];
4112
4113 Intrinsic::ID IID;
4114 if (IsVariable && Name[17] != '.') {
4115 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4116 IID = Intrinsic::x86_avx2_psrav_d;
4117 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4118 IID = Intrinsic::x86_avx2_psrav_d_256;
4119 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4120 IID = Intrinsic::x86_avx512_psrav_w_128;
4121 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4122 IID = Intrinsic::x86_avx512_psrav_w_256;
4123 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4124 IID = Intrinsic::x86_avx512_psrav_w_512;
4125 else
4126 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4127 } else if (Name.ends_with(".128")) {
4128 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4129 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4130 : Intrinsic::x86_sse2_psra_d;
4131 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4132 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4133 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4134 : Intrinsic::x86_avx512_psra_q_128;
4135 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4136 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4137 : Intrinsic::x86_sse2_psra_w;
4138 else
4139 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4140 } else if (Name.ends_with(".256")) {
4141 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4142 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4143 : Intrinsic::x86_avx2_psra_d;
4144 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4145 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4146 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4147 : Intrinsic::x86_avx512_psra_q_256;
4148 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4149 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4150 : Intrinsic::x86_avx2_psra_w;
4151 else
4152 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4153 } else {
4154 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4155 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4156 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4157 : Intrinsic::x86_avx512_psra_d_512;
4158 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4159 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4160 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4161 : Intrinsic::x86_avx512_psra_q_512;
4162 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4163 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4164 : Intrinsic::x86_avx512_psra_w_512;
4165 else
4166 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4167 }
4168
4169 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4170 } else if (Name.starts_with("avx512.mask.move.s")) {
4171 Rep = upgradeMaskedMove(Builder, *CI);
4172 } else if (Name.starts_with("avx512.cvtmask2")) {
4173 Rep = upgradeMaskToInt(Builder, *CI);
4174 } else if (Name.ends_with(".movntdqa")) {
4176 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4177
4178 LoadInst *LI = Builder.CreateAlignedLoad(
4179 CI->getType(), CI->getArgOperand(0),
4181 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4182 Rep = LI;
4183 } else if (Name.starts_with("fma.vfmadd.") ||
4184 Name.starts_with("fma.vfmsub.") ||
4185 Name.starts_with("fma.vfnmadd.") ||
4186 Name.starts_with("fma.vfnmsub.")) {
4187 bool NegMul = Name[6] == 'n';
4188 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4189 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4190
4191 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4192 CI->getArgOperand(2)};
4193
4194 if (IsScalar) {
4195 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4196 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4197 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4198 }
4199
4200 if (NegMul && !IsScalar)
4201 Ops[0] = Builder.CreateFNeg(Ops[0]);
4202 if (NegMul && IsScalar)
4203 Ops[1] = Builder.CreateFNeg(Ops[1]);
4204 if (NegAcc)
4205 Ops[2] = Builder.CreateFNeg(Ops[2]);
4206
4207 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4208
4209 if (IsScalar)
4210 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4211 } else if (Name.starts_with("fma4.vfmadd.s")) {
4212 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4213 CI->getArgOperand(2)};
4214
4215 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4216 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4217 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4218
4219 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4220
4221 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4222 Rep, (uint64_t)0);
4223 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4224 Name.starts_with("avx512.maskz.vfmadd.s") ||
4225 Name.starts_with("avx512.mask3.vfmadd.s") ||
4226 Name.starts_with("avx512.mask3.vfmsub.s") ||
4227 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4228 bool IsMask3 = Name[11] == '3';
4229 bool IsMaskZ = Name[11] == 'z';
4230 // Drop the "avx512.mask." to make it easier.
4231 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4232 bool NegMul = Name[2] == 'n';
4233 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4234
4235 Value *A = CI->getArgOperand(0);
4236 Value *B = CI->getArgOperand(1);
4237 Value *C = CI->getArgOperand(2);
4238
4239 if (NegMul && (IsMask3 || IsMaskZ))
4240 A = Builder.CreateFNeg(A);
4241 if (NegMul && !(IsMask3 || IsMaskZ))
4242 B = Builder.CreateFNeg(B);
4243 if (NegAcc)
4244 C = Builder.CreateFNeg(C);
4245
4246 A = Builder.CreateExtractElement(A, (uint64_t)0);
4247 B = Builder.CreateExtractElement(B, (uint64_t)0);
4248 C = Builder.CreateExtractElement(C, (uint64_t)0);
4249
4250 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4251 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4252 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4253
4254 Intrinsic::ID IID;
4255 if (Name.back() == 'd')
4256 IID = Intrinsic::x86_avx512_vfmadd_f64;
4257 else
4258 IID = Intrinsic::x86_avx512_vfmadd_f32;
4259 Rep = Builder.CreateIntrinsic(IID, Ops);
4260 } else {
4261 Rep = Builder.CreateFMA(A, B, C);
4262 }
4263
4264 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4265 : IsMask3 ? C
4266 : A;
4267
4268 // For Mask3 with NegAcc, we need to create a new extractelement that
4269 // avoids the negation above.
4270 if (NegAcc && IsMask3)
4271 PassThru =
4272 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4273
4274 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4275 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4276 (uint64_t)0);
4277 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4278 Name.starts_with("avx512.mask.vfnmadd.p") ||
4279 Name.starts_with("avx512.mask.vfnmsub.p") ||
4280 Name.starts_with("avx512.mask3.vfmadd.p") ||
4281 Name.starts_with("avx512.mask3.vfmsub.p") ||
4282 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4283 Name.starts_with("avx512.maskz.vfmadd.p")) {
4284 bool IsMask3 = Name[11] == '3';
4285 bool IsMaskZ = Name[11] == 'z';
4286 // Drop the "avx512.mask." to make it easier.
4287 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4288 bool NegMul = Name[2] == 'n';
4289 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4290
4291 Value *A = CI->getArgOperand(0);
4292 Value *B = CI->getArgOperand(1);
4293 Value *C = CI->getArgOperand(2);
4294
4295 if (NegMul && (IsMask3 || IsMaskZ))
4296 A = Builder.CreateFNeg(A);
4297 if (NegMul && !(IsMask3 || IsMaskZ))
4298 B = Builder.CreateFNeg(B);
4299 if (NegAcc)
4300 C = Builder.CreateFNeg(C);
4301
4302 if (CI->arg_size() == 5 &&
4303 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4304 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4305 Intrinsic::ID IID;
4306 // Check the character before ".512" in string.
4307 if (Name[Name.size() - 5] == 's')
4308 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4309 else
4310 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4311
4312 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4313 } else {
4314 Rep = Builder.CreateFMA(A, B, C);
4315 }
4316
4317 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4318 : IsMask3 ? CI->getArgOperand(2)
4319 : CI->getArgOperand(0);
4320
4321 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4322 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4323 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4324 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4325 Intrinsic::ID IID;
4326 if (VecWidth == 128 && EltWidth == 32)
4327 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4328 else if (VecWidth == 256 && EltWidth == 32)
4329 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4330 else if (VecWidth == 128 && EltWidth == 64)
4331 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4332 else if (VecWidth == 256 && EltWidth == 64)
4333 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4334 else
4335 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4336
4337 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4338 CI->getArgOperand(2)};
4339 Ops[2] = Builder.CreateFNeg(Ops[2]);
4340 Rep = Builder.CreateIntrinsic(IID, Ops);
4341 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4342 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4343 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4344 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4345 bool IsMask3 = Name[11] == '3';
4346 bool IsMaskZ = Name[11] == 'z';
4347 // Drop the "avx512.mask." to make it easier.
4348 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4349 bool IsSubAdd = Name[3] == 's';
4350 if (CI->arg_size() == 5) {
4351 Intrinsic::ID IID;
4352 // Check the character before ".512" in string.
4353 if (Name[Name.size() - 5] == 's')
4354 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4355 else
4356 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4357
4358 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4359 CI->getArgOperand(2), CI->getArgOperand(4)};
4360 if (IsSubAdd)
4361 Ops[2] = Builder.CreateFNeg(Ops[2]);
4362
4363 Rep = Builder.CreateIntrinsic(IID, Ops);
4364 } else {
4365 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4366
4367 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4368 CI->getArgOperand(2)};
4369
4371 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4372 Value *Odd = Builder.CreateCall(FMA, Ops);
4373 Ops[2] = Builder.CreateFNeg(Ops[2]);
4374 Value *Even = Builder.CreateCall(FMA, Ops);
4375
4376 if (IsSubAdd)
4377 std::swap(Even, Odd);
4378
4379 SmallVector<int, 32> Idxs(NumElts);
4380 for (int i = 0; i != NumElts; ++i)
4381 Idxs[i] = i + (i % 2) * NumElts;
4382
4383 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4384 }
4385
4386 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4387 : IsMask3 ? CI->getArgOperand(2)
4388 : CI->getArgOperand(0);
4389
4390 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4391 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4392 Name.starts_with("avx512.maskz.pternlog.")) {
4393 bool ZeroMask = Name[11] == 'z';
4394 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4395 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4396 Intrinsic::ID IID;
4397 if (VecWidth == 128 && EltWidth == 32)
4398 IID = Intrinsic::x86_avx512_pternlog_d_128;
4399 else if (VecWidth == 256 && EltWidth == 32)
4400 IID = Intrinsic::x86_avx512_pternlog_d_256;
4401 else if (VecWidth == 512 && EltWidth == 32)
4402 IID = Intrinsic::x86_avx512_pternlog_d_512;
4403 else if (VecWidth == 128 && EltWidth == 64)
4404 IID = Intrinsic::x86_avx512_pternlog_q_128;
4405 else if (VecWidth == 256 && EltWidth == 64)
4406 IID = Intrinsic::x86_avx512_pternlog_q_256;
4407 else if (VecWidth == 512 && EltWidth == 64)
4408 IID = Intrinsic::x86_avx512_pternlog_q_512;
4409 else
4410 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4411
4412 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4413 CI->getArgOperand(2), CI->getArgOperand(3)};
4414 Rep = Builder.CreateIntrinsic(IID, Args);
4415 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4416 : CI->getArgOperand(0);
4417 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4418 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4419 Name.starts_with("avx512.maskz.vpmadd52")) {
4420 bool ZeroMask = Name[11] == 'z';
4421 bool High = Name[20] == 'h' || Name[21] == 'h';
4422 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4423 Intrinsic::ID IID;
4424 if (VecWidth == 128 && !High)
4425 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4426 else if (VecWidth == 256 && !High)
4427 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4428 else if (VecWidth == 512 && !High)
4429 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4430 else if (VecWidth == 128 && High)
4431 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4432 else if (VecWidth == 256 && High)
4433 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4434 else if (VecWidth == 512 && High)
4435 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4436 else
4437 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4438
4439 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4440 CI->getArgOperand(2)};
4441 Rep = Builder.CreateIntrinsic(IID, Args);
4442 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4443 : CI->getArgOperand(0);
4444 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4445 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4446 Name.starts_with("avx512.mask.vpermt2var.") ||
4447 Name.starts_with("avx512.maskz.vpermt2var.")) {
4448 bool ZeroMask = Name[11] == 'z';
4449 bool IndexForm = Name[17] == 'i';
4450 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4451 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4452 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4453 Name.starts_with("avx512.mask.vpdpbusds.") ||
4454 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4455 bool ZeroMask = Name[11] == 'z';
4456 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4457 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4458 Intrinsic::ID IID;
4459 if (VecWidth == 128 && !IsSaturating)
4460 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4461 else if (VecWidth == 256 && !IsSaturating)
4462 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4463 else if (VecWidth == 512 && !IsSaturating)
4464 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4465 else if (VecWidth == 128 && IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4467 else if (VecWidth == 256 && IsSaturating)
4468 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4469 else if (VecWidth == 512 && IsSaturating)
4470 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4471 else
4472 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4473
4474 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4475 CI->getArgOperand(2)};
4476
4477 // Input arguments types were incorrectly set to vectors of i32 before but
4478 // they should be vectors of i8. Insert bit cast when encountering the old
4479 // types
4480 if (Args[1]->getType()->isVectorTy() &&
4481 cast<VectorType>(Args[1]->getType())
4482 ->getElementType()
4483 ->isIntegerTy(32) &&
4484 Args[2]->getType()->isVectorTy() &&
4485 cast<VectorType>(Args[2]->getType())
4486 ->getElementType()
4487 ->isIntegerTy(32)) {
4488 Type *NewArgType = nullptr;
4489 if (VecWidth == 128)
4490 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4491 else if (VecWidth == 256)
4492 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4493 else if (VecWidth == 512)
4494 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4495 else
4496 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4497 CI);
4498
4499 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4500 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4501 }
4502
4503 Rep = Builder.CreateIntrinsic(IID, Args);
4504 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4505 : CI->getArgOperand(0);
4506 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4507 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4508 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4509 Name.starts_with("avx512.mask.vpdpwssds.") ||
4510 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4511 bool ZeroMask = Name[11] == 'z';
4512 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4513 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4514 Intrinsic::ID IID;
4515 if (VecWidth == 128 && !IsSaturating)
4516 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4517 else if (VecWidth == 256 && !IsSaturating)
4518 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4519 else if (VecWidth == 512 && !IsSaturating)
4520 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4521 else if (VecWidth == 128 && IsSaturating)
4522 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4523 else if (VecWidth == 256 && IsSaturating)
4524 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4525 else if (VecWidth == 512 && IsSaturating)
4526 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4527 else
4528 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4529
4530 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4531 CI->getArgOperand(2)};
4532
4533 // Input arguments types were incorrectly set to vectors of i32 before but
4534 // they should be vectors of i16. Insert bit cast when encountering the old
4535 // types
4536 if (Args[1]->getType()->isVectorTy() &&
4537 cast<VectorType>(Args[1]->getType())
4538 ->getElementType()
4539 ->isIntegerTy(32) &&
4540 Args[2]->getType()->isVectorTy() &&
4541 cast<VectorType>(Args[2]->getType())
4542 ->getElementType()
4543 ->isIntegerTy(32)) {
4544 Type *NewArgType = nullptr;
4545 if (VecWidth == 128)
4546 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4547 else if (VecWidth == 256)
4548 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4549 else if (VecWidth == 512)
4550 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4551 else
4552 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4553 CI);
4554
4555 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4556 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4557 }
4558
4559 Rep = Builder.CreateIntrinsic(IID, Args);
4560 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4561 : CI->getArgOperand(0);
4562 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4563 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4564 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4565 Name == "subborrow.u32" || Name == "subborrow.u64") {
4566 Intrinsic::ID IID;
4567 if (Name[0] == 'a' && Name.back() == '2')
4568 IID = Intrinsic::x86_addcarry_32;
4569 else if (Name[0] == 'a' && Name.back() == '4')
4570 IID = Intrinsic::x86_addcarry_64;
4571 else if (Name[0] == 's' && Name.back() == '2')
4572 IID = Intrinsic::x86_subborrow_32;
4573 else if (Name[0] == 's' && Name.back() == '4')
4574 IID = Intrinsic::x86_subborrow_64;
4575 else
4576 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4577
4578 // Make a call with 3 operands.
4579 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4580 CI->getArgOperand(2)};
4581 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4582
4583 // Extract the second result and store it.
4584 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4585 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4586 // Replace the original call result with the first result of the new call.
4587 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4588
4589 CI->replaceAllUsesWith(CF);
4590 Rep = nullptr;
4591 } else if (Name.starts_with("avx512.mask.") &&
4592 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4593 // Rep will be updated by the call in the condition.
4594 } else
4595 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4596
4597 return Rep;
4598}
4599
4601 Function *F, IRBuilder<> &Builder) {
4602 if (Name.starts_with("neon.bfcvt")) {
4603 if (Name.starts_with("neon.bfcvtn2")) {
4604 SmallVector<int, 32> LoMask(4);
4605 std::iota(LoMask.begin(), LoMask.end(), 0);
4606 SmallVector<int, 32> ConcatMask(8);
4607 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4608 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4609 Value *Trunc =
4610 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4611 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4612 } else if (Name.starts_with("neon.bfcvtn")) {
4613 SmallVector<int, 32> ConcatMask(8);
4614 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4615 Type *V4BF16 =
4616 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4617 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4618 dbgs() << "Trunc: " << *Trunc << "\n";
4619 return Builder.CreateShuffleVector(
4620 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4621 } else {
4622 return Builder.CreateFPTrunc(CI->getOperand(0),
4623 Type::getBFloatTy(F->getContext()));
4624 }
4625 } else if (Name.starts_with("sve.fcvt")) {
4626 Intrinsic::ID NewID =
4628 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4629 .Case("sve.fcvtnt.bf16f32",
4630 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4632 if (NewID == Intrinsic::not_intrinsic)
4633 llvm_unreachable("Unhandled Intrinsic!");
4634
4635 SmallVector<Value *, 3> Args(CI->args());
4636
4637 // The original intrinsics incorrectly used a predicate based on the
4638 // smallest element type rather than the largest.
4639 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4640 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4641
4642 if (Args[1]->getType() != BadPredTy)
4643 llvm_unreachable("Unexpected predicate type!");
4644
4645 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4646 BadPredTy, Args[1]);
4647 Args[1] = Builder.CreateIntrinsic(
4648 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4649
4650 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4651 CI->getName());
4652 }
4653
4654 llvm_unreachable("Unhandled Intrinsic!");
4655}
4656
4658 IRBuilder<> &Builder) {
4659 if (Name == "mve.vctp64.old") {
4660 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4661 // correct type.
4662 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4663 CI->getArgOperand(0),
4664 /*FMFSource=*/nullptr, CI->getName());
4665 Value *C1 = Builder.CreateIntrinsic(
4666 Intrinsic::arm_mve_pred_v2i,
4667 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4668 return Builder.CreateIntrinsic(
4669 Intrinsic::arm_mve_pred_i2v,
4670 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4671 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4672 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4673 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4674 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4675 Name ==
4676 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4677 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4678 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4679 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4680 Name ==
4681 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4682 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4683 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4684 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4685 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4686 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4687 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4688 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4689 std::vector<Type *> Tys;
4690 unsigned ID = CI->getIntrinsicID();
4691 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4692 switch (ID) {
4693 case Intrinsic::arm_mve_mull_int_predicated:
4694 case Intrinsic::arm_mve_vqdmull_predicated:
4695 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4696 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4697 break;
4698 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4699 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4700 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4701 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4702 V2I1Ty};
4703 break;
4704 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4705 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4706 CI->getOperand(1)->getType(), V2I1Ty};
4707 break;
4708 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4709 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4710 CI->getOperand(2)->getType(), V2I1Ty};
4711 break;
4712 case Intrinsic::arm_cde_vcx1q_predicated:
4713 case Intrinsic::arm_cde_vcx1qa_predicated:
4714 case Intrinsic::arm_cde_vcx2q_predicated:
4715 case Intrinsic::arm_cde_vcx2qa_predicated:
4716 case Intrinsic::arm_cde_vcx3q_predicated:
4717 case Intrinsic::arm_cde_vcx3qa_predicated:
4718 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4719 break;
4720 default:
4721 llvm_unreachable("Unhandled Intrinsic!");
4722 }
4723
4724 std::vector<Value *> Ops;
4725 for (Value *Op : CI->args()) {
4726 Type *Ty = Op->getType();
4727 if (Ty->getScalarSizeInBits() == 1) {
4728 Value *C1 = Builder.CreateIntrinsic(
4729 Intrinsic::arm_mve_pred_v2i,
4730 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4731 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4732 }
4733 Ops.push_back(Op);
4734 }
4735
4736 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4737 CI->getName());
4738 }
4739 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4740}
4741
4742// These are expected to have the arguments:
4743// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4744//
4745// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4746//
4748 Function *F, IRBuilder<> &Builder) {
4749 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4750 // for compatibility.
4751 auto UpgradeLegacyWMMAIUIntrinsicCall =
4752 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4753 ArrayRef<Type *> OverloadTys) -> Value * {
4754 // Prepare arguments, append clamp=0 for compatibility
4755 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4756 Args.push_back(Builder.getFalse());
4757
4758 // Insert the declaration for the right overload types
4760 F->getParent(), F->getIntrinsicID(), OverloadTys);
4761
4762 // Copy operand bundles if any
4764 CI->getOperandBundlesAsDefs(Bundles);
4765
4766 // Create the new call and copy calling properties
4767 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4768 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4769 NewCall->setCallingConv(CI->getCallingConv());
4770 NewCall->setAttributes(CI->getAttributes());
4771 NewCall->setDebugLoc(CI->getDebugLoc());
4772 NewCall->copyMetadata(*CI);
4773 return NewCall;
4774 };
4775
4776 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4777 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4778 "intrinsic should have 7 arguments");
4779 Type *T1 = CI->getArgOperand(4)->getType();
4780 Type *T2 = CI->getArgOperand(1)->getType();
4781 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4782 }
4783 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4784 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4785 "intrinsic should have 8 arguments");
4786 Type *T1 = CI->getArgOperand(4)->getType();
4787 Type *T2 = CI->getArgOperand(1)->getType();
4788 Type *T3 = CI->getArgOperand(3)->getType();
4789 Type *T4 = CI->getArgOperand(5)->getType();
4790 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4791 }
4792
4793 switch (F->getIntrinsicID()) {
4794 default:
4795 break;
4796 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4797 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4798 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4799 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4800 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4801 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4802 // Drop src0 and src1 modifiers.
4803 const Value *Op0 = CI->getArgOperand(0);
4804 const Value *Op2 = CI->getArgOperand(2);
4805 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4806 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4807 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4808 if (!ModA->isZero() || !ModB->isZero())
4809 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4810
4812 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4813 Args.push_back(CI->getArgOperand(I));
4814
4815 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4816 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4817 Overloads.push_back(Args[3]->getType());
4819 F->getParent(), F->getIntrinsicID(), Overloads);
4820
4822 CI->getOperandBundlesAsDefs(Bundles);
4823
4824 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4825 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4826 NewCall->setCallingConv(CI->getCallingConv());
4827 NewCall->setAttributes(CI->getAttributes());
4828 NewCall->setDebugLoc(CI->getDebugLoc());
4829 NewCall->copyMetadata(*CI);
4830 NewCall->takeName(CI);
4831 return NewCall;
4832 }
4833 }
4834
4835 AtomicRMWInst::BinOp RMWOp =
4837 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4838 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4839 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4840 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4841 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4842 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4843 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4844 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4845 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4846 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4847 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4848 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4849 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4850
4851 unsigned NumOperands = CI->getNumOperands();
4852 if (NumOperands < 3) // Malformed bitcode.
4853 return nullptr;
4854
4855 Value *Ptr = CI->getArgOperand(0);
4856 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4857 if (!PtrTy) // Malformed.
4858 return nullptr;
4859
4860 Value *Val = CI->getArgOperand(1);
4861 if (Val->getType() != CI->getType()) // Malformed.
4862 return nullptr;
4863
4864 ConstantInt *OrderArg = nullptr;
4865 bool IsVolatile = false;
4866
4867 // These should have 5 arguments (plus the callee). A separate version of the
4868 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4869 if (NumOperands > 3)
4870 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4871
4872 // Ignore scope argument at 3
4873
4874 if (NumOperands > 5) {
4875 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4876 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4877 }
4878
4880 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4881 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4884
4885 LLVMContext &Ctx = F->getContext();
4886
4887 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4888 Type *RetTy = CI->getType();
4889 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4890 if (VT->getElementType()->isIntegerTy(16)) {
4891 VectorType *AsBF16 =
4892 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4893 Val = Builder.CreateBitCast(Val, AsBF16);
4894 }
4895 }
4896
4897 // The scope argument never really worked correctly. Use agent as the most
4898 // conservative option which should still always produce the instruction.
4899 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4900 AtomicRMWInst *RMW =
4901 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4902
4903 unsigned AddrSpace = PtrTy->getAddressSpace();
4904 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4905 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4906 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4907 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4908 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4909 }
4910
4911 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4912 MDBuilder MDB(F->getContext());
4913 MDNode *RangeNotPrivate =
4916 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4917 }
4918
4919 if (IsVolatile)
4920 RMW->setVolatile(true);
4921
4922 return Builder.CreateBitCast(RMW, RetTy);
4923}
4924
4925/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4926/// plain MDNode, as it's the verifier's job to check these are the correct
4927/// types later.
4928static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4929 if (Op < CI->arg_size()) {
4930 if (MetadataAsValue *MAV =
4932 Metadata *MD = MAV->getMetadata();
4933 return dyn_cast_if_present<MDNode>(MD);
4934 }
4935 }
4936 return nullptr;
4937}
4938
4939/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4940static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4941 if (Op < CI->arg_size())
4943 return MAV->getMetadata();
4944 return nullptr;
4945}
4946
4947/// Convert debug intrinsic calls to non-instruction debug records.
4948/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4949/// \p CI - The debug intrinsic call.
4951 DbgRecord *DR = nullptr;
4952 if (Name == "label") {
4954 } else if (Name == "assign") {
4957 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4958 unwrapMAVMetadataOp(CI, 4),
4959 /*The address is a Value ref, it will be stored as a Metadata */
4960 unwrapMAVOp(CI, 5));
4961 } else if (Name == "declare") {
4964 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr);
4965 } else if (Name == "addr") {
4966 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4967 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4968 // Don't try to add something to the expression if it's missing or not an
4969 // expression (unwrapMAVOp may return null). Let the verifier fail later.
4970 if (DIExpression *Expr = dyn_cast_if_present<DIExpression>(ExprNode)) {
4971 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4972 }
4975 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr);
4976 } else if (Name == "value") {
4977 // An old version of dbg.value had an extra offset argument.
4978 unsigned VarOp = 1;
4979 unsigned ExprOp = 2;
4980 if (CI->arg_size() == 4) {
4982 // Nonzero offset dbg.values get dropped without a replacement.
4983 if (!Offset || !Offset->isNullValue())
4984 return;
4985 VarOp = 2;
4986 ExprOp = 3;
4987 }
4990 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4991 nullptr);
4992 }
4993 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4994 DR->setDebugLoc(CI->getDebugLoc()); // Only touch DR after the check above.
4995 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4996}
4997
5000 if (!Offset)
5001 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
5002 int64_t OffsetVal = Offset->getSExtValue();
5003 return Builder.CreateIntrinsic(OffsetVal >= 0
5004 ? Intrinsic::vector_splice_left
5005 : Intrinsic::vector_splice_right,
5006 CI->getType(),
5007 {CI->getArgOperand(0), CI->getArgOperand(1),
5008 Builder.getInt32(std::abs(OffsetVal))});
5009}
5010
5012 Function *F, IRBuilder<> &Builder) {
5013 if (Name.starts_with("to.fp16")) {
5014 Value *Cast =
5015 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
5016 return Builder.CreateBitCast(Cast, CI->getType());
5017 }
5018
5019 if (Name.starts_with("from.fp16")) {
5020 Value *Cast =
5021 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
5022 return Builder.CreateFPExt(Cast, CI->getType());
5023 }
5024
5025 return nullptr;
5026}
5027
5028/// Upgrade a call to an old intrinsic. All argument and return casting must be
5029/// provided to seamlessly integrate with existing context.
5031 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
5032 // checks the callee's function type matches. It's likely we need to handle
5033 // type changes here.
5035 if (!F)
5036 return;
5037
5038 LLVMContext &C = CI->getContext();
5039 IRBuilder<> Builder(C);
5040 if (isa<FPMathOperator>(CI))
5041 Builder.setFastMathFlags(CI->getFastMathFlags());
5042 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5043
5044 if (!NewFn) {
5045 // Get the Function's name.
5046 StringRef Name = F->getName();
5047 if (!Name.consume_front("llvm."))
5048 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5049
5050 bool IsX86 = Name.consume_front("x86.");
5051 bool IsNVVM = Name.consume_front("nvvm.");
5052 bool IsAArch64 = Name.consume_front("aarch64.");
5053 bool IsARM = Name.consume_front("arm.");
5054 bool IsAMDGCN = Name.consume_front("amdgcn.");
5055 bool IsDbg = Name.consume_front("dbg.");
5056 bool IsOldSplice =
5057 (Name.consume_front("experimental.vector.splice") ||
5058 Name.consume_front("vector.splice")) &&
5059 !(Name.starts_with(".left") || Name.starts_with(".right"));
5060 Value *Rep = nullptr;
5061
5062 if (!IsX86 && Name == "stackprotectorcheck") {
5063 Rep = nullptr;
5064 } else if (IsNVVM) {
5065 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5066 } else if (IsX86) {
5067 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5068 } else if (IsAArch64) {
5069 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5070 } else if (IsARM) {
5071 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5072 } else if (IsAMDGCN) {
5073 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5074 } else if (IsDbg) {
5076 } else if (IsOldSplice) {
5077 Rep = upgradeVectorSplice(CI, Builder);
5078 } else if (Name.consume_front("convert.")) {
5079 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5080 } else {
5081 llvm_unreachable("Unknown function for CallBase upgrade.");
5082 }
5083
5084 if (Rep)
5085 CI->replaceAllUsesWith(Rep);
5086 CI->eraseFromParent();
5087 return;
5088 }
5089
5090 const auto &DefaultCase = [&]() -> void {
5091 if (F == NewFn)
5092 return;
5093
5094 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5095 // Handle generic mangling change.
5096 assert(
5097 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5098 "Unknown function for CallBase upgrade and isn't just a name change");
5099 CI->setCalledFunction(NewFn);
5100 return;
5101 }
5102
5103 // This must be an upgrade from a named to a literal struct.
5104 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5105 assert(OldST != NewFn->getReturnType() &&
5106 "Return type must have changed");
5107 assert(OldST->getNumElements() ==
5108 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5109 "Must have same number of elements");
5110
5111 SmallVector<Value *> Args(CI->args());
5112 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5113 NewCI->setAttributes(CI->getAttributes());
5114 Value *Res = PoisonValue::get(OldST);
5115 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5116 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5117 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5118 }
5119 CI->replaceAllUsesWith(Res);
5120 CI->eraseFromParent();
5121 return;
5122 }
5123
5124 // We're probably about to produce something invalid. Let the verifier catch
5125 // it instead of dying here.
5126 CI->setCalledOperand(
5128 return;
5129 };
5130 CallInst *NewCall = nullptr;
5131 switch (NewFn->getIntrinsicID()) {
5132 default: {
5133 DefaultCase();
5134 return;
5135 }
5136 case Intrinsic::arm_neon_vst1:
5137 case Intrinsic::arm_neon_vst2:
5138 case Intrinsic::arm_neon_vst3:
5139 case Intrinsic::arm_neon_vst4:
5140 case Intrinsic::arm_neon_vst2lane:
5141 case Intrinsic::arm_neon_vst3lane:
5142 case Intrinsic::arm_neon_vst4lane: {
5143 SmallVector<Value *, 4> Args(CI->args());
5144 NewCall = Builder.CreateCall(NewFn, Args);
5145 break;
5146 }
5147 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5148 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5149 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5150 LLVMContext &Ctx = F->getParent()->getContext();
5151 SmallVector<Value *, 4> Args(CI->args());
5152 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5153 cast<ConstantInt>(Args[3])->getZExtValue());
5154 NewCall = Builder.CreateCall(NewFn, Args);
5155 break;
5156 }
5157 case Intrinsic::aarch64_sve_ld3_sret:
5158 case Intrinsic::aarch64_sve_ld4_sret:
5159 case Intrinsic::aarch64_sve_ld2_sret: {
5160 // Is this a trivial remangle of the name to support ptr address spaces?
5161 if (isa<StructType>(F->getReturnType())) {
5162 DefaultCase();
5163 return;
5164 }
5165
5166 StringRef Name = F->getName();
5167 Name = Name.substr(5);
5168 unsigned N = StringSwitch<unsigned>(Name)
5169 .StartsWith("aarch64.sve.ld2", 2)
5170 .StartsWith("aarch64.sve.ld3", 3)
5171 .StartsWith("aarch64.sve.ld4", 4)
5172 .Default(0);
5173 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5174 unsigned MinElts = RetTy->getMinNumElements() / N;
5175 SmallVector<Value *, 2> Args(CI->args());
5176 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5177 Value *Ret = llvm::PoisonValue::get(RetTy);
5178 for (unsigned I = 0; I < N; I++) {
5179 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5180 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5181 }
5182 NewCall = dyn_cast<CallInst>(Ret);
5183 break;
5184 }
5185
5186 case Intrinsic::coro_end: {
5187 SmallVector<Value *, 3> Args(CI->args());
5188 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5189 NewCall = Builder.CreateCall(NewFn, Args);
5190 break;
5191 }
5192
5193 case Intrinsic::vector_extract: {
5194 StringRef Name = F->getName();
5195 Name = Name.substr(5); // Strip llvm
5196 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5197 DefaultCase();
5198 return;
5199 }
5200 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5201 unsigned MinElts = RetTy->getMinNumElements();
5202 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5203 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5204 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5205 break;
5206 }
5207
5208 case Intrinsic::vector_insert: {
5209 StringRef Name = F->getName();
5210 Name = Name.substr(5);
5211 if (!Name.starts_with("aarch64.sve.tuple")) {
5212 DefaultCase();
5213 return;
5214 }
5215 if (Name.starts_with("aarch64.sve.tuple.set")) {
5216 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5217 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5218 Value *NewIdx =
5219 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5220 NewCall = Builder.CreateCall(
5221 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5222 break;
5223 }
5224 if (Name.starts_with("aarch64.sve.tuple.create")) {
5225 unsigned N = StringSwitch<unsigned>(Name)
5226 .StartsWith("aarch64.sve.tuple.create2", 2)
5227 .StartsWith("aarch64.sve.tuple.create3", 3)
5228 .StartsWith("aarch64.sve.tuple.create4", 4)
5229 .Default(0);
5230 assert(N > 1 && "Create is expected to be between 2-4");
5231 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5232 Value *Ret = llvm::PoisonValue::get(RetTy);
5233 unsigned MinElts = RetTy->getMinNumElements() / N;
5234 for (unsigned I = 0; I < N; I++) {
5235 Value *V = CI->getArgOperand(I);
5236 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5237 }
5238 NewCall = dyn_cast<CallInst>(Ret);
5239 }
5240 break;
5241 }
5242
5243 case Intrinsic::arm_neon_bfdot:
5244 case Intrinsic::arm_neon_bfmmla:
5245 case Intrinsic::arm_neon_bfmlalb:
5246 case Intrinsic::arm_neon_bfmlalt:
5247 case Intrinsic::aarch64_neon_bfdot:
5248 case Intrinsic::aarch64_neon_bfmmla:
5249 case Intrinsic::aarch64_neon_bfmlalb:
5250 case Intrinsic::aarch64_neon_bfmlalt: {
5252 assert(CI->arg_size() == 3 &&
5253 "Mismatch between function args and call args");
5254 size_t OperandWidth =
5256 assert((OperandWidth == 64 || OperandWidth == 128) &&
5257 "Unexpected operand width");
5258 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5259 auto Iter = CI->args().begin();
5260 Args.push_back(*Iter++);
5261 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5262 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5263 NewCall = Builder.CreateCall(NewFn, Args);
5264 break;
5265 }
5266
5267 case Intrinsic::bitreverse:
5268 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5269 break;
5270
5271 case Intrinsic::ctlz:
5272 case Intrinsic::cttz: {
5273 if (CI->arg_size() != 1) {
5274 DefaultCase();
5275 return;
5276 }
5277
5278 NewCall =
5279 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5280 break;
5281 }
5282
5283 case Intrinsic::objectsize: {
5284 Value *NullIsUnknownSize =
5285 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5286 Value *Dynamic =
5287 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5288 NewCall = Builder.CreateCall(
5289 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5290 break;
5291 }
5292
5293 case Intrinsic::ctpop:
5294 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5295 break;
5296 case Intrinsic::dbg_value: {
5297 StringRef Name = F->getName();
5298 Name = Name.substr(5); // Strip llvm.
5299 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5300 if (Name.starts_with("dbg.addr")) {
5302 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5303 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5304 NewCall =
5305 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5306 MetadataAsValue::get(C, Expr)});
5307 break;
5308 }
5309
5310 // Upgrade from the old version that had an extra offset argument.
5311 assert(CI->arg_size() == 4);
5312 // Drop nonzero offsets instead of attempting to upgrade them.
5314 if (Offset->isNullValue()) {
5315 NewCall = Builder.CreateCall(
5316 NewFn,
5317 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5318 break;
5319 }
5320 CI->eraseFromParent();
5321 return;
5322 }
5323
5324 case Intrinsic::ptr_annotation:
5325 // Upgrade from versions that lacked the annotation attribute argument.
5326 if (CI->arg_size() != 4) {
5327 DefaultCase();
5328 return;
5329 }
5330
5331 // Create a new call with an added null annotation attribute argument.
5332 NewCall = Builder.CreateCall(
5333 NewFn,
5334 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5335 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5336 NewCall->takeName(CI);
5337 CI->replaceAllUsesWith(NewCall);
5338 CI->eraseFromParent();
5339 return;
5340
5341 case Intrinsic::var_annotation:
5342 // Upgrade from versions that lacked the annotation attribute argument.
5343 if (CI->arg_size() != 4) {
5344 DefaultCase();
5345 return;
5346 }
5347 // Create a new call with an added null annotation attribute argument.
5348 NewCall = Builder.CreateCall(
5349 NewFn,
5350 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5351 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5352 NewCall->takeName(CI);
5353 CI->replaceAllUsesWith(NewCall);
5354 CI->eraseFromParent();
5355 return;
5356
5357 case Intrinsic::riscv_aes32dsi:
5358 case Intrinsic::riscv_aes32dsmi:
5359 case Intrinsic::riscv_aes32esi:
5360 case Intrinsic::riscv_aes32esmi:
5361 case Intrinsic::riscv_sm4ks:
5362 case Intrinsic::riscv_sm4ed: {
5363 // The last argument to these intrinsics used to be i8 and changed to i32.
5364 // The type overload for sm4ks and sm4ed was removed.
5365 Value *Arg2 = CI->getArgOperand(2);
5366 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5367 return;
5368
5369 Value *Arg0 = CI->getArgOperand(0);
5370 Value *Arg1 = CI->getArgOperand(1);
5371 if (CI->getType()->isIntegerTy(64)) {
5372 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5373 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5374 }
5375
5376 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5377 cast<ConstantInt>(Arg2)->getZExtValue());
5378
5379 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5380 Value *Res = NewCall;
5381 if (Res->getType() != CI->getType())
5382 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5383 NewCall->takeName(CI);
5384 CI->replaceAllUsesWith(Res);
5385 CI->eraseFromParent();
5386 return;
5387 }
5388 case Intrinsic::nvvm_mapa_shared_cluster: {
5389 // Create a new call with the correct address space.
5390 NewCall =
5391 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5392 Value *Res = NewCall;
5393 Res = Builder.CreateAddrSpaceCast(
5394 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5395 NewCall->takeName(CI);
5396 CI->replaceAllUsesWith(Res);
5397 CI->eraseFromParent();
5398 return;
5399 }
5400 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5401 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5402 // Create a new call with the correct address space.
5403 SmallVector<Value *, 4> Args(CI->args());
5404 Args[0] = Builder.CreateAddrSpaceCast(
5405 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5406
5407 NewCall = Builder.CreateCall(NewFn, Args);
5408 NewCall->takeName(CI);
5409 CI->replaceAllUsesWith(NewCall);
5410 CI->eraseFromParent();
5411 return;
5412 }
5413 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5414 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5415 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5416 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5417 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5418 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5419 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5420 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5421 SmallVector<Value *, 16> Args(CI->args());
5422
5423 // Create AddrSpaceCast to shared_cluster if needed.
5424 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5425 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5427 Args[0] = Builder.CreateAddrSpaceCast(
5428 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5429
5430 // Attach the flag argument for cta_group, with a
5431 // default value of 0. This handles case (2) in
5432 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5433 size_t NumArgs = CI->arg_size();
5434 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5435 if (!FlagArg->getType()->isIntegerTy(1))
5436 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5437
5438 NewCall = Builder.CreateCall(NewFn, Args);
5439 NewCall->takeName(CI);
5440 CI->replaceAllUsesWith(NewCall);
5441 CI->eraseFromParent();
5442 return;
5443 }
5444 case Intrinsic::riscv_sha256sig0:
5445 case Intrinsic::riscv_sha256sig1:
5446 case Intrinsic::riscv_sha256sum0:
5447 case Intrinsic::riscv_sha256sum1:
5448 case Intrinsic::riscv_sm3p0:
5449 case Intrinsic::riscv_sm3p1: {
5450 // The i64 type overload of these intrinsics was removed. Truncate the
5451 // argument to i32 and sign-extend the i32 result back to the old i64 type.
5452 if (!CI->getType()->isIntegerTy(64))
5453 return;
5454
5455 Value *Arg =
5456 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5457
5458 NewCall = Builder.CreateCall(NewFn, Arg);
5459 Value *Res =
5460 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5461 NewCall->takeName(CI);
5462 CI->replaceAllUsesWith(Res);
5463 CI->eraseFromParent();
5464 return;
5465 }
5466
5467 case Intrinsic::x86_xop_vfrcz_ss:
5468 case Intrinsic::x86_xop_vfrcz_sd:
5469 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5470 break;
5471
5472 case Intrinsic::x86_xop_vpermil2pd:
5473 case Intrinsic::x86_xop_vpermil2ps:
5474 case Intrinsic::x86_xop_vpermil2pd_256:
5475 case Intrinsic::x86_xop_vpermil2ps_256: {
5476 SmallVector<Value *, 4> Args(CI->args());
5477 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5478 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5479 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5480 NewCall = Builder.CreateCall(NewFn, Args);
5481 break;
5482 }
5483
5484 case Intrinsic::x86_sse41_ptestc:
5485 case Intrinsic::x86_sse41_ptestz:
5486 case Intrinsic::x86_sse41_ptestnzc: {
5487 // The arguments for these intrinsics used to be v4f32, and changed
5488 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5489 // So, the only thing required is a bitcast for both arguments.
5490 // First, check the arguments have the old type.
5491 Value *Arg0 = CI->getArgOperand(0);
5492 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5493 return;
5494
5495 // Old intrinsic, add bitcasts
5496 Value *Arg1 = CI->getArgOperand(1);
5497
5498 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5499
5500 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5501 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5502
5503 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5504 break;
5505 }
5506
5507 case Intrinsic::x86_rdtscp: {
5508 // This used to take 1 argument. If we have no arguments, it is already
5509 // upgraded.
5510 if (CI->getNumOperands() == 0)
5511 return;
5512
5513 NewCall = Builder.CreateCall(NewFn);
5514 // Extract the second result and store it.
5515 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5516 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5517 // Replace the original call result with the first result of the new call.
5518 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5519
5520 NewCall->takeName(CI);
5521 CI->replaceAllUsesWith(TSC);
5522 CI->eraseFromParent();
5523 return;
5524 }
5525
5526 case Intrinsic::x86_sse41_insertps:
5527 case Intrinsic::x86_sse41_dppd:
5528 case Intrinsic::x86_sse41_dpps:
5529 case Intrinsic::x86_sse41_mpsadbw:
5530 case Intrinsic::x86_avx_dp_ps_256:
5531 case Intrinsic::x86_avx2_mpsadbw: {
5532 // Need to truncate the last argument from i32 to i8 -- this argument models
5533 // an inherently 8-bit immediate operand to these x86 instructions.
5534 SmallVector<Value *, 4> Args(CI->args());
5535
5536 // Replace the last argument with a trunc.
5537 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5538 NewCall = Builder.CreateCall(NewFn, Args);
5539 break;
5540 }
5541
5542 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5543 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5544 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5545 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5546 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5547 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5548 SmallVector<Value *, 4> Args(CI->args());
5549 unsigned NumElts =
5550 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5551 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5552
5553 NewCall = Builder.CreateCall(NewFn, Args);
5554 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5555
5556 NewCall->takeName(CI);
5557 CI->replaceAllUsesWith(Res);
5558 CI->eraseFromParent();
5559 return;
5560 }
5561
5562 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5563 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5564 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5565 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5566 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5567 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5568 SmallVector<Value *, 4> Args(CI->args());
5569 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5570 if (NewFn->getIntrinsicID() ==
5571 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5572 Args[1] = Builder.CreateBitCast(
5573 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5574
5575 NewCall = Builder.CreateCall(NewFn, Args);
5576 Value *Res = Builder.CreateBitCast(
5577 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5578
5579 NewCall->takeName(CI);
5580 CI->replaceAllUsesWith(Res);
5581 CI->eraseFromParent();
5582 return;
5583 }
5584 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5585 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5586 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5587 SmallVector<Value *, 4> Args(CI->args());
5588 unsigned NumElts =
5589 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5590 Args[1] = Builder.CreateBitCast(
5591 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5592 Args[2] = Builder.CreateBitCast(
5593 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5594
5595 NewCall = Builder.CreateCall(NewFn, Args);
5596 break;
5597 }
5598
5599 case Intrinsic::thread_pointer: {
5600 NewCall = Builder.CreateCall(NewFn, {});
5601 break;
5602 }
5603
5604 case Intrinsic::memcpy:
5605 case Intrinsic::memmove:
5606 case Intrinsic::memset: {
5607 // We have to make sure that the call signature is what we're expecting.
5608 // We only want to change the old signatures by removing the alignment arg:
5609 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5610 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5611 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5612 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5613 // Note: i8*'s in the above can be any pointer type
5614 if (CI->arg_size() != 5) {
5615 DefaultCase();
5616 return;
5617 }
5618 // Remove alignment argument (3), and add alignment attributes to the
5619 // dest/src pointers.
5620 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5621 CI->getArgOperand(2), CI->getArgOperand(4)};
5622 NewCall = Builder.CreateCall(NewFn, Args);
5623 AttributeList OldAttrs = CI->getAttributes();
5624 AttributeList NewAttrs = AttributeList::get(
5625 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5626 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5627 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5628 NewCall->setAttributes(NewAttrs);
5629 auto *MemCI = cast<MemIntrinsic>(NewCall);
5630 // All mem intrinsics support dest alignment.
5632 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5633 // Memcpy/Memmove also support source alignment.
5634 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5635 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5636 break;
5637 }
5638
5639 case Intrinsic::masked_load:
5640 case Intrinsic::masked_gather:
5641 case Intrinsic::masked_store:
5642 case Intrinsic::masked_scatter: {
5643 if (CI->arg_size() != 4) {
5644 DefaultCase();
5645 return;
5646 }
5647
5648 auto GetMaybeAlign = [](Value *Op) {
5649 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5650 uint64_t Val = CI->getZExtValue();
5651 if (Val == 0)
5652 return MaybeAlign();
5653 if (isPowerOf2_64(Val))
5654 return MaybeAlign(Val);
5655 }
5656 reportFatalUsageError("Invalid alignment argument");
5657 };
5658 auto GetAlign = [&](Value *Op) {
5659 MaybeAlign Align = GetMaybeAlign(Op);
5660 if (Align)
5661 return *Align;
5662 reportFatalUsageError("Invalid zero alignment argument");
5663 };
5664
5665 const DataLayout &DL = CI->getDataLayout();
5666 switch (NewFn->getIntrinsicID()) {
5667 case Intrinsic::masked_load:
5668 NewCall = Builder.CreateMaskedLoad(
5669 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5670 CI->getArgOperand(2), CI->getArgOperand(3));
5671 break;
5672 case Intrinsic::masked_gather:
5673 NewCall = Builder.CreateMaskedGather(
5674 CI->getType(), CI->getArgOperand(0),
5675 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5676 CI->getType()->getScalarType()),
5677 CI->getArgOperand(2), CI->getArgOperand(3));
5678 break;
5679 case Intrinsic::masked_store:
5680 NewCall = Builder.CreateMaskedStore(
5681 CI->getArgOperand(0), CI->getArgOperand(1),
5682 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5683 break;
5684 case Intrinsic::masked_scatter:
5685 NewCall = Builder.CreateMaskedScatter(
5686 CI->getArgOperand(0), CI->getArgOperand(1),
5687 DL.getValueOrABITypeAlignment(
5688 GetMaybeAlign(CI->getArgOperand(2)),
5689 CI->getArgOperand(0)->getType()->getScalarType()),
5690 CI->getArgOperand(3));
5691 break;
5692 default:
5693 llvm_unreachable("Unexpected intrinsic ID");
5694 }
5695 // Previous metadata is still valid.
5696 NewCall->copyMetadata(*CI);
5697 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5698 break;
5699 }
5700
5701 case Intrinsic::lifetime_start:
5702 case Intrinsic::lifetime_end: {
5703 if (CI->arg_size() != 2) {
5704 DefaultCase();
5705 return;
5706 }
5707
5708 Value *Ptr = CI->getArgOperand(1);
5709 // Try to strip pointer casts, such that the lifetime works on an alloca.
5710 Ptr = Ptr->stripPointerCasts();
5711 if (isa<AllocaInst>(Ptr)) {
5712 // Don't use NewFn, as we might have looked through an addrspacecast.
5713 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5714 NewCall = Builder.CreateLifetimeStart(Ptr);
5715 else
5716 NewCall = Builder.CreateLifetimeEnd(Ptr);
5717 break;
5718 }
5719
5720 // Otherwise remove the lifetime marker.
5721 CI->eraseFromParent();
5722 return;
5723 }
5724
5725 case Intrinsic::x86_avx512_vpdpbusd_128:
5726 case Intrinsic::x86_avx512_vpdpbusd_256:
5727 case Intrinsic::x86_avx512_vpdpbusd_512:
5728 case Intrinsic::x86_avx512_vpdpbusds_128:
5729 case Intrinsic::x86_avx512_vpdpbusds_256:
5730 case Intrinsic::x86_avx512_vpdpbusds_512:
5731 case Intrinsic::x86_avx2_vpdpbssd_128:
5732 case Intrinsic::x86_avx2_vpdpbssd_256:
5733 case Intrinsic::x86_avx10_vpdpbssd_512:
5734 case Intrinsic::x86_avx2_vpdpbssds_128:
5735 case Intrinsic::x86_avx2_vpdpbssds_256:
5736 case Intrinsic::x86_avx10_vpdpbssds_512:
5737 case Intrinsic::x86_avx2_vpdpbsud_128:
5738 case Intrinsic::x86_avx2_vpdpbsud_256:
5739 case Intrinsic::x86_avx10_vpdpbsud_512:
5740 case Intrinsic::x86_avx2_vpdpbsuds_128:
5741 case Intrinsic::x86_avx2_vpdpbsuds_256:
5742 case Intrinsic::x86_avx10_vpdpbsuds_512:
5743 case Intrinsic::x86_avx2_vpdpbuud_128:
5744 case Intrinsic::x86_avx2_vpdpbuud_256:
5745 case Intrinsic::x86_avx10_vpdpbuud_512:
5746 case Intrinsic::x86_avx2_vpdpbuuds_128:
5747 case Intrinsic::x86_avx2_vpdpbuuds_256:
5748 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5749 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5750 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5751 CI->getArgOperand(2)};
5752 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5753 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5754 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5755
5756 NewCall = Builder.CreateCall(NewFn, Args);
5757 break;
5758 }
5759 case Intrinsic::x86_avx512_vpdpwssd_128:
5760 case Intrinsic::x86_avx512_vpdpwssd_256:
5761 case Intrinsic::x86_avx512_vpdpwssd_512:
5762 case Intrinsic::x86_avx512_vpdpwssds_128:
5763 case Intrinsic::x86_avx512_vpdpwssds_256:
5764 case Intrinsic::x86_avx512_vpdpwssds_512:
5765 case Intrinsic::x86_avx2_vpdpwsud_128:
5766 case Intrinsic::x86_avx2_vpdpwsud_256:
5767 case Intrinsic::x86_avx10_vpdpwsud_512:
5768 case Intrinsic::x86_avx2_vpdpwsuds_128:
5769 case Intrinsic::x86_avx2_vpdpwsuds_256:
5770 case Intrinsic::x86_avx10_vpdpwsuds_512:
5771 case Intrinsic::x86_avx2_vpdpwusd_128:
5772 case Intrinsic::x86_avx2_vpdpwusd_256:
5773 case Intrinsic::x86_avx10_vpdpwusd_512:
5774 case Intrinsic::x86_avx2_vpdpwusds_128:
5775 case Intrinsic::x86_avx2_vpdpwusds_256:
5776 case Intrinsic::x86_avx10_vpdpwusds_512:
5777 case Intrinsic::x86_avx2_vpdpwuud_128:
5778 case Intrinsic::x86_avx2_vpdpwuud_256:
5779 case Intrinsic::x86_avx10_vpdpwuud_512:
5780 case Intrinsic::x86_avx2_vpdpwuuds_128:
5781 case Intrinsic::x86_avx2_vpdpwuuds_256:
5782 case Intrinsic::x86_avx10_vpdpwuuds_512:
5783 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5784 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5785 CI->getArgOperand(2)};
5786 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5787 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5788 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5789
5790 NewCall = Builder.CreateCall(NewFn, Args);
5791 break;
5792 }
5793 assert(NewCall && "Should have either set this variable or returned through "
5794 "the default case");
5795 NewCall->takeName(CI);
5796 CI->replaceAllUsesWith(NewCall);
5797 CI->eraseFromParent();
5798}
5799
5801 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5802
5803 // Check if this function should be upgraded and get the replacement function
5804 // if there is one.
5805 Function *NewFn;
5806 if (UpgradeIntrinsicFunction(F, NewFn)) {
5807 // Replace all users of the old function with the new function or new
5808 // instructions. Use an early-inc range because the call is deleted.
5809 for (User *U : make_early_inc_range(F->users()))
5810 if (CallBase *CB = dyn_cast<CallBase>(U))
5811 UpgradeIntrinsicCall(CB, NewFn);
5812
5813 // Remove old function, no longer used, from the module.
5814 if (F != NewFn)
5815 F->eraseFromParent();
5816 }
5817}
5818
// Converts a TBAA tag node MD into the struct-path aware form, or returns &MD
// unchanged when it has no operands or already looks struct-path aware.
// NOTE(review): the declaration line and parts of both array initializers
// below are absent from this listing; the missing text is not reconstructed.
5820 const unsigned NumOperands = MD.getNumOperands();
5821 if (NumOperands == 0)
5822 return &MD; // Invalid, punt to a verifier error.
5823
5824 // Check if the tag uses struct-path aware TBAA format.
5825 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5826 return &MD;
5827
5828 auto &Context = MD.getContext();
5829 if (NumOperands == 3) {
// Operands 0 and 1 form the scalar type node; operand 2 is carried over as
// the last element of the new tag.
5830 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5831 MDNode *ScalarType = MDNode::get(Context, Elts);
5832 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5833 Metadata *Elts2[] = {ScalarType, ScalarType,
5836 MD.getOperand(2)};
5837 return MDNode::get(Context, Elts2);
5838 }
5839 // Create a MDNode <MD, MD, offset 0>
// NOTE(review): the line declaring Elts for this path is missing here.
5841 Type::getInt64Ty(Context)))};
5842 return MDNode::get(Context, Elts);
5843}
5844
// Rewrites a bitcast between pointers (or pointer vectors) of different
// address spaces as a ptrtoint followed by an inttoptr. Returns the inttoptr
// instruction and stores the intermediate ptrtoint in Temp; returns nullptr
// when no rewrite applies.
// NOTE(review): the first line of the declaration is absent from this
// listing; Opc, V and DestTy are presumably its other parameters.
5846 Instruction *&Temp) {
// Only BitCast opcodes are handled. Temp is left unmodified on this path.
5847 if (Opc != Instruction::BitCast)
5848 return nullptr;
5849
5850 Temp = nullptr;
5851 Type *SrcTy = V->getType();
5852 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5853 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5854 LLVMContext &Context = V->getContext();
5855
5856 // We have no information about target data layout, so we assume that
5857 // the maximum pointer size is 64bit.
5858 Type *MidTy = Type::getInt64Ty(Context);
5859 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5860
5861 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5862 }
5863
5864 return nullptr;
5865}
5866
// Constant counterpart of the instruction rewrite: handles a BitCast between
// pointer (or pointer vector) types whose address spaces differ, and returns
// nullptr in every other case.
// NOTE(review): the declaration line and the start of the return statement
// inside the if-block are absent from this listing.
5868 if (Opc != Instruction::BitCast)
5869 return nullptr;
5870
5871 Type *SrcTy = C->getType();
5872 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5873 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5874 LLVMContext &Context = C->getContext();
5875
5876 // We have no information about target data layout, so we assume that
5877 // the maximum pointer size is 64bit.
5878 Type *MidTy = Type::getInt64Ty(Context);
5879
5881 DestTy);
5882 }
5883
5884 return nullptr;
5885}
5886
5887/// Check the debug info version number, if it is out-dated, drop the debug
5888/// info. Return true if module is modified.
// NOTE(review): the declaration line, the condition guarding the first
// `return false`, the condition opening the verification block, the
// declarations of Diag and DiagVersion, and the condition opening the final
// diagnostic block are all absent from this listing.
5891 return false;
5892
5893 llvm::TimeTraceScope timeScope("Upgrade debug info");
5894 // We need to get metadata before the module is verified (i.e., getModuleFlag
5895 // makes assumptions that we haven't verified yet). Carefully extract the flag
5896 // from the metadata.
5897 unsigned Version = 0;
5898 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
// Find the first flag with at least three operands whose operand 1 is the
// string "Debug Info Version".
5899 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5900 if (Flag->getNumOperands() < 3)
5901 return false;
5902 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5903 return K->getString() == "Debug Info Version";
5904 return false;
5905 });
5906 if (OpIt != ModFlags->op_end()) {
// Version stays 0 when operand 2 is not a ConstantInt.
5907 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5908 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5909 Version = CI->getZExtValue();
5910 }
5911 }
5912
5914 bool BrokenDebugInfo = false;
// A module that fails verification is a fatal error; BrokenDebugInfo is an
// output of verifyModule.
5915 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5916 report_fatal_error("Broken module found, compilation aborted!");
5917 if (!BrokenDebugInfo)
5918 // Everything is ok.
5919 return false;
5920 else {
5921 // Diagnose malformed debug info.
5923 M.getContext().diagnose(Diag);
5924 }
5925 }
// Reached when the early returns above were not taken: strip the debug info.
5926 bool Modified = StripDebugInfo(M);
5928 // Diagnose a version mismatch.
5930 M.getContext().diagnose(DiagVersion);
5931 }
5932 return Modified;
5933}
5934
5935static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5936 GlobalValue *GV, const Metadata *V) {
5937 Function *F = cast<Function>(GV);
5938
5939 constexpr StringLiteral DefaultValue = "1";
5940 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5941 unsigned Length = 0;
5942
5943 if (F->hasFnAttribute(Attr)) {
5944 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5945 // parse these elements placing them into Vect3
5946 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5947 for (; Length < 3 && !S.empty(); Length++) {
5948 auto [Part, Rest] = S.split(',');
5949 Vect3[Length] = Part.trim();
5950 S = Rest;
5951 }
5952 }
5953
5954 const unsigned Dim = DimC - 'x';
5955 assert(Dim < 3 && "Unexpected dim char");
5956
5957 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5958
5959 // local variable required for StringRef in Vect3 to point to.
5960 const std::string VStr = llvm::utostr(VInt);
5961 Vect3[Dim] = VStr;
5962 Length = std::max(Length, Dim + 1);
5963
5964 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5965 F->addFnAttr(Attr, NewAttr);
5966}
5967
5968static inline bool isXYZ(StringRef S) {
5969 return S == "x" || S == "y" || S == "z";
5970}
5971
// Translates a single nvvm.annotations key/value pair (K, V) attached to GV
// into a calling convention or attribute on the function. Returns true when
// the key was recognised and handled, false otherwise.
// NOTE(review): the first declaration line and several statements (one in the
// "kernel" branch, one in the cluster-rank branch, and the bodies of the
// maxntid/reqntid/cluster_dim_ branches) are absent from this listing.
5973 const Metadata *V) {
5974 if (K == "kernel") {
5976 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5977 return true;
5978 }
5979 if (K == "align") {
5980 // V is a bitfield specifying two 16-bit values. The alignment value is
5981 // specified in the low 16 bits; the index is specified in the high bits. For the
5982 // index, 0 indicates the return value while higher values correspond to
5983 // each parameter (idx = param + 1).
5984 const uint64_t AlignIdxValuePair =
5985 mdconst::extract<ConstantInt>(V)->getZExtValue();
5986 const unsigned Idx = (AlignIdxValuePair >> 16);
5987 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5988 cast<Function>(GV)->addAttributeAtIndex(
5989 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5990 return true;
5991 }
5992 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5993 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5995 return true;
5996 }
5997 if (K == "minctasm") {
5998 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5999 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
6000 return true;
6001 }
6002 if (K == "maxnreg") {
6003 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6004 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
6005 return true;
6006 }
// consume_front strips the prefix from K in place, so isXYZ sees only the
// remaining suffix. When the prefix matches but the suffix is not x/y/z, K
// stays shortened for the checks that follow.
6007 if (K.consume_front("maxntid") && isXYZ(K)) {
6009 return true;
6010 }
6011 if (K.consume_front("reqntid") && isXYZ(K)) {
6013 return true;
6014 }
6015 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
6017 return true;
6018 }
6019 if (K == "grid_constant") {
6020 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
6021 for (const auto &Op : cast<MDNode>(V)->operands()) {
6022 // For some reason, the index is 1-based in the metadata. Good thing we're
6023 // able to auto-upgrade it!
6024 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
6025 cast<Function>(GV)->addParamAttr(Index, Attr);
6026 }
6027 return true;
6028 }
6029
6030 return false;
6031}
6032
// Rewrites the "nvvm.annotations" named metadata of module M: every key/value
// pair that upgradeSingleNVVMAnnotation handles is dropped from the metadata,
// and nodes left with nothing but their global operand are removed entirely.
// NOTE(review): the declaration line and the declaration of SeenNodes are
// absent from this listing.
6034 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
6035 if (!NamedMD)
6036 return;
6037
6038 SmallVector<MDNode *, 8> NewNodes;
6040 for (MDNode *MD : NamedMD->operands()) {
// Process each distinct node once; repeated operands are skipped.
6041 if (!SeenNodes.insert(MD).second)
6042 continue;
6043
// Nodes whose first operand is not a GlobalValue are skipped and therefore
// not re-added below.
6044 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6045 if (!GV)
6046 continue;
6047
6048 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6049
6050 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6051 // Each nvvm.annotations metadata entry will be of the following form:
6052 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6053 // start index = 1, to skip the global variable key
6054 // increment = 2, to skip the value for each property-value pairs
6055 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6056 MDString *K = cast<MDString>(MD->getOperand(j));
6057 const MDOperand &V = MD->getOperand(j + 1);
6058 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
// Pairs that were not upgraded are preserved verbatim.
6059 if (!Upgraded)
6060 NewOperands.append({K, V});
6061 }
6062
// Size 1 means only the global operand is left, so the node is dropped.
6063 if (NewOperands.size() > 1)
6064 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6065 }
6066
6067 NamedMD->clearOperands();
6068 for (MDNode *N : NewNodes)
6069 NamedMD->addOperand(N);
6070}
6071
6072/// This checks for objc retain release marker which should be upgraded. It
6073/// returns true if module is modified.
// The marker is read from the named metadata
// "clang.arc.retainAutoreleasedReturnValueMarker" and re-emitted as a module
// flag with the same key, after which the named metadata is erased.
// NOTE(review): the declaration line is absent from this listing.
6075 bool Changed = false;
6076 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6077 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6078 if (ModRetainReleaseMarker) {
// NOTE(review): getOperand(0) is called without checking that the named
// metadata has any operands -- confirm this cannot be empty.
6079 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6080 if (Op) {
6081 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6082 if (ID) {
6083 SmallVector<StringRef, 4> ValueComp;
6084 ID->getString().split(ValueComp, "#");
// A string with exactly one '#' has that separator replaced by ';'. Any
// other string is carried over unchanged.
6085 if (ValueComp.size() == 2) {
6086 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6087 ID = MDString::get(M.getContext(), NewValue);
6088 }
6089 M.addModuleFlag(Module::Error, MarkerKey, ID);
6090 M.eraseNamedMetadata(ModRetainReleaseMarker);
6091 Changed = true;
6092 }
6093 }
6094 }
6095 return Changed;
6096}
6097
// Replaces calls to Objective-C ARC runtime functions in module M with calls
// to the matching llvm.objc.* intrinsics.
// NOTE(review): the declaration line, the definition of CI inside the user
// loop, the declaration of Args, and the condition guarding the early return
// after the "clang.arc.use" upgrade are absent from this listing.
6099 // This lambda converts normal function calls to ARC runtime functions to
6100 // intrinsic calls.
6101 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6102 llvm::Intrinsic::ID IntrinsicFunc) {
6103 Function *Fn = M.getFunction(OldFunc);
6104
// Nothing to do when the module has no function with this name.
6105 if (!Fn)
6106 return;
6107
6108 Function *NewFn =
6109 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6110
// Early-inc iteration, since the visited call is erased inside the loop.
6111 for (User *U : make_early_inc_range(Fn->users())) {
// Only direct calls to Fn are rewritten.
6113 if (!CI || CI->getCalledFunction() != Fn)
6114 continue;
6115
6116 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6117 FunctionType *NewFuncTy = NewFn->getFunctionType();
6119
6120 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6121 // value to the return type of the old function.
6122 if (NewFuncTy->getReturnType() != CI->getType() &&
6123 !CastInst::castIsValid(Instruction::BitCast, CI,
6124 NewFuncTy->getReturnType()))
6125 continue;
6126
6127 bool InvalidCast = false;
6128
6129 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6130 Value *Arg = CI->getArgOperand(I);
6131
6132 // Bitcast argument to the parameter type of the new function if it's
6133 // not a variadic argument.
6134 if (I < NewFuncTy->getNumParams()) {
6135 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6136 // to the parameter type of the new function.
6137 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6138 NewFuncTy->getParamType(I))) {
6139 InvalidCast = true;
6140 break;
6141 }
6142 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6143 }
6144 Args.push_back(Arg);
6145 }
6146
// The original call is left in place when any argument cannot be cast.
6147 if (InvalidCast)
6148 continue;
6149
6150 // Create a call instruction that calls the new function.
6151 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6152 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6153 NewCall->takeName(CI);
6154
6155 // Bitcast the return value back to the type of the old call.
6156 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6157
6158 if (!CI->use_empty())
6159 CI->replaceAllUsesWith(NewRetVal);
6160 CI->eraseFromParent();
6161 }
6162
// The old declaration is removed only when no use of it survived the loop.
6163 if (Fn->use_empty())
6164 Fn->eraseFromParent();
6165 };
6166
6167 // Unconditionally convert a call to "clang.arc.use" to a call to
6168 // "llvm.objc.clang.arc.use".
6169 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6170
6171 // Upgrade the retain release marker. If there is no need to upgrade
6172 // the marker, that means either the module is already new enough to contain
6173 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6175 return;
6176
// Table pairing each runtime function name with its intrinsic ID.
6177 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6178 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6179 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6180 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6181 {"objc_autoreleaseReturnValue",
6182 llvm::Intrinsic::objc_autoreleaseReturnValue},
6183 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6184 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6185 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6186 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6187 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6188 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6189 {"objc_release", llvm::Intrinsic::objc_release},
6190 {"objc_retain", llvm::Intrinsic::objc_retain},
6191 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6192 {"objc_retainAutoreleaseReturnValue",
6193 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6194 {"objc_retainAutoreleasedReturnValue",
6195 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6196 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6197 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6198 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6199 {"objc_unsafeClaimAutoreleasedReturnValue",
6200 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6201 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6202 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6203 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6204 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6205 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6206 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6207 {"objc_arc_annotation_topdown_bbstart",
6208 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6209 {"objc_arc_annotation_topdown_bbend",
6210 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6211 {"objc_arc_annotation_bottomup_bbstart",
6212 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6213 {"objc_arc_annotation_bottomup_bbend",
6214 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6215
6216 for (auto &I : RuntimeFuncs)
6217 UpgradeToIntrinsic(I.first, I.second);
6218}
6219
// Walks the module flags of M and rewrites several legacy encodings in place
// (PIC/PIE behaviors, branch-protection behaviors, Objective-C section and GC
// flags, the AMDGPU code object version key). Returns true when any flag was
// changed or added.
// NOTE(review): the declaration line, the three lines that extract Behavior
// from the flag, and the first element of Ops in the "Objective-C Garbage
// Collection" branch are absent from this listing.
6221 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6222 if (!ModFlags)
6223 return false;
6224
6225 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6226 bool HasSwiftVersionFlag = false;
// The three Swift* values are deliberately left uninitialized: they are
// written together with HasSwiftVersionFlag and only read when it is true.
6227 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6228 uint32_t SwiftABIVersion;
6229 auto Int8Ty = Type::getInt8Ty(M.getContext());
6230 auto Int32Ty = Type::getInt32Ty(M.getContext());
6231
6232 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6233 MDNode *Op = ModFlags->getOperand(I);
// Only well-formed three-operand flags with a string key are considered.
6234 if (Op->getNumOperands() != 3)
6235 continue;
6236 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6237 if (!ID)
6238 continue;
// Helper: replace flag I with a copy that has behavior B and the same key
// and value.
6239 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6240 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6241 Type::getInt32Ty(M.getContext()), B)),
6242 MDString::get(M.getContext(), ID->getString()),
6243 Op->getOperand(2)};
6244 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6245 Changed = true;
6246 };
6247
6248 if (ID->getString() == "Objective-C Image Info Version")
6249 HasObjCFlag = true;
6250 if (ID->getString() == "Objective-C Class Properties")
6251 HasClassProperties = true;
6252 // Upgrade PIC from Error/Max to Min.
6253 if (ID->getString() == "PIC Level") {
6254 if (auto *Behavior =
6256 uint64_t V = Behavior->getLimitedValue();
6257 if (V == Module::Error || V == Module::Max)
6258 SetBehavior(Module::Min);
6259 }
6260 }
6261 // Upgrade "PIE Level" from Error to Max.
6262 if (ID->getString() == "PIE Level")
6263 if (auto *Behavior =
6265 if (Behavior->getLimitedValue() == Module::Error)
6266 SetBehavior(Module::Max);
6267
6268 // Upgrade branch protection and return address signing module flags. The
6269 // module flag behavior for these fields were Error and now they are Min.
6270 if (ID->getString() == "branch-target-enforcement" ||
6271 ID->getString().starts_with("sign-return-address")) {
6272 if (auto *Behavior =
6274 if (Behavior->getLimitedValue() == Module::Error) {
// This local Int32Ty shadows the one declared before the loop.
6275 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6276 Metadata *Ops[3] = {
6277 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6278 Op->getOperand(1), Op->getOperand(2)};
6279 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6280 Changed = true;
6281 }
6282 }
6283 }
6284
6285 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6286 // section name so that llvm-lto will not complain about mismatching
6287 // module flags that are functionally the same.
6288 if (ID->getString() == "Objective-C Image Info Section") {
6289 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6290 SmallVector<StringRef, 4> ValueComp;
6291 Value->getString().split(ValueComp, " ");
// More than one component means the string contained at least one space;
// concatenating the components removes them.
6292 if (ValueComp.size() != 1) {
6293 std::string NewValue;
6294 for (auto &S : ValueComp)
6295 NewValue += S.str();
6296 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6297 MDString::get(M.getContext(), NewValue)};
6298 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6299 Changed = true;
6300 }
6301 }
6302 }
6303
6304 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6305 // If the higher bits are set, it adds new module flag for swift info.
6306 if (ID->getString() == "Objective-C Garbage Collection") {
6307 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6308 if (Md) {
6309 assert(Md->getValue() && "Expected non-empty metadata");
6310 auto Type = Md->getValue()->getType();
// An i8 value is already in the new form.
6311 if (Type == Int8Ty)
6312 continue;
6313 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
// Bits above the low byte carry the Swift version info: ABI version in bits
// 8-15, minor version in bits 16-23, major version in bits 24-31.
6314 if ((Val & 0xff) != Val) {
6315 HasSwiftVersionFlag = true;
6316 SwiftABIVersion = (Val & 0xff00) >> 8;
6317 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6318 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6319 }
6320 Metadata *Ops[3] = {
6322 Op->getOperand(1),
6323 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6324 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6325 Changed = true;
6326 }
6327 }
6328
// Rename the flag key; behavior and value are carried over unchanged.
6329 if (ID->getString() == "amdgpu_code_object_version") {
6330 Metadata *Ops[3] = {
6331 Op->getOperand(0),
6332 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6333 Op->getOperand(2)};
6334 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6335 Changed = true;
6336 }
6337 }
6338
6339 // "Objective-C Class Properties" is recently added for Objective-C. We
6340 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6341 // flag of value 0, so we can correctly downgrade this flag when trying to
6342 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6343 // this module flag.
6344 if (HasObjCFlag && !HasClassProperties) {
6345 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6346 (uint32_t)0);
6347 Changed = true;
6348 }
6349
6350 if (HasSwiftVersionFlag) {
6351 M.addModuleFlag(Module::Error, "Swift ABI Version",
6352 SwiftABIVersion);
6353 M.addModuleFlag(Module::Error, "Swift Major Version",
6354 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6355 M.addModuleFlag(Module::Error, "Swift Minor Version",
6356 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6357 Changed = true;
6358 }
6359
6360 return Changed;
6361}
6362
// Upgrades each operand of the "cfi.functions" named metadata by inserting an
// i64 constant as its third element, after the name and linkage type. Returns
// true when any operand was rewritten.
// NOTE(review): the declaration line, the computation of GUID, the
// declaration of Elts, and the start of the push_back that wraps the i64
// constant are absent from this listing.
6364 NamedMDNode *CFIConsts = M.getNamedMetadata("cfi.functions");
6365 // If this metadata has operands, we expect all of them to be either from
6366 // before or from after the format change handled here, so we can bail out
6367 // fast if the first (if any) operands is of the new format.
// New-format operands are recognised by operand 2 being an i64 constant.
6368 auto MatchesVersion = [](const MDNode *Op) {
6369 return Op->getNumOperands() >= 3 &&
6370 isa<ConstantAsMetadata>(Op->getOperand(2)) &&
6371 cast<ConstantAsMetadata>(Op->getOperand(2))
6372 ->getType()
6373 ->isIntegerTy(64);
6374 };
6375
6376 if (!CFIConsts || !CFIConsts->getNumOperands() ||
6377 MatchesVersion(CFIConsts->getOperand(0)))
6378 return false;
6379
6380 bool Changed = false;
6381 for (unsigned I = 0, E = CFIConsts->getNumOperands(); I != E; ++I) {
6382 MDNode *Op = CFIConsts->getOperand(I);
6383 assert(!MatchesVersion(Op) && "Unexpected mix of CFIConstant formats");
6384 assert(Op->getNumOperands() >= 2 &&
6385 "Expected at least 2 operands - name and linkage type");
// NOTE(review): the dyn_cast result is dereferenced without a null check; a
// non-string operand 0 would be a null dereference -- confirm that operand 0
// is always an MDString, or that cast<> was intended.
6386 MDString *NameMD = dyn_cast<MDString>(Op->getOperand(0));
6387 StringRef Name = NameMD->getString();
6390
6392 Elts.push_back(Op->getOperand(0));
6393 Elts.push_back(Op->getOperand(1));
6395 ConstantInt::get(Type::getInt64Ty(M.getContext()), GUID)));
6396
// Any operands after the first two are appended after the new constant.
6397 for (unsigned J = 2, EJ = Op->getNumOperands(); J != EJ; ++J)
6398 Elts.push_back(Op->getOperand(J));
6399
6400 CFIConsts->setOperand(I, MDNode::get(M.getContext(), Elts));
6401 Changed = true;
6402 }
6403
6404 return Changed;
6405}
6406
// Normalises the section name of every global in M whose section starts with
// "__DATA, __objc_catlist" by trimming whitespace around each comma-separated
// component.
// NOTE(review): the declaration line is absent from this listing.
// TrimSpaces splits on ',', trims each component and joins them again with
// ','. The stream writes a leading ',' before every component, so the first
// character is dropped with substr(1).
6408 auto TrimSpaces = [](StringRef Section) -> std::string {
6409 SmallVector<StringRef, 5> Components;
6410 Section.split(Components, ',');
6411
6412 SmallString<32> Buffer;
6413 raw_svector_ostream OS(Buffer);
6414
6415 for (auto Component : Components)
6416 OS << ',' << Component.trim();
6417
6418 return std::string(OS.str().substr(1));
6419 };
6420
6421 for (auto &GV : M.globals()) {
6422 if (!GV.hasSection())
6423 continue;
6424
6425 StringRef Section = GV.getSection();
6426
// Only the old spelling with a space after the first comma is rewritten.
6427 if (!Section.starts_with("__DATA, __objc_catlist"))
6428 continue;
6429
// Example of the rewrite (before, then after):
6430 // __DATA, __objc_catlist, regular, no_dead_strip
6431 // __DATA,__objc_catlist,regular,no_dead_strip
6432 GV.setSection(TrimSpaces(Section));
6433 }
6434}
6435
6436namespace {
6437// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6438// callsites within a function that did not also have the strictfp attribute.
6439// Since 10.0, if strict FP semantics are needed within a function, the
6440// function must have the strictfp attribute and all calls within the function
6441// must also have the strictfp attribute. This latter restriction is
6442// necessary to prevent unwanted libcall simplification when a function is
6443// being cloned (such as for inlining).
6444//
6445// The "dangling" strictfp attribute usage was only used to prevent constant
6446// folding and other libcall simplification. The nobuiltin attribute on the
6447// callsite has the same effect.
6448struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6449 StrictFPUpgradeVisitor() = default;
6450
// Visits each call site; calls that are not strictfp are left untouched.
6451 void visitCallBase(CallBase &Call) {
6452 if (!Call.isStrictFP())
6453 return;
// NOTE(review): the condition guarding the following return is absent from
// this listing; judging by the comment below, it presumably checks whether
// the caller itself has the strictfp attribute.
6455 return;
6456 // If we get here, the caller doesn't have the strictfp attribute
6457 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6458 Call.removeFnAttr(Attribute::StrictFP);
6459 Call.addFnAttr(Attribute::NoBuiltin);
6460 }
6461};
6462
6463/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6464struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6465 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6466 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6467
6468 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6469 if (!RMW.isFloatingPointOperation())
6470 return;
6471
6472 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6473 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6474 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6475 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6476 }
6477};
6478} // namespace
6479
// Upgrades legacy attributes on function F: dangling call-site strictfp,
// type-incompatible return/parameter attributes, and several string
// attributes that are replaced, converted, or removed. Removals and additions
// are collected in AttrsToRemove / AttrsToAdd and applied once at the end.
// NOTE(review): the declaration line is absent from this listing.
6481 // If a function definition doesn't have the strictfp attribute,
6482 // convert any callsite strictfp attributes to nobuiltin.
6483 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6484 StrictFPUpgradeVisitor SFPV;
6485 SFPV.visit(F);
6486 }
6487
6488 // Remove all incompatible attributes from function.
6489 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6490 F.getReturnType(), F.getAttributes().getRetAttrs()));
6491 for (auto &Arg : F.args())
6492 Arg.removeAttrs(
6493 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6494
// The two flags record whether the corresponding set is non-empty, so the
// final remove/add calls can be skipped when nothing was collected.
6495 bool AddingAttrs = false, RemovingAttrs = false;
6496 AttrBuilder AttrsToAdd(F.getContext());
6497 AttributeMask AttrsToRemove;
6498
6499 // Older versions of LLVM treated an "implicit-section-name" attribute
6500 // similarly to directly setting the section on a Function.
6501 if (Attribute A = F.getFnAttribute("implicit-section-name");
6502 A.isValid() && A.isStringAttribute()) {
6503 F.setSection(A.getValueAsString());
6504 AttrsToRemove.addAttribute("implicit-section-name");
6505 RemovingAttrs = true;
6506 }
6507
// The string attribute "nooutline" becomes the enum attribute NoOutline.
6508 if (Attribute A = F.getFnAttribute("nooutline");
6509 A.isValid() && A.isStringAttribute()) {
6510 AttrsToRemove.addAttribute("nooutline");
6511 AttrsToAdd.addAttribute(Attribute::NoOutline);
6512 AddingAttrs = RemovingAttrs = true;
6513 }
6514
// A valued "uniform-work-group-size" is dropped; only the value "true" is
// re-added, as a valueless attribute.
6515 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6516 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6517 AttrsToRemove.addAttribute("uniform-work-group-size");
6518 RemovingAttrs = true;
6519 if (A.getValueAsString() == "true") {
6520 AttrsToAdd.addAttribute("uniform-work-group-size");
6521 AddingAttrs = true;
6522 }
6523 }
6524
6525 if (!F.empty()) {
6526 // For some reason this is called twice, and the first time is before any
6527 // instructions are loaded into the body.
6528
6529 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6530 A.isValid()) {
6531
// When the attribute is true, its effect is moved onto the individual
// floating-point atomicrmw instructions as metadata.
6532 if (A.getValueAsBool()) {
6533 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6534 Visitor.visit(F);
6535 }
6536
6537 // We will leave behind dead attribute uses on external declarations, but
6538 // clang never added these to declarations anyway.
6539 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6540 RemovingAttrs = true;
6541 }
6542 }
6543
// Defaults used when only one of the two denormal attributes is present.
6544 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6545 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6546
6547 bool HandleDenormalMode = false;
6548
// An attribute whose value does not parse to a valid mode is left in place.
6549 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6550 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6551 if (ParsedMode.isValid()) {
6552 DenormalFPMath = ParsedMode;
6553 AttrsToRemove.addAttribute("denormal-fp-math");
6554 AddingAttrs = RemovingAttrs = true;
6555 HandleDenormalMode = true;
6556 }
6557 }
6558
6559 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6560 Attr.isValid()) {
6561 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6562 if (ParsedMode.isValid()) {
6563 DenormalFPMathF32 = ParsedMode;
6564 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6565 AddingAttrs = RemovingAttrs = true;
6566 HandleDenormalMode = true;
6567 }
6568 }
6569
// Both parsed modes are folded into a single denormal FP environment
// attribute.
6570 if (HandleDenormalMode)
6571 AttrsToAdd.addDenormalFPEnvAttr(
6572 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6573
// Removal runs before addition, so a kind present in both sets (such as
// "uniform-work-group-size") ends up re-added.
6574 if (RemovingAttrs)
6575 F.removeFnAttrs(AttrsToRemove);
6576
6577 if (AddingAttrs)
6578 F.addFnAttrs(AttrsToAdd);
6579}
6580
6581// Check if the function attribute is not present and set it.
// An attribute that is already present keeps its current value.
// NOTE(review): the first line of the declaration is absent from this
// listing; F and FnAttrName are presumably its other parameters.
6583 StringRef Value) {
6584 if (!F.hasFnAttribute(FnAttrName))
6585 F.addFnAttr(FnAttrName, Value);
6586}
6587
6588// Check if the function attribute is not present and set it if needed.
6589// If the attribute is "false" then removes it.
6590// If the attribute is "true" resets it to a valueless attribute.
6591static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6592 if (!F.hasFnAttribute(FnAttrName)) {
6593 if (Set)
6594 F.addFnAttr(FnAttrName);
6595 } else {
6596 auto A = F.getFnAttribute(FnAttrName);
6597 if ("false" == A.getValueAsString())
6598 F.removeFnAttr(FnAttrName);
6599 else if ("true" == A.getValueAsString()) {
6600 F.removeFnAttr(FnAttrName);
6601 F.addFnAttr(FnAttrName);
6602 }
6603 }
6604}
6605
// For ARM, Thumb and AArch64 modules, copies the branch-protection related
// module flags onto every function definition as function attributes, then
// rewrites the module flags that were set to 1 with the value 2.
// NOTE(review): the declaration line is absent from this listing.
6607 Triple T(M.getTargetTriple());
6608 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6609 return;
6610
// Values read from the module flags; 0 when the flag is absent.
6611 uint64_t BTEValue = 0;
6612 uint64_t BPPLRValue = 0;
6613 uint64_t GCSValue = 0;
6614 uint64_t SRAValue = 0;
6615 uint64_t SRAALLValue = 0;
6616 uint64_t SRABKeyValue = 0;
6617
6618 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6619 if (ModFlags) {
6620 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6621 MDNode *Op = ModFlags->getOperand(I);
6622 if (Op->getNumOperands() != 3)
6623 continue;
6624
6625 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6626 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6627 if (!ID || !CI)
6628 continue;
6629
// Map the flag name to the variable that receives its value; flags with
// other names are ignored.
6630 StringRef IDStr = ID->getString();
6631 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6632 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6633 : IDStr == "guarded-control-stack" ? &GCSValue
6634 : IDStr == "sign-return-address" ? &SRAValue
6635 : IDStr == "sign-return-address-all" ? &SRAALLValue
6636 : IDStr == "sign-return-address-with-bkey"
6637 ? &SRABKeyValue
6638 : nullptr;
6639 if (!ValPtr)
6640 continue;
6641
// 2 is the value this function writes back at the end, so a flag that
// already holds 2 makes the whole function return without changes.
// NOTE(review): presumably 2 marks a module that was already processed --
// confirm.
6642 *ValPtr = CI->getZExtValue();
6643 if (*ValPtr == 2)
6644 return;
6645 }
6646 }
6647
// Only the exact value 1 counts as "enabled".
6648 bool BTE = BTEValue == 1;
6649 bool BPPLR = BPPLRValue == 1;
6650 bool GCS = GCSValue == 1;
6651 bool SRA = SRAValue == 1;
6652
6653 StringRef SignTypeValue = "non-leaf";
6654 if (SRA && SRAALLValue == 1)
6655 SignTypeValue = "all";
6656
6657 StringRef SignKeyValue = "a_key";
6658 if (SRA && SRABKeyValue == 1)
6659 SignKeyValue = "b_key";
6660
6661 for (Function &F : M.getFunctionList()) {
// Declarations are skipped; only definitions receive attributes.
6662 if (F.isDeclaration())
6663 continue;
6664
6665 if (SRA) {
6666 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6667 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6668 } else {
// Without module-level signing, an explicit "none" is dropped together with
// the key attribute.
6669 if (auto A = F.getFnAttribute("sign-return-address");
6670 A.isValid() && "none" == A.getValueAsString()) {
6671 F.removeFnAttr("sign-return-address");
6672 F.removeFnAttr("sign-return-address-key");
6673 }
6674 }
6675 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6676 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6677 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6678 }
6679
// Rewrite every enabled flag with the value 2 and Min behavior.
6680 if (BTE)
6681 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6682 if (BPPLR)
6683 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6684 if (GCS)
6685 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6686 if (SRA) {
6687 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6688 if (SRAALLValue == 1)
6689 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6690 if (SRABKeyValue == 1)
6691 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6692 }
6693}
6694
6695static bool isOldLoopArgument(Metadata *MD) {
6696 auto *T = dyn_cast_or_null<MDTuple>(MD);
6697 if (!T)
6698 return false;
6699 if (T->getNumOperands() < 1)
6700 return false;
6701 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6702 if (!S)
6703 return false;
6704 return S->getString().starts_with("llvm.vectorizer.");
6705}
6706
// Maps an old "llvm.vectorizer.*" tag string to its replacement MDString:
// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count", and any
// other tag has its prefix replaced by "llvm.loop.vectorize.".
// NOTE(review): the declaration line is absent from this listing; C and
// OldTag are presumably its parameters.
6708 StringRef OldPrefix = "llvm.vectorizer.";
6709 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6710
6711 if (OldTag == "llvm.vectorizer.unroll")
6712 return MDString::get(C, "llvm.loop.interleave.count");
6713
6714 return MDString::get(
6715 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6716 .str());
6717}
6718
// Returns MD unchanged unless it is a non-empty MDTuple whose first operand
// is an MDString starting with "llvm.vectorizer."; in that case returns a new
// tuple with the upgraded tag followed by the remaining operands.
// NOTE(review): the declaration line and the declaration of Ops are absent
// from this listing.
6720 auto *T = dyn_cast_or_null<MDTuple>(MD);
6721 if (!T)
6722 return MD;
6723 if (T->getNumOperands() < 1)
6724 return MD;
6725 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6726 if (!OldTag)
6727 return MD;
6728 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6729 return MD;
6730
6731 // This has an old tag. Upgrade it.
6733 Ops.reserve(T->getNumOperands());
6734 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Operands after the tag are copied over as they are.
6735 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6736 Ops.push_back(T->getOperand(I));
6737
6738 return MDTuple::get(T->getContext(), Ops);
6739}
6740
6742 auto *T = dyn_cast<MDTuple>(&N);
6743 if (!T)
6744 return &N;
6745
6746 if (none_of(T->operands(), isOldLoopArgument))
6747 return &N;
6748
6750 Ops.reserve(T->getNumOperands());
6751 for (Metadata *MD : T->operands())
6752 Ops.push_back(upgradeLoopArgument(MD));
6753
6754 return MDTuple::get(T->getContext(), Ops);
6755}
6756
6758 Triple T(TT);
6759 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6760 // the address space of globals to 1. This does not apply to SPIRV Logical.
6761 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6762 !DL.contains("-G") && !DL.starts_with("G")) {
6763 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6764 }
6765
6766 if (T.isLoongArch64() || T.isRISCV64()) {
6767 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6768 auto I = DL.find("-n64-");
6769 if (I != StringRef::npos)
6770 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6771 return DL.str();
6772 }
6773
6774 // AMDGPU data layout upgrades.
6775 std::string Res = DL.str();
6776 if (T.isAMDGPU()) {
6777 // Define address spaces for constants.
6778 if (!DL.contains("-G") && !DL.starts_with("G"))
6779 Res.append(Res.empty() ? "G1" : "-G1");
6780
6781 // AMDGCN data layout upgrades.
6782 if (T.isAMDGCN()) {
6783
6784 // Add missing non-integral declarations.
6785 // This goes before adding new address spaces to prevent incoherent string
6786 // values.
6787 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6788 Res.append("-ni:7:8:9");
6789 // Update ni:7 to ni:7:8:9.
6790 if (DL.ends_with("ni:7"))
6791 Res.append(":8:9");
6792 if (DL.ends_with("ni:7:8"))
6793 Res.append(":9");
6794
6795 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6796 // resources) An empty data layout has already been upgraded to G1 by now.
6797 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6798 Res.append("-p7:160:256:256:32");
6799 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6800 Res.append("-p8:128:128:128:48");
6801 constexpr StringRef OldP8("-p8:128:128-");
6802 if (DL.contains(OldP8))
6803 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6804 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6805 Res.append("-p9:192:256:256:32");
6806 }
6807
6808 // Upgrade the ELF mangling mode.
6809 if (!DL.contains("m:e"))
6810 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6811
6812 return Res;
6813 }
6814
6815 if (T.isSystemZ() && !DL.empty()) {
6816 // Make sure the stack alignment is present.
6817 if (!DL.contains("-S64"))
6818 return "E-S64" + DL.drop_front(1).str();
6819 return DL.str();
6820 }
6821
6822 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6823 // If the datalayout matches the expected format, add pointer size address
6824 // spaces to the datalayout.
6825 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6826 if (!DL.contains(AddrSpaces)) {
6828 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6829 if (R.match(Res, &Groups))
6830 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6831 }
6832 };
6833
6834 // AArch64 data layout upgrades.
6835 if (T.isAArch64()) {
6836 // Add "-Fn32"
6837 if (!DL.empty() && !DL.contains("-Fn32"))
6838 Res.append("-Fn32");
6839 AddPtr32Ptr64AddrSpaces();
6840 return Res;
6841 }
6842
6843 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6844 T.isWasm()) {
6845 // Mips64 with o32 ABI did not add "-i128:128".
6846 // Add "-i128:128"
6847 std::string I64 = "-i64:64";
6848 std::string I128 = "-i128:128";
6849 if (!StringRef(Res).contains(I128)) {
6850 size_t Pos = Res.find(I64);
6851 if (Pos != size_t(-1))
6852 Res.insert(Pos + I64.size(), I128);
6853 }
6854 }
6855
6856 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6857 size_t Pos = Res.find("-S128");
6858 if (Pos == StringRef::npos)
6859 Pos = Res.size();
6860 Res.insert(Pos, "-f64:32:64");
6861 }
6862
6863 if (!T.isX86())
6864 return Res;
6865
6866 AddPtr32Ptr64AddrSpaces();
6867
6868 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6869 // for i128 operations prior to this being reflected in the data layout, and
6870 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6871 // boundaries, so although this is a breaking change, the upgrade is expected
6872 // to fix more IR than it breaks.
6873 // Intel MCU is an exception and uses 4-byte-alignment.
6874 if (!T.isOSIAMCU()) {
6875 std::string I128 = "-i128:128";
6876 if (StringRef Ref = Res; !Ref.contains(I128)) {
6878 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6879 if (R.match(Res, &Groups))
6880 Res = (Groups[1] + I128 + Groups[3]).str();
6881 }
6882 }
6883
6884 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6885 // Raising the alignment is safe because Clang did not produce f80 values in
6886 // the MSVC environment before this upgrade was added.
6887 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6888 StringRef Ref = Res;
6889 auto I = Ref.find("-f80:32-");
6890 if (I != StringRef::npos)
6891 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6892 }
6893
6894 return Res;
6895}
6896
6897void llvm::UpgradeAttributes(AttrBuilder &B) {
6898 StringRef FramePointer;
6899 Attribute A = B.getAttribute("no-frame-pointer-elim");
6900 if (A.isValid()) {
6901 // The value can be "true" or "false".
6902 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6903 B.removeAttribute("no-frame-pointer-elim");
6904 }
6905 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6906 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6907 if (FramePointer != "all")
6908 FramePointer = "non-leaf";
6909 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6910 }
6911 if (!FramePointer.empty())
6912 B.addAttribute("frame-pointer", FramePointer);
6913
6914 A = B.getAttribute("null-pointer-is-valid");
6915 if (A.isValid()) {
6916 // The value can be "true" or "false".
6917 bool NullPointerIsValid = A.getValueAsString() == "true";
6918 B.removeAttribute("null-pointer-is-valid");
6919 if (NullPointerIsValid)
6920 B.addAttribute(Attribute::NullPointerIsValid);
6921 }
6922
6923 A = B.getAttribute("uniform-work-group-size");
6924 if (A.isValid()) {
6925 StringRef Val = A.getValueAsString();
6926 if (!Val.empty()) {
6927 bool IsTrue = Val == "true";
6928 B.removeAttribute("uniform-work-group-size");
6929 if (IsTrue)
6930 B.addAttribute("uniform-work-group-size");
6931 }
6932 }
6933}
6934
6935void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6936 // clang.arc.attachedcall bundles are now required to have an operand.
6937 // If they don't, it's okay to drop them entirely: when there is an operand,
6938 // the "attachedcall" is meaningful and required, but without an operand,
6939 // it's just a marker NOP. Dropping it merely prevents an optimization.
6940 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6941 return OBD.getTag() == "clang.arc.attachedcall" &&
6942 OBD.inputs().empty();
6943 });
6944}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:93
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2868
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1075
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1439
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1567
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1445
LLVMContext & getContext() const
Definition Metadata.h:1239
Tracking metadata reference owned by Metadata.
Definition Metadata.h:897
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1524
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1755
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1851
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:891
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:479
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106