35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
64 cl::desc(
"Disable autoupgrade of debug info"));
83 Type *Arg0Type =
F->getFunctionType()->getParamType(0);
98 Type *LastArgType =
F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
114 if (
F->getReturnType()->isVectorTy())
127 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
128 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
145 Type *Arg1Type =
F->getFunctionType()->getParamType(1);
146 Type *Arg2Type =
F->getFunctionType()->getParamType(2);
160 if (
F->getReturnType()->getScalarType()->isBFloatTy())
170 if (
F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
184 if (Name.consume_front(
"avx."))
185 return (Name.starts_with(
"blend.p") ||
186 Name ==
"cvt.ps2.pd.256" ||
187 Name ==
"cvtdq2.pd.256" ||
188 Name ==
"cvtdq2.ps.256" ||
189 Name.starts_with(
"movnt.") ||
190 Name.starts_with(
"sqrt.p") ||
191 Name.starts_with(
"storeu.") ||
192 Name.starts_with(
"vbroadcast.s") ||
193 Name.starts_with(
"vbroadcastf128") ||
194 Name.starts_with(
"vextractf128.") ||
195 Name.starts_with(
"vinsertf128.") ||
196 Name.starts_with(
"vperm2f128.") ||
197 Name.starts_with(
"vpermil."));
199 if (Name.consume_front(
"avx2."))
200 return (Name ==
"movntdqa" ||
201 Name.starts_with(
"pabs.") ||
202 Name.starts_with(
"padds.") ||
203 Name.starts_with(
"paddus.") ||
204 Name.starts_with(
"pblendd.") ||
206 Name.starts_with(
"pbroadcast") ||
207 Name.starts_with(
"pcmpeq.") ||
208 Name.starts_with(
"pcmpgt.") ||
209 Name.starts_with(
"pmax") ||
210 Name.starts_with(
"pmin") ||
211 Name.starts_with(
"pmovsx") ||
212 Name.starts_with(
"pmovzx") ||
214 Name ==
"pmulu.dq" ||
215 Name.starts_with(
"psll.dq") ||
216 Name.starts_with(
"psrl.dq") ||
217 Name.starts_with(
"psubs.") ||
218 Name.starts_with(
"psubus.") ||
219 Name.starts_with(
"vbroadcast") ||
220 Name ==
"vbroadcasti128" ||
221 Name ==
"vextracti128" ||
222 Name ==
"vinserti128" ||
223 Name ==
"vperm2i128");
225 if (Name.consume_front(
"avx512.")) {
226 if (Name.consume_front(
"mask."))
228 return (Name.starts_with(
"add.p") ||
229 Name.starts_with(
"and.") ||
230 Name.starts_with(
"andn.") ||
231 Name.starts_with(
"broadcast.s") ||
232 Name.starts_with(
"broadcastf32x4.") ||
233 Name.starts_with(
"broadcastf32x8.") ||
234 Name.starts_with(
"broadcastf64x2.") ||
235 Name.starts_with(
"broadcastf64x4.") ||
236 Name.starts_with(
"broadcasti32x4.") ||
237 Name.starts_with(
"broadcasti32x8.") ||
238 Name.starts_with(
"broadcasti64x2.") ||
239 Name.starts_with(
"broadcasti64x4.") ||
240 Name.starts_with(
"cmp.b") ||
241 Name.starts_with(
"cmp.d") ||
242 Name.starts_with(
"cmp.q") ||
243 Name.starts_with(
"cmp.w") ||
244 Name.starts_with(
"compress.b") ||
245 Name.starts_with(
"compress.d") ||
246 Name.starts_with(
"compress.p") ||
247 Name.starts_with(
"compress.q") ||
248 Name.starts_with(
"compress.store.") ||
249 Name.starts_with(
"compress.w") ||
250 Name.starts_with(
"conflict.") ||
251 Name.starts_with(
"cvtdq2pd.") ||
252 Name.starts_with(
"cvtdq2ps.") ||
253 Name ==
"cvtpd2dq.256" ||
254 Name ==
"cvtpd2ps.256" ||
255 Name ==
"cvtps2pd.128" ||
256 Name ==
"cvtps2pd.256" ||
257 Name.starts_with(
"cvtqq2pd.") ||
258 Name ==
"cvtqq2ps.256" ||
259 Name ==
"cvtqq2ps.512" ||
260 Name ==
"cvttpd2dq.256" ||
261 Name ==
"cvttps2dq.128" ||
262 Name ==
"cvttps2dq.256" ||
263 Name.starts_with(
"cvtudq2pd.") ||
264 Name.starts_with(
"cvtudq2ps.") ||
265 Name.starts_with(
"cvtuqq2pd.") ||
266 Name ==
"cvtuqq2ps.256" ||
267 Name ==
"cvtuqq2ps.512" ||
268 Name.starts_with(
"dbpsadbw.") ||
269 Name.starts_with(
"div.p") ||
270 Name.starts_with(
"expand.b") ||
271 Name.starts_with(
"expand.d") ||
272 Name.starts_with(
"expand.load.") ||
273 Name.starts_with(
"expand.p") ||
274 Name.starts_with(
"expand.q") ||
275 Name.starts_with(
"expand.w") ||
276 Name.starts_with(
"fpclass.p") ||
277 Name.starts_with(
"insert") ||
278 Name.starts_with(
"load.") ||
279 Name.starts_with(
"loadu.") ||
280 Name.starts_with(
"lzcnt.") ||
281 Name.starts_with(
"max.p") ||
282 Name.starts_with(
"min.p") ||
283 Name.starts_with(
"movddup") ||
284 Name.starts_with(
"move.s") ||
285 Name.starts_with(
"movshdup") ||
286 Name.starts_with(
"movsldup") ||
287 Name.starts_with(
"mul.p") ||
288 Name.starts_with(
"or.") ||
289 Name.starts_with(
"pabs.") ||
290 Name.starts_with(
"packssdw.") ||
291 Name.starts_with(
"packsswb.") ||
292 Name.starts_with(
"packusdw.") ||
293 Name.starts_with(
"packuswb.") ||
294 Name.starts_with(
"padd.") ||
295 Name.starts_with(
"padds.") ||
296 Name.starts_with(
"paddus.") ||
297 Name.starts_with(
"palignr.") ||
298 Name.starts_with(
"pand.") ||
299 Name.starts_with(
"pandn.") ||
300 Name.starts_with(
"pavg") ||
301 Name.starts_with(
"pbroadcast") ||
302 Name.starts_with(
"pcmpeq.") ||
303 Name.starts_with(
"pcmpgt.") ||
304 Name.starts_with(
"perm.df.") ||
305 Name.starts_with(
"perm.di.") ||
306 Name.starts_with(
"permvar.") ||
307 Name.starts_with(
"pmaddubs.w.") ||
308 Name.starts_with(
"pmaddw.d.") ||
309 Name.starts_with(
"pmax") ||
310 Name.starts_with(
"pmin") ||
311 Name ==
"pmov.qd.256" ||
312 Name ==
"pmov.qd.512" ||
313 Name ==
"pmov.wb.256" ||
314 Name ==
"pmov.wb.512" ||
315 Name.starts_with(
"pmovsx") ||
316 Name.starts_with(
"pmovzx") ||
317 Name.starts_with(
"pmul.dq.") ||
318 Name.starts_with(
"pmul.hr.sw.") ||
319 Name.starts_with(
"pmulh.w.") ||
320 Name.starts_with(
"pmulhu.w.") ||
321 Name.starts_with(
"pmull.") ||
322 Name.starts_with(
"pmultishift.qb.") ||
323 Name.starts_with(
"pmulu.dq.") ||
324 Name.starts_with(
"por.") ||
325 Name.starts_with(
"prol.") ||
326 Name.starts_with(
"prolv.") ||
327 Name.starts_with(
"pror.") ||
328 Name.starts_with(
"prorv.") ||
329 Name.starts_with(
"pshuf.b.") ||
330 Name.starts_with(
"pshuf.d.") ||
331 Name.starts_with(
"pshufh.w.") ||
332 Name.starts_with(
"pshufl.w.") ||
333 Name.starts_with(
"psll.d") ||
334 Name.starts_with(
"psll.q") ||
335 Name.starts_with(
"psll.w") ||
336 Name.starts_with(
"pslli") ||
337 Name.starts_with(
"psllv") ||
338 Name.starts_with(
"psra.d") ||
339 Name.starts_with(
"psra.q") ||
340 Name.starts_with(
"psra.w") ||
341 Name.starts_with(
"psrai") ||
342 Name.starts_with(
"psrav") ||
343 Name.starts_with(
"psrl.d") ||
344 Name.starts_with(
"psrl.q") ||
345 Name.starts_with(
"psrl.w") ||
346 Name.starts_with(
"psrli") ||
347 Name.starts_with(
"psrlv") ||
348 Name.starts_with(
"psub.") ||
349 Name.starts_with(
"psubs.") ||
350 Name.starts_with(
"psubus.") ||
351 Name.starts_with(
"pternlog.") ||
352 Name.starts_with(
"punpckh") ||
353 Name.starts_with(
"punpckl") ||
354 Name.starts_with(
"pxor.") ||
355 Name.starts_with(
"shuf.f") ||
356 Name.starts_with(
"shuf.i") ||
357 Name.starts_with(
"shuf.p") ||
358 Name.starts_with(
"sqrt.p") ||
359 Name.starts_with(
"store.b.") ||
360 Name.starts_with(
"store.d.") ||
361 Name.starts_with(
"store.p") ||
362 Name.starts_with(
"store.q.") ||
363 Name.starts_with(
"store.w.") ||
364 Name ==
"store.ss" ||
365 Name.starts_with(
"storeu.") ||
366 Name.starts_with(
"sub.p") ||
367 Name.starts_with(
"ucmp.") ||
368 Name.starts_with(
"unpckh.") ||
369 Name.starts_with(
"unpckl.") ||
370 Name.starts_with(
"valign.") ||
371 Name ==
"vcvtph2ps.128" ||
372 Name ==
"vcvtph2ps.256" ||
373 Name.starts_with(
"vextract") ||
374 Name.starts_with(
"vfmadd.") ||
375 Name.starts_with(
"vfmaddsub.") ||
376 Name.starts_with(
"vfnmadd.") ||
377 Name.starts_with(
"vfnmsub.") ||
378 Name.starts_with(
"vpdpbusd.") ||
379 Name.starts_with(
"vpdpbusds.") ||
380 Name.starts_with(
"vpdpwssd.") ||
381 Name.starts_with(
"vpdpwssds.") ||
382 Name.starts_with(
"vpermi2var.") ||
383 Name.starts_with(
"vpermil.p") ||
384 Name.starts_with(
"vpermilvar.") ||
385 Name.starts_with(
"vpermt2var.") ||
386 Name.starts_with(
"vpmadd52") ||
387 Name.starts_with(
"vpshld.") ||
388 Name.starts_with(
"vpshldv.") ||
389 Name.starts_with(
"vpshrd.") ||
390 Name.starts_with(
"vpshrdv.") ||
391 Name.starts_with(
"vpshufbitqmb.") ||
392 Name.starts_with(
"xor."));
394 if (Name.consume_front(
"mask3."))
396 return (Name.starts_with(
"vfmadd.") ||
397 Name.starts_with(
"vfmaddsub.") ||
398 Name.starts_with(
"vfmsub.") ||
399 Name.starts_with(
"vfmsubadd.") ||
400 Name.starts_with(
"vfnmsub."));
402 if (Name.consume_front(
"maskz."))
404 return (Name.starts_with(
"pternlog.") ||
405 Name.starts_with(
"vfmadd.") ||
406 Name.starts_with(
"vfmaddsub.") ||
407 Name.starts_with(
"vpdpbusd.") ||
408 Name.starts_with(
"vpdpbusds.") ||
409 Name.starts_with(
"vpdpwssd.") ||
410 Name.starts_with(
"vpdpwssds.") ||
411 Name.starts_with(
"vpermt2var.") ||
412 Name.starts_with(
"vpmadd52") ||
413 Name.starts_with(
"vpshldv.") ||
414 Name.starts_with(
"vpshrdv."));
417 return (Name ==
"movntdqa" ||
418 Name ==
"pmul.dq.512" ||
419 Name ==
"pmulu.dq.512" ||
420 Name.starts_with(
"broadcastm") ||
421 Name.starts_with(
"cmp.p") ||
422 Name.starts_with(
"cvtb2mask.") ||
423 Name.starts_with(
"cvtd2mask.") ||
424 Name.starts_with(
"cvtmask2") ||
425 Name.starts_with(
"cvtq2mask.") ||
426 Name ==
"cvtusi2sd" ||
427 Name.starts_with(
"cvtw2mask.") ||
432 Name ==
"kortestc.w" ||
433 Name ==
"kortestz.w" ||
434 Name.starts_with(
"kunpck") ||
437 Name.starts_with(
"padds.") ||
438 Name.starts_with(
"pbroadcast") ||
439 Name.starts_with(
"prol") ||
440 Name.starts_with(
"pror") ||
441 Name.starts_with(
"psll.dq") ||
442 Name.starts_with(
"psrl.dq") ||
443 Name.starts_with(
"psubs.") ||
444 Name.starts_with(
"ptestm") ||
445 Name.starts_with(
"ptestnm") ||
446 Name.starts_with(
"storent.") ||
447 Name.starts_with(
"vbroadcast.s") ||
448 Name.starts_with(
"vpshld.") ||
449 Name.starts_with(
"vpshrd."));
452 if (Name.consume_front(
"fma."))
453 return (Name.starts_with(
"vfmadd.") ||
454 Name.starts_with(
"vfmsub.") ||
455 Name.starts_with(
"vfmsubadd.") ||
456 Name.starts_with(
"vfnmadd.") ||
457 Name.starts_with(
"vfnmsub."));
459 if (Name.consume_front(
"fma4."))
460 return Name.starts_with(
"vfmadd.s");
462 if (Name.consume_front(
"sse."))
463 return (Name ==
"add.ss" ||
464 Name ==
"cvtsi2ss" ||
465 Name ==
"cvtsi642ss" ||
468 Name.starts_with(
"sqrt.p") ||
470 Name.starts_with(
"storeu.") ||
473 if (Name.consume_front(
"sse2."))
474 return (Name ==
"add.sd" ||
475 Name ==
"cvtdq2pd" ||
476 Name ==
"cvtdq2ps" ||
477 Name ==
"cvtps2pd" ||
478 Name ==
"cvtsi2sd" ||
479 Name ==
"cvtsi642sd" ||
480 Name ==
"cvtss2sd" ||
483 Name.starts_with(
"padds.") ||
484 Name.starts_with(
"paddus.") ||
485 Name.starts_with(
"pcmpeq.") ||
486 Name.starts_with(
"pcmpgt.") ||
491 Name ==
"pmulu.dq" ||
492 Name.starts_with(
"pshuf") ||
493 Name.starts_with(
"psll.dq") ||
494 Name.starts_with(
"psrl.dq") ||
495 Name.starts_with(
"psubs.") ||
496 Name.starts_with(
"psubus.") ||
497 Name.starts_with(
"sqrt.p") ||
499 Name ==
"storel.dq" ||
500 Name.starts_with(
"storeu.") ||
503 if (Name.consume_front(
"sse41."))
504 return (Name.starts_with(
"blendp") ||
505 Name ==
"movntdqa" ||
515 Name.starts_with(
"pmovsx") ||
516 Name.starts_with(
"pmovzx") ||
519 if (Name.consume_front(
"sse42."))
520 return Name ==
"crc32.64.8";
522 if (Name.consume_front(
"sse4a."))
523 return Name.starts_with(
"movnt.");
525 if (Name.consume_front(
"ssse3."))
526 return (Name ==
"pabs.b.128" ||
527 Name ==
"pabs.d.128" ||
528 Name ==
"pabs.w.128");
530 if (Name.consume_front(
"xop."))
531 return (Name ==
"vpcmov" ||
532 Name ==
"vpcmov.256" ||
533 Name.starts_with(
"vpcom") ||
534 Name.starts_with(
"vprot"));
536 return (Name ==
"addcarry.u32" ||
537 Name ==
"addcarry.u64" ||
538 Name ==
"addcarryx.u32" ||
539 Name ==
"addcarryx.u64" ||
540 Name ==
"subborrow.u32" ||
541 Name ==
"subborrow.u64" ||
542 Name.starts_with(
"vcvtph2ps."));
548 if (!Name.consume_front(
"x86."))
556 if (Name ==
"rdtscp") {
558 if (
F->getFunctionType()->getNumParams() == 0)
563 Intrinsic::x86_rdtscp);
570 if (Name.consume_front(
"sse41.ptest")) {
572 .
Case(
"c", Intrinsic::x86_sse41_ptestc)
573 .
Case(
"z", Intrinsic::x86_sse41_ptestz)
574 .
Case(
"nzc", Intrinsic::x86_sse41_ptestnzc)
587 .
Case(
"sse41.insertps", Intrinsic::x86_sse41_insertps)
588 .
Case(
"sse41.dppd", Intrinsic::x86_sse41_dppd)
589 .
Case(
"sse41.dpps", Intrinsic::x86_sse41_dpps)
590 .
Case(
"sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
591 .
Case(
"avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
592 .
Case(
"avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
597 if (Name.consume_front(
"avx512.")) {
598 if (Name.consume_front(
"mask.cmp.")) {
601 .
Case(
"pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
602 .
Case(
"pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
603 .
Case(
"pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
604 .
Case(
"ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
605 .
Case(
"ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
606 .
Case(
"ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
610 }
else if (Name.starts_with(
"vpdpbusd.") ||
611 Name.starts_with(
"vpdpbusds.")) {
614 .
Case(
"vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
615 .
Case(
"vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
616 .
Case(
"vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
617 .
Case(
"vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
618 .
Case(
"vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
619 .
Case(
"vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
623 }
else if (Name.starts_with(
"vpdpwssd.") ||
624 Name.starts_with(
"vpdpwssds.")) {
627 .
Case(
"vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
628 .
Case(
"vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
629 .
Case(
"vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
630 .
Case(
"vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
631 .
Case(
"vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
632 .
Case(
"vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
640 if (Name.consume_front(
"avx2.")) {
641 if (Name.consume_front(
"vpdpb")) {
644 .
Case(
"ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
645 .
Case(
"ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
646 .
Case(
"ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
647 .
Case(
"ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
648 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
649 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
650 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
651 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
652 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
653 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
654 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
655 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
659 }
else if (Name.consume_front(
"vpdpw")) {
662 .
Case(
"sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
663 .
Case(
"sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
664 .
Case(
"suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
665 .
Case(
"suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
666 .
Case(
"usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
667 .
Case(
"usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
668 .
Case(
"usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
669 .
Case(
"usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
670 .
Case(
"uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
671 .
Case(
"uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
672 .
Case(
"uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
673 .
Case(
"uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
681 if (Name.consume_front(
"avx10.")) {
682 if (Name.consume_front(
"vpdpb")) {
685 .
Case(
"ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
686 .
Case(
"ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
687 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
688 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
689 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
690 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
694 }
else if (Name.consume_front(
"vpdpw")) {
696 .
Case(
"sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
697 .
Case(
"suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
698 .
Case(
"usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
699 .
Case(
"usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
700 .
Case(
"uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
701 .
Case(
"uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
709 if (Name.consume_front(
"avx512bf16.")) {
712 .
Case(
"cvtne2ps2bf16.128",
713 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
714 .
Case(
"cvtne2ps2bf16.256",
715 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
716 .
Case(
"cvtne2ps2bf16.512",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
718 .
Case(
"mask.cvtneps2bf16.128",
719 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
720 .
Case(
"cvtneps2bf16.256",
721 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
722 .
Case(
"cvtneps2bf16.512",
723 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
730 .
Case(
"dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
731 .
Case(
"dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
732 .
Case(
"dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
739 if (Name.consume_front(
"xop.")) {
741 if (Name.starts_with(
"vpermil2")) {
744 auto Idx =
F->getFunctionType()->getParamType(2);
745 if (Idx->isFPOrFPVectorTy()) {
746 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
747 unsigned EltSize = Idx->getScalarSizeInBits();
748 if (EltSize == 64 && IdxSize == 128)
749 ID = Intrinsic::x86_xop_vpermil2pd;
750 else if (EltSize == 32 && IdxSize == 128)
751 ID = Intrinsic::x86_xop_vpermil2ps;
752 else if (EltSize == 64 && IdxSize == 256)
753 ID = Intrinsic::x86_xop_vpermil2pd_256;
755 ID = Intrinsic::x86_xop_vpermil2ps_256;
757 }
else if (
F->arg_size() == 2)
760 .
Case(
"vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
761 .
Case(
"vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
772 if (Name ==
"seh.recoverfp") {
774 Intrinsic::eh_recoverfp);
786 if (Name.starts_with(
"rbit")) {
789 F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
793 if (Name ==
"thread.pointer") {
796 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
800 bool Neon = Name.consume_front(
"neon.");
805 if (Name.consume_front(
"bfdot.")) {
809 .
Cases({
"v2f32.v8i8",
"v4f32.v16i8"},
814 size_t OperandWidth =
F->getReturnType()->getPrimitiveSizeInBits();
815 assert((OperandWidth == 64 || OperandWidth == 128) &&
816 "Unexpected operand width");
818 std::array<Type *, 2> Tys{
829 if (Name.consume_front(
"bfm")) {
831 if (Name.consume_back(
".v4f32.v16i8")) {
877 F->arg_begin()->getType());
881 if (Name.consume_front(
"vst")) {
883 static const Regex vstRegex(
"^([1234]|[234]lane)\\.v[a-z0-9]*$");
887 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
888 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
891 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
892 Intrinsic::arm_neon_vst4lane};
894 auto fArgs =
F->getFunctionType()->params();
895 Type *Tys[] = {fArgs[0], fArgs[1]};
898 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
901 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
910 if (Name.consume_front(
"mve.")) {
912 if (Name ==
"vctp64") {
922 if (Name.starts_with(
"vrintn.v")) {
924 F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
929 if (Name.consume_back(
".v4i1")) {
931 if (Name.consume_back(
".predicated.v2i64.v4i32"))
933 return Name ==
"mull.int" || Name ==
"vqdmull";
935 if (Name.consume_back(
".v2i64")) {
937 bool IsGather = Name.consume_front(
"vldr.gather.");
938 if (IsGather || Name.consume_front(
"vstr.scatter.")) {
939 if (Name.consume_front(
"base.")) {
941 Name.consume_front(
"wb.");
944 return Name ==
"predicated.v2i64";
947 if (Name.consume_front(
"offset.predicated."))
948 return Name == (IsGather ?
"v2i64.p0i64" :
"p0i64.v2i64") ||
949 Name == (IsGather ?
"v2i64.p0" :
"p0.v2i64");
962 if (Name.consume_front(
"cde.vcx")) {
964 if (Name.consume_back(
".predicated.v2i64.v4i1"))
966 return Name ==
"1q" || Name ==
"1qa" || Name ==
"2q" || Name ==
"2qa" ||
967 Name ==
"3q" || Name ==
"3qa";
981 F->arg_begin()->getType());
985 if (Name.starts_with(
"addp")) {
987 if (
F->arg_size() != 2)
990 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
992 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
998 if (Name.starts_with(
"bfcvt")) {
1005 if (Name.consume_front(
"sve.")) {
1007 if (Name.consume_front(
"bf")) {
1008 if (Name ==
"mmla") {
1009 Type *Tys[] = {
F->getReturnType(),
1010 std::next(
F->arg_begin())->getType()};
1012 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1015 if (Name.consume_back(
".lane")) {
1019 .
Case(
"dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1020 .
Case(
"mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1021 .
Case(
"mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1033 if (Name ==
"fcvt.bf16f32" || Name ==
"fcvtnt.bf16f32") {
1038 if (Name.consume_front(
"addqv")) {
1040 if (!
F->getReturnType()->isFPOrFPVectorTy())
1043 auto Args =
F->getFunctionType()->params();
1044 Type *Tys[] = {
F->getReturnType(), Args[1]};
1046 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1050 if (Name.consume_front(
"ld")) {
1052 static const Regex LdRegex(
"^[234](.nxv[a-z0-9]+|$)");
1053 if (LdRegex.
match(Name)) {
1059 "Expected 2 arguments for ld* intrinsic.");
1060 Type *PtrTy =
F->getArg(1)->getType();
1063 Intrinsic::aarch64_sve_ld2_sret,
1064 Intrinsic::aarch64_sve_ld3_sret,
1065 Intrinsic::aarch64_sve_ld4_sret,
1068 F->getParent(), LoadIDs[Name[0] -
'2'], {Ty, PtrTy});
1074 if (Name.consume_front(
"tuple.")) {
1076 if (Name.starts_with(
"get")) {
1078 Type *Tys[] = {
F->getReturnType(),
F->arg_begin()->getType()};
1080 F->getParent(), Intrinsic::vector_extract, Tys);
1084 if (Name.starts_with(
"set")) {
1086 auto Args =
F->getFunctionType()->params();
1087 Type *Tys[] = {Args[0], Args[2], Args[1]};
1089 F->getParent(), Intrinsic::vector_insert, Tys);
1093 static const Regex CreateTupleRegex(
"^create[234](.nxv[a-z0-9]+|$)");
1094 if (CreateTupleRegex.
match(Name)) {
1096 auto Args =
F->getFunctionType()->params();
1097 Type *Tys[] = {
F->getReturnType(), Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1105 if (Name.starts_with(
"rev.nxv")) {
1108 F->getParent(), Intrinsic::vector_reverse,
F->getReturnType());
1120 if (Name.consume_front(
"cp.async.bulk.tensor.g2s.")) {
1124 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1126 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1128 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1129 .
Case(
"tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1130 .
Case(
"tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1131 .
Case(
"tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1132 .
Case(
"tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1133 .
Case(
"tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1142 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1156 size_t FlagStartIndex =
F->getFunctionType()->getNumParams() - 3;
1157 Type *ArgType =
F->getFunctionType()->getParamType(FlagStartIndex);
1167 if (Name.consume_front(
"mapa.shared.cluster"))
1168 if (
F->getReturnType()->getPointerAddressSpace() ==
1170 return Intrinsic::nvvm_mapa_shared_cluster;
1172 if (Name.consume_front(
"cp.async.bulk.")) {
1175 .
Case(
"global.to.shared.cluster",
1176 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1177 .
Case(
"shared.cta.to.cluster",
1178 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1182 if (
F->getArg(0)->getType()->getPointerAddressSpace() ==
1191 if (Name.consume_front(
"fma.rn."))
1193 .
Case(
"bf16", Intrinsic::nvvm_fma_rn_bf16)
1194 .
Case(
"bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1195 .
Case(
"relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1196 .
Case(
"relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1199 if (Name.consume_front(
"fmax."))
1201 .
Case(
"bf16", Intrinsic::nvvm_fmax_bf16)
1202 .
Case(
"bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1203 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1204 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1205 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1206 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1207 .
Case(
"ftz.nan.xorsign.abs.bf16",
1208 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1209 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1210 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1211 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1212 .
Case(
"ftz.xorsign.abs.bf16x2",
1213 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1214 .
Case(
"nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1215 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1216 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1217 .
Case(
"nan.xorsign.abs.bf16x2",
1218 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1219 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1220 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1223 if (Name.consume_front(
"fmin."))
1225 .
Case(
"bf16", Intrinsic::nvvm_fmin_bf16)
1226 .
Case(
"bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1227 .
Case(
"ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1228 .
Case(
"ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1229 .
Case(
"ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1230 .
Case(
"ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1231 .
Case(
"ftz.nan.xorsign.abs.bf16",
1232 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1233 .
Case(
"ftz.nan.xorsign.abs.bf16x2",
1234 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1235 .
Case(
"ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1236 .
Case(
"ftz.xorsign.abs.bf16x2",
1237 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1238 .
Case(
"nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1239 .
Case(
"nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1240 .
Case(
"nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1241 .
Case(
"nan.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1243 .
Case(
"xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1244 .
Case(
"xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1247 if (Name.consume_front(
"neg."))
1249 .
Case(
"bf16", Intrinsic::nvvm_neg_bf16)
1250 .
Case(
"bf16x2", Intrinsic::nvvm_neg_bf16x2)
1257 return Name.consume_front(
"local") || Name.consume_front(
"shared") ||
1258 Name.consume_front(
"global") || Name.consume_front(
"constant") ||
1259 Name.consume_front(
"param");
1265 if (Name.starts_with(
"to.fp16")) {
1269 FuncTy->getReturnType());
1272 if (Name.starts_with(
"from.fp16")) {
1276 FuncTy->getReturnType());
1283 bool CanUpgradeDebugIntrinsicsToRecords) {
1284 assert(
F &&
"Illegal to upgrade a non-existent Function.");
1289 if (!Name.consume_front(
"llvm.") || Name.empty())
1295 bool IsArm = Name.consume_front(
"arm.");
1296 if (IsArm || Name.consume_front(
"aarch64.")) {
1302 if (Name.consume_front(
"amdgcn.")) {
1303 if (Name ==
"alignbit") {
1306 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1310 if (Name.consume_front(
"atomic.")) {
1311 if (Name.starts_with(
"inc") || Name.starts_with(
"dec") ||
1312 Name.starts_with(
"cond.sub") || Name.starts_with(
"csub")) {
1321 switch (
F->getIntrinsicID()) {
1325 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1326 if (
F->arg_size() == 7) {
1331 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1334 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1336 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1337 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1338 if (
F->arg_size() == 8) {
1345 if (Name.consume_front(
"ds.") || Name.consume_front(
"global.atomic.") ||
1346 Name.consume_front(
"flat.atomic.")) {
1347 if (Name.starts_with(
"fadd") ||
1349 (Name.starts_with(
"fmin") && !Name.starts_with(
"fmin.num")) ||
1350 (Name.starts_with(
"fmax") && !Name.starts_with(
"fmax.num"))) {
1358 if (Name.starts_with(
"ldexp.")) {
1361 F->getParent(), Intrinsic::ldexp,
1362 {F->getReturnType(), F->getArg(1)->getType()});
1371 if (
F->arg_size() == 1) {
1372 if (Name.consume_front(
"convert.")) {
1386 F->arg_begin()->getType());
1391 if (
F->arg_size() == 2 && Name ==
"coro.end") {
1394 Intrinsic::coro_end);
1401 if (Name.consume_front(
"dbg.")) {
1403 if (CanUpgradeDebugIntrinsicsToRecords) {
1404 if (Name ==
"addr" || Name ==
"value" || Name ==
"assign" ||
1405 Name ==
"declare" || Name ==
"label") {
1414 if (Name ==
"addr" || (Name ==
"value" &&
F->arg_size() == 4)) {
1417 Intrinsic::dbg_value);
1424 if (Name.consume_front(
"experimental.vector.")) {
1430 .
StartsWith(
"extract.", Intrinsic::vector_extract)
1431 .
StartsWith(
"insert.", Intrinsic::vector_insert)
1432 .
StartsWith(
"reverse.", Intrinsic::vector_reverse)
1433 .
StartsWith(
"interleave2.", Intrinsic::vector_interleave2)
1434 .
StartsWith(
"deinterleave2.", Intrinsic::vector_deinterleave2)
1436 Intrinsic::vector_partial_reduce_add)
1439 const auto *FT =
F->getFunctionType();
1441 if (
ID == Intrinsic::vector_extract ||
1442 ID == Intrinsic::vector_interleave2)
1445 if (
ID != Intrinsic::vector_interleave2)
1447 if (
ID == Intrinsic::vector_insert ||
1448 ID == Intrinsic::vector_partial_reduce_add)
1456 if (Name.consume_front(
"reduce.")) {
1458 static const Regex R(
"^([a-z]+)\\.[a-z][0-9]+");
1459 if (R.match(Name, &
Groups))
1461 .
Case(
"add", Intrinsic::vector_reduce_add)
1462 .
Case(
"mul", Intrinsic::vector_reduce_mul)
1463 .
Case(
"and", Intrinsic::vector_reduce_and)
1464 .
Case(
"or", Intrinsic::vector_reduce_or)
1465 .
Case(
"xor", Intrinsic::vector_reduce_xor)
1466 .
Case(
"smax", Intrinsic::vector_reduce_smax)
1467 .
Case(
"smin", Intrinsic::vector_reduce_smin)
1468 .
Case(
"umax", Intrinsic::vector_reduce_umax)
1469 .
Case(
"umin", Intrinsic::vector_reduce_umin)
1470 .
Case(
"fmax", Intrinsic::vector_reduce_fmax)
1471 .
Case(
"fmin", Intrinsic::vector_reduce_fmin)
1476 static const Regex R2(
"^v2\\.([a-z]+)\\.[fi][0-9]+");
1481 .
Case(
"fadd", Intrinsic::vector_reduce_fadd)
1482 .
Case(
"fmul", Intrinsic::vector_reduce_fmul)
1487 auto Args =
F->getFunctionType()->params();
1489 {Args[V2 ? 1 : 0]});
1495 if (Name.consume_front(
"splice"))
1499 if (Name.consume_front(
"experimental.stepvector.")) {
1503 F->getParent(),
ID,
F->getFunctionType()->getReturnType());
1508 if (Name.starts_with(
"flt.rounds")) {
1511 Intrinsic::get_rounding);
1516 if (Name.starts_with(
"invariant.group.barrier")) {
1518 auto Args =
F->getFunctionType()->params();
1519 Type* ObjectPtr[1] = {Args[0]};
1522 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1527 if ((Name.starts_with(
"lifetime.start") ||
1528 Name.starts_with(
"lifetime.end")) &&
1529 F->arg_size() == 2) {
1531 ? Intrinsic::lifetime_start
1532 : Intrinsic::lifetime_end;
1535 F->getArg(0)->getType());
1544 .StartsWith(
"memcpy.", Intrinsic::memcpy)
1545 .StartsWith(
"memmove.", Intrinsic::memmove)
1547 if (
F->arg_size() == 5) {
1551 F->getFunctionType()->params().slice(0, 3);
1557 if (Name.starts_with(
"memset.") &&
F->arg_size() == 5) {
1560 const auto *FT =
F->getFunctionType();
1561 Type *ParamTypes[2] = {
1562 FT->getParamType(0),
1566 Intrinsic::memset, ParamTypes);
1572 .
StartsWith(
"masked.load", Intrinsic::masked_load)
1573 .
StartsWith(
"masked.gather", Intrinsic::masked_gather)
1574 .
StartsWith(
"masked.store", Intrinsic::masked_store)
1575 .
StartsWith(
"masked.scatter", Intrinsic::masked_scatter)
1577 if (MaskedID &&
F->arg_size() == 4) {
1579 if (MaskedID == Intrinsic::masked_load ||
1580 MaskedID == Intrinsic::masked_gather) {
1582 F->getParent(), MaskedID,
1583 {F->getReturnType(), F->getArg(0)->getType()});
1587 F->getParent(), MaskedID,
1588 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1594 if (Name.consume_front(
"nvvm.")) {
1596 if (
F->arg_size() == 1) {
1599 .
Cases({
"brev32",
"brev64"}, Intrinsic::bitreverse)
1600 .Case(
"clz.i", Intrinsic::ctlz)
1601 .
Case(
"popc.i", Intrinsic::ctpop)
1605 {F->getReturnType()});
1608 }
else if (
F->arg_size() == 2) {
1611 .
Cases({
"max.s",
"max.i",
"max.ll"}, Intrinsic::smax)
1612 .Cases({
"min.s",
"min.i",
"min.ll"}, Intrinsic::smin)
1613 .Cases({
"max.us",
"max.ui",
"max.ull"}, Intrinsic::umax)
1614 .Cases({
"min.us",
"min.ui",
"min.ull"}, Intrinsic::umin)
1618 {F->getReturnType()});
1624 if (!
F->getReturnType()->getScalarType()->isBFloatTy()) {
1652 bool Expand =
false;
1653 if (Name.consume_front(
"abs."))
1656 Name ==
"i" || Name ==
"ll" || Name ==
"bf16" || Name ==
"bf16x2";
1657 else if (Name.consume_front(
"fabs."))
1659 Expand = Name ==
"f" || Name ==
"ftz.f" || Name ==
"d";
1660 else if (Name.consume_front(
"ex2.approx."))
1663 Name ==
"f" || Name ==
"ftz.f" || Name ==
"d" || Name ==
"f16x2";
1664 else if (Name.consume_front(
"atomic.load."))
1673 else if (Name.consume_front(
"atomic."))
1688 else if (Name.consume_front(
"bitcast."))
1691 Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" || Name ==
"d2ll";
1692 else if (Name.consume_front(
"rotate."))
1694 Expand = Name ==
"b32" || Name ==
"b64" || Name ==
"right.b64";
1695 else if (Name.consume_front(
"ptr.gen.to."))
1698 else if (Name.consume_front(
"ptr."))
1701 else if (Name.consume_front(
"ldg.global."))
1703 Expand = (Name.starts_with(
"i.") || Name.starts_with(
"f.") ||
1704 Name.starts_with(
"p."));
1707 .
Case(
"barrier0",
true)
1708 .
Case(
"barrier.n",
true)
1709 .
Case(
"barrier.sync.cnt",
true)
1710 .
Case(
"barrier.sync",
true)
1711 .
Case(
"barrier",
true)
1712 .
Case(
"bar.sync",
true)
1713 .
Case(
"barrier0.popc",
true)
1714 .
Case(
"barrier0.and",
true)
1715 .
Case(
"barrier0.or",
true)
1716 .
Case(
"clz.ll",
true)
1717 .
Case(
"popc.ll",
true)
1719 .
Case(
"swap.lo.hi.b64",
true)
1720 .
Case(
"tanh.approx.f32",
true)
1732 if (Name.starts_with(
"objectsize.")) {
1733 Type *Tys[2] = {
F->getReturnType(),
F->arg_begin()->getType() };
1734 if (
F->arg_size() == 2 ||
F->arg_size() == 3) {
1737 Intrinsic::objectsize, Tys);
1744 if (Name.starts_with(
"ptr.annotation.") &&
F->arg_size() == 4) {
1747 F->getParent(), Intrinsic::ptr_annotation,
1748 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1754 if (Name.consume_front(
"riscv.")) {
1757 .
Case(
"aes32dsi", Intrinsic::riscv_aes32dsi)
1758 .
Case(
"aes32dsmi", Intrinsic::riscv_aes32dsmi)
1759 .
Case(
"aes32esi", Intrinsic::riscv_aes32esi)
1760 .
Case(
"aes32esmi", Intrinsic::riscv_aes32esmi)
1763 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1776 if (!
F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1777 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1786 .
StartsWith(
"sha256sig0", Intrinsic::riscv_sha256sig0)
1787 .
StartsWith(
"sha256sig1", Intrinsic::riscv_sha256sig1)
1788 .
StartsWith(
"sha256sum0", Intrinsic::riscv_sha256sum0)
1789 .
StartsWith(
"sha256sum1", Intrinsic::riscv_sha256sum1)
1794 if (
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1803 if (Name ==
"clmul.i32" || Name ==
"clmul.i64") {
1805 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1814 if (Name ==
"stackprotectorcheck") {
1821 if (Name ==
"thread.pointer") {
1823 F->getParent(), Intrinsic::thread_pointer,
F->getReturnType());
1829 if (Name ==
"var.annotation" &&
F->arg_size() == 4) {
1832 F->getParent(), Intrinsic::var_annotation,
1833 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1836 if (Name.consume_front(
"vector.splice")) {
1837 if (Name.starts_with(
".left") || Name.starts_with(
".right"))
1845 if (Name.consume_front(
"wasm.")) {
1848 .
StartsWith(
"fma.", Intrinsic::wasm_relaxed_madd)
1849 .
StartsWith(
"fms.", Intrinsic::wasm_relaxed_nmadd)
1850 .
StartsWith(
"laneselect.", Intrinsic::wasm_relaxed_laneselect)
1855 F->getReturnType());
1859 if (Name.consume_front(
"dot.i8x16.i7x16.")) {
1861 .
Case(
"signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1863 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1882 if (ST && (!
ST->isLiteral() ||
ST->isPacked()) &&
1892 std::string
Name =
F->getName().str();
1895 Name,
F->getParent());
1906 if (Result != std::nullopt) {
1919 bool CanUpgradeDebugIntrinsicsToRecords) {
1939 GV->
getName() ==
"llvm.global_dtors")) ||
1954 unsigned N =
Init->getNumOperands();
1955 std::vector<Constant *> NewCtors(
N);
1956 for (
unsigned i = 0; i !=
N; ++i) {
1959 Ctor->getAggregateElement(1),
1973 unsigned NumElts = ResultTy->getNumElements() * 8;
1977 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
1987 for (
unsigned l = 0; l != NumElts; l += 16)
1988 for (
unsigned i = 0; i != 16; ++i) {
1989 unsigned Idx = NumElts + i - Shift;
1991 Idx -= NumElts - 16;
1992 Idxs[l + i] = Idx + l;
1995 Res = Builder.CreateShuffleVector(Res,
Op,
ArrayRef(Idxs, NumElts));
1999 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2007 unsigned NumElts = ResultTy->getNumElements() * 8;
2011 Op = Builder.CreateBitCast(
Op, VecTy,
"cast");
2021 for (
unsigned l = 0; l != NumElts; l += 16)
2022 for (
unsigned i = 0; i != 16; ++i) {
2023 unsigned Idx = i + Shift;
2025 Idx += NumElts - 16;
2026 Idxs[l + i] = Idx + l;
2029 Res = Builder.CreateShuffleVector(
Op, Res,
ArrayRef(Idxs, NumElts));
2033 return Builder.CreateBitCast(Res, ResultTy,
"cast");
2041 Mask = Builder.CreateBitCast(Mask, MaskTy);
2047 for (
unsigned i = 0; i != NumElts; ++i)
2049 Mask = Builder.CreateShuffleVector(Mask, Mask,
ArrayRef(Indices, NumElts),
2060 if (
C->isAllOnesValue())
2065 return Builder.CreateSelect(Mask, Op0, Op1);
2072 if (
C->isAllOnesValue())
2076 Mask->getType()->getIntegerBitWidth());
2077 Mask = Builder.CreateBitCast(Mask, MaskTy);
2078 Mask = Builder.CreateExtractElement(Mask, (
uint64_t)0);
2079 return Builder.CreateSelect(Mask, Op0, Op1);
2092 assert((IsVALIGN || NumElts % 16 == 0) &&
"Illegal NumElts for PALIGNR!");
2093 assert((!IsVALIGN || NumElts <= 16) &&
"NumElts too large for VALIGN!");
2098 ShiftVal &= (NumElts - 1);
2107 if (ShiftVal > 16) {
2115 for (
unsigned l = 0; l < NumElts; l += 16) {
2116 for (
unsigned i = 0; i != 16; ++i) {
2117 unsigned Idx = ShiftVal + i;
2118 if (!IsVALIGN && Idx >= 16)
2119 Idx += NumElts - 16;
2120 Indices[l + i] = Idx + l;
2125 Op1, Op0,
ArrayRef(Indices, NumElts),
"palignr");
2131 bool ZeroMask,
bool IndexForm) {
2134 unsigned EltWidth = Ty->getScalarSizeInBits();
2135 bool IsFloat = Ty->isFPOrFPVectorTy();
2137 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2139 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2141 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2143 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2145 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2183 Value *V = Builder.CreateIntrinsic(IID, Args);
2195 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2206 bool IsRotateRight) {
2216 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2217 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2220 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2221 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2266 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2271 bool IsShiftRight,
bool ZeroMask) {
2285 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(),
false);
2286 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2289 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2290 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2305 const Align Alignment =
2307 ?
Align(
Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2312 if (
C->isAllOnesValue())
2313 return Builder.CreateAlignedStore(
Data, Ptr, Alignment);
2318 return Builder.CreateMaskedStore(
Data, Ptr, Alignment, Mask);
2324 const Align Alignment =
2333 if (
C->isAllOnesValue())
2334 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2339 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2345 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2346 {Op0, Builder.getInt1(
false)});
2361 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2362 LHS = Builder.CreateShl(
LHS, ShiftAmt);
2363 LHS = Builder.CreateAShr(
LHS, ShiftAmt);
2364 RHS = Builder.CreateShl(
RHS, ShiftAmt);
2365 RHS = Builder.CreateAShr(
RHS, ShiftAmt);
2368 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2369 LHS = Builder.CreateAnd(
LHS, Mask);
2370 RHS = Builder.CreateAnd(
RHS, Mask);
2387 if (!
C || !
C->isAllOnesValue())
2388 Vec = Builder.CreateAnd(Vec,
getX86MaskVec(Builder, Mask, NumElts));
2393 for (
unsigned i = 0; i != NumElts; ++i)
2395 for (
unsigned i = NumElts; i != 8; ++i)
2396 Indices[i] = NumElts + i % NumElts;
2397 Vec = Builder.CreateShuffleVector(Vec,
2401 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2405 unsigned CC,
bool Signed) {
2413 }
else if (CC == 7) {
2449 Value* AndNode = Builder.CreateAnd(Mask,
APInt(8, 1));
2450 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2452 Value* Extract2 = Builder.CreateExtractElement(Src, (
uint64_t)0);
2453 Value*
Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2462 return Builder.CreateSExt(Mask, ReturnOp,
"vpmovm2");
2468 Name = Name.substr(12);
2473 if (Name.starts_with(
"max.p")) {
2474 if (VecWidth == 128 && EltWidth == 32)
2475 IID = Intrinsic::x86_sse_max_ps;
2476 else if (VecWidth == 128 && EltWidth == 64)
2477 IID = Intrinsic::x86_sse2_max_pd;
2478 else if (VecWidth == 256 && EltWidth == 32)
2479 IID = Intrinsic::x86_avx_max_ps_256;
2480 else if (VecWidth == 256 && EltWidth == 64)
2481 IID = Intrinsic::x86_avx_max_pd_256;
2484 }
else if (Name.starts_with(
"min.p")) {
2485 if (VecWidth == 128 && EltWidth == 32)
2486 IID = Intrinsic::x86_sse_min_ps;
2487 else if (VecWidth == 128 && EltWidth == 64)
2488 IID = Intrinsic::x86_sse2_min_pd;
2489 else if (VecWidth == 256 && EltWidth == 32)
2490 IID = Intrinsic::x86_avx_min_ps_256;
2491 else if (VecWidth == 256 && EltWidth == 64)
2492 IID = Intrinsic::x86_avx_min_pd_256;
2495 }
else if (Name.starts_with(
"pshuf.b.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_pshuf_b;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_pshuf_b_512;
2504 }
else if (Name.starts_with(
"pmul.hr.sw.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2513 }
else if (Name.starts_with(
"pmulh.w.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_pmulh_w;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_pmulh_w;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_pmulh_w_512;
2522 }
else if (Name.starts_with(
"pmulhu.w.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse2_pmulhu_w;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_pmulhu_w;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2531 }
else if (Name.starts_with(
"pmaddw.d.")) {
2532 if (VecWidth == 128)
2533 IID = Intrinsic::x86_sse2_pmadd_wd;
2534 else if (VecWidth == 256)
2535 IID = Intrinsic::x86_avx2_pmadd_wd;
2536 else if (VecWidth == 512)
2537 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2540 }
else if (Name.starts_with(
"pmaddubs.w.")) {
2541 if (VecWidth == 128)
2542 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2543 else if (VecWidth == 256)
2544 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2545 else if (VecWidth == 512)
2546 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2549 }
else if (Name.starts_with(
"packsswb.")) {
2550 if (VecWidth == 128)
2551 IID = Intrinsic::x86_sse2_packsswb_128;
2552 else if (VecWidth == 256)
2553 IID = Intrinsic::x86_avx2_packsswb;
2554 else if (VecWidth == 512)
2555 IID = Intrinsic::x86_avx512_packsswb_512;
2558 }
else if (Name.starts_with(
"packssdw.")) {
2559 if (VecWidth == 128)
2560 IID = Intrinsic::x86_sse2_packssdw_128;
2561 else if (VecWidth == 256)
2562 IID = Intrinsic::x86_avx2_packssdw;
2563 else if (VecWidth == 512)
2564 IID = Intrinsic::x86_avx512_packssdw_512;
2567 }
else if (Name.starts_with(
"packuswb.")) {
2568 if (VecWidth == 128)
2569 IID = Intrinsic::x86_sse2_packuswb_128;
2570 else if (VecWidth == 256)
2571 IID = Intrinsic::x86_avx2_packuswb;
2572 else if (VecWidth == 512)
2573 IID = Intrinsic::x86_avx512_packuswb_512;
2576 }
else if (Name.starts_with(
"packusdw.")) {
2577 if (VecWidth == 128)
2578 IID = Intrinsic::x86_sse41_packusdw;
2579 else if (VecWidth == 256)
2580 IID = Intrinsic::x86_avx2_packusdw;
2581 else if (VecWidth == 512)
2582 IID = Intrinsic::x86_avx512_packusdw_512;
2585 }
else if (Name.starts_with(
"vpermilvar.")) {
2586 if (VecWidth == 128 && EltWidth == 32)
2587 IID = Intrinsic::x86_avx_vpermilvar_ps;
2588 else if (VecWidth == 128 && EltWidth == 64)
2589 IID = Intrinsic::x86_avx_vpermilvar_pd;
2590 else if (VecWidth == 256 && EltWidth == 32)
2591 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2592 else if (VecWidth == 256 && EltWidth == 64)
2593 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2594 else if (VecWidth == 512 && EltWidth == 32)
2595 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2596 else if (VecWidth == 512 && EltWidth == 64)
2597 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2600 }
else if (Name ==
"cvtpd2dq.256") {
2601 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2602 }
else if (Name ==
"cvtpd2ps.256") {
2603 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2604 }
else if (Name ==
"cvttpd2dq.256") {
2605 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2606 }
else if (Name ==
"cvttps2dq.128") {
2607 IID = Intrinsic::x86_sse2_cvttps2dq;
2608 }
else if (Name ==
"cvttps2dq.256") {
2609 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2610 }
else if (Name.starts_with(
"permvar.")) {
2612 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2613 IID = Intrinsic::x86_avx2_permps;
2614 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2615 IID = Intrinsic::x86_avx2_permd;
2616 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2617 IID = Intrinsic::x86_avx512_permvar_df_256;
2618 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2619 IID = Intrinsic::x86_avx512_permvar_di_256;
2620 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2621 IID = Intrinsic::x86_avx512_permvar_sf_512;
2622 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2623 IID = Intrinsic::x86_avx512_permvar_si_512;
2624 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2625 IID = Intrinsic::x86_avx512_permvar_df_512;
2626 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2627 IID = Intrinsic::x86_avx512_permvar_di_512;
2628 else if (VecWidth == 128 && EltWidth == 16)
2629 IID = Intrinsic::x86_avx512_permvar_hi_128;
2630 else if (VecWidth == 256 && EltWidth == 16)
2631 IID = Intrinsic::x86_avx512_permvar_hi_256;
2632 else if (VecWidth == 512 && EltWidth == 16)
2633 IID = Intrinsic::x86_avx512_permvar_hi_512;
2634 else if (VecWidth == 128 && EltWidth == 8)
2635 IID = Intrinsic::x86_avx512_permvar_qi_128;
2636 else if (VecWidth == 256 && EltWidth == 8)
2637 IID = Intrinsic::x86_avx512_permvar_qi_256;
2638 else if (VecWidth == 512 && EltWidth == 8)
2639 IID = Intrinsic::x86_avx512_permvar_qi_512;
2642 }
else if (Name.starts_with(
"dbpsadbw.")) {
2643 if (VecWidth == 128)
2644 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2645 else if (VecWidth == 256)
2646 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2647 else if (VecWidth == 512)
2648 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2651 }
else if (Name.starts_with(
"pmultishift.qb.")) {
2652 if (VecWidth == 128)
2653 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2654 else if (VecWidth == 256)
2655 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2656 else if (VecWidth == 512)
2657 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2660 }
else if (Name.starts_with(
"conflict.")) {
2661 if (Name[9] ==
'd' && VecWidth == 128)
2662 IID = Intrinsic::x86_avx512_conflict_d_128;
2663 else if (Name[9] ==
'd' && VecWidth == 256)
2664 IID = Intrinsic::x86_avx512_conflict_d_256;
2665 else if (Name[9] ==
'd' && VecWidth == 512)
2666 IID = Intrinsic::x86_avx512_conflict_d_512;
2667 else if (Name[9] ==
'q' && VecWidth == 128)
2668 IID = Intrinsic::x86_avx512_conflict_q_128;
2669 else if (Name[9] ==
'q' && VecWidth == 256)
2670 IID = Intrinsic::x86_avx512_conflict_q_256;
2671 else if (Name[9] ==
'q' && VecWidth == 512)
2672 IID = Intrinsic::x86_avx512_conflict_q_512;
2675 }
else if (Name.starts_with(
"pavg.")) {
2676 if (Name[5] ==
'b' && VecWidth == 128)
2677 IID = Intrinsic::x86_sse2_pavg_b;
2678 else if (Name[5] ==
'b' && VecWidth == 256)
2679 IID = Intrinsic::x86_avx2_pavg_b;
2680 else if (Name[5] ==
'b' && VecWidth == 512)
2681 IID = Intrinsic::x86_avx512_pavg_b_512;
2682 else if (Name[5] ==
'w' && VecWidth == 128)
2683 IID = Intrinsic::x86_sse2_pavg_w;
2684 else if (Name[5] ==
'w' && VecWidth == 256)
2685 IID = Intrinsic::x86_avx2_pavg_w;
2686 else if (Name[5] ==
'w' && VecWidth == 512)
2687 IID = Intrinsic::x86_avx512_pavg_w_512;
2696 Rep = Builder.CreateIntrinsic(IID, Args);
2707 if (AsmStr->find(
"mov\tfp") == 0 &&
2708 AsmStr->find(
"objc_retainAutoreleaseReturnValue") != std::string::npos &&
2709 (Pos = AsmStr->find(
"# marker")) != std::string::npos) {
2710 AsmStr->replace(Pos, 1,
";");
2716 Value *Rep =
nullptr;
2718 if (Name ==
"abs.i" || Name ==
"abs.ll") {
2720 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->
getType()},
2721 {Arg, Builder.getTrue()},
2723 }
else if (Name ==
"abs.bf16" || Name ==
"abs.bf16x2") {
2724 Type *Ty = (Name ==
"abs.bf16")
2728 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2729 Rep = Builder.CreateBitCast(Abs, CI->
getType());
2730 }
else if (Name ==
"fabs.f" || Name ==
"fabs.ftz.f" || Name ==
"fabs.d") {
2731 Intrinsic::ID IID = (Name ==
"fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2732 : Intrinsic::nvvm_fabs;
2733 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2734 }
else if (Name.consume_front(
"ex2.approx.")) {
2736 Intrinsic::ID IID = Name.starts_with(
"ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2737 : Intrinsic::nvvm_ex2_approx;
2738 Rep = Builder.CreateUnaryIntrinsic(IID, CI->
getArgOperand(0));
2739 }
else if (Name.starts_with(
"atomic.load.add.f32.p") ||
2740 Name.starts_with(
"atomic.load.add.f64.p")) {
2743 Rep = Builder.CreateAtomicRMW(
2749 }
else if (Name.starts_with(
"atomic.load.inc.32.p") ||
2750 Name.starts_with(
"atomic.load.dec.32.p")) {
2755 Rep = Builder.CreateAtomicRMW(
2759 }
else if (Name.starts_with(
"atomic.") && Name.contains(
".gen.")) {
2765 Op.contains(
".cta.") ?
"block" :
"");
2766 if (
Op.starts_with(
"cas.")) {
2768 Value *Pair = Builder.CreateAtomicCmpXchg(
2771 Rep = Builder.CreateExtractValue(Pair, 0);
2789 "unexpected nvvm scoped atomic intrinsic");
2790 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val,
MaybeAlign(),
2793 }
else if (Name ==
"clz.ll") {
2796 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->
getType()},
2797 {Arg, Builder.getFalse()},
2799 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(),
"ctlz.trunc");
2800 }
else if (Name ==
"popc.ll") {
2804 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->
getType()},
2805 Arg,
nullptr,
"ctpop");
2806 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(),
"ctpop.trunc");
2807 }
else if (Name ==
"h2f") {
2809 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
2810 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2811 }
else if (Name.consume_front(
"bitcast.") &&
2812 (Name ==
"f2i" || Name ==
"i2f" || Name ==
"ll2d" ||
2815 }
else if (Name ==
"rotate.b32") {
2818 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2819 {Arg, Arg, ShiftAmt});
2820 }
else if (Name ==
"rotate.b64") {
2824 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2825 {Arg, Arg, ZExtShiftAmt});
2826 }
else if (Name ==
"rotate.right.b64") {
2830 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2831 {Arg, Arg, ZExtShiftAmt});
2832 }
else if (Name ==
"swap.lo.hi.b64") {
2835 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2836 {Arg, Arg, Builder.getInt64(32)});
2837 }
else if ((Name.consume_front(
"ptr.gen.to.") &&
2840 Name.starts_with(
".to.gen"))) {
2842 }
else if (Name.consume_front(
"ldg.global")) {
2846 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2849 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2851 }
else if (Name ==
"tanh.approx.f32") {
2855 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->
getArgOperand(0),
2857 }
else if (Name ==
"barrier0" || Name ==
"barrier.n" || Name ==
"bar.sync") {
2859 Name.ends_with(
'0') ? Builder.getInt32(0) : CI->
getArgOperand(0);
2860 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2862 }
else if (Name ==
"barrier") {
2863 Rep = Builder.CreateIntrinsic(
2864 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2866 }
else if (Name ==
"barrier.sync") {
2867 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2869 }
else if (Name ==
"barrier.sync.cnt") {
2870 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2872 }
else if (Name ==
"barrier0.popc" || Name ==
"barrier0.and" ||
2873 Name ==
"barrier0.or") {
2875 C = Builder.CreateICmpNE(
C, Builder.getInt32(0));
2879 .
Case(
"barrier0.popc",
2880 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2881 .
Case(
"barrier0.and",
2882 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2883 .
Case(
"barrier0.or",
2884 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2885 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0),
C});
2886 Rep = Builder.CreateZExt(Bar, CI->
getType());
2890 !
F->getReturnType()->getScalarType()->isBFloatTy()) {
2900 ? Builder.CreateBitCast(Arg, NewType)
2903 Rep = Builder.CreateCall(NewFn, Args);
2904 if (
F->getReturnType()->isIntegerTy())
2905 Rep = Builder.CreateBitCast(Rep,
F->getReturnType());
2915 Value *Rep =
nullptr;
2917 if (Name.starts_with(
"sse4a.movnt.")) {
2929 Builder.CreateExtractElement(Arg1, (
uint64_t)0,
"extractelement");
2932 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2933 }
else if (Name.starts_with(
"avx.movnt.") ||
2934 Name.starts_with(
"avx512.storent.")) {
2946 SI->setMetadata(LLVMContext::MD_nontemporal,
Node);
2947 }
else if (Name ==
"sse2.storel.dq") {
2952 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
2953 Value *Elt = Builder.CreateExtractElement(BC0, (
uint64_t)0);
2954 Builder.CreateAlignedStore(Elt, Arg0,
Align(1));
2955 }
else if (Name.starts_with(
"sse.storeu.") ||
2956 Name.starts_with(
"sse2.storeu.") ||
2957 Name.starts_with(
"avx.storeu.")) {
2960 Builder.CreateAlignedStore(Arg1, Arg0,
Align(1));
2961 }
else if (Name ==
"avx512.mask.store.ss") {
2965 }
else if (Name.starts_with(
"avx512.mask.store")) {
2967 bool Aligned = Name[17] !=
'u';
2970 }
else if (Name.starts_with(
"sse2.pcmp") || Name.starts_with(
"avx2.pcmp")) {
2973 bool CmpEq = Name[9] ==
'e';
2976 Rep = Builder.CreateSExt(Rep, CI->
getType(),
"");
2977 }
else if (Name.starts_with(
"avx512.broadcastm")) {
2984 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2985 }
else if (Name ==
"sse.sqrt.ss" || Name ==
"sse2.sqrt.sd") {
2987 Value *Elt0 = Builder.CreateExtractElement(Vec, (
uint64_t)0);
2988 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->
getType(), Elt0);
2989 Rep = Builder.CreateInsertElement(Vec, Elt0, (
uint64_t)0);
2990 }
else if (Name.starts_with(
"avx.sqrt.p") ||
2991 Name.starts_with(
"sse2.sqrt.p") ||
2992 Name.starts_with(
"sse.sqrt.p")) {
2993 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
2994 {CI->getArgOperand(0)});
2995 }
else if (Name.starts_with(
"avx512.mask.sqrt.p")) {
2999 Intrinsic::ID IID = Name[18] ==
's' ? Intrinsic::x86_avx512_sqrt_ps_512
3000 : Intrinsic::x86_avx512_sqrt_pd_512;
3003 Rep = Builder.CreateIntrinsic(IID, Args);
3005 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->
getType(),
3006 {CI->getArgOperand(0)});
3010 }
else if (Name.starts_with(
"avx512.ptestm") ||
3011 Name.starts_with(
"avx512.ptestnm")) {
3015 Rep = Builder.CreateAnd(Op0, Op1);
3021 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3023 }
else if (Name.starts_with(
"avx512.mask.pbroadcast")) {
3026 Rep = Builder.CreateVectorSplat(NumElts, CI->
getArgOperand(0));
3029 }
else if (Name.starts_with(
"avx512.kunpck")) {
3034 for (
unsigned i = 0; i != NumElts; ++i)
3043 Rep = Builder.CreateShuffleVector(
RHS,
LHS,
ArrayRef(Indices, NumElts));
3044 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3045 }
else if (Name ==
"avx512.kand.w") {
3048 Rep = Builder.CreateAnd(
LHS,
RHS);
3049 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3050 }
else if (Name ==
"avx512.kandn.w") {
3053 LHS = Builder.CreateNot(
LHS);
3054 Rep = Builder.CreateAnd(
LHS,
RHS);
3055 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3056 }
else if (Name ==
"avx512.kor.w") {
3059 Rep = Builder.CreateOr(
LHS,
RHS);
3060 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3061 }
else if (Name ==
"avx512.kxor.w") {
3064 Rep = Builder.CreateXor(
LHS,
RHS);
3065 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3066 }
else if (Name ==
"avx512.kxnor.w") {
3069 LHS = Builder.CreateNot(
LHS);
3070 Rep = Builder.CreateXor(
LHS,
RHS);
3071 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3072 }
else if (Name ==
"avx512.knot.w") {
3074 Rep = Builder.CreateNot(Rep);
3075 Rep = Builder.CreateBitCast(Rep, CI->
getType());
3076 }
else if (Name ==
"avx512.kortestz.w" || Name ==
"avx512.kortestc.w") {
3079 Rep = Builder.CreateOr(
LHS,
RHS);
3080 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3082 if (Name[14] ==
'c')
3086 Rep = Builder.CreateICmpEQ(Rep,
C);
3087 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3088 }
else if (Name ==
"sse.add.ss" || Name ==
"sse2.add.sd" ||
3089 Name ==
"sse.sub.ss" || Name ==
"sse2.sub.sd" ||
3090 Name ==
"sse.mul.ss" || Name ==
"sse2.mul.sd" ||
3091 Name ==
"sse.div.ss" || Name ==
"sse2.div.sd") {
3094 ConstantInt::get(I32Ty, 0));
3096 ConstantInt::get(I32Ty, 0));
3098 if (Name.contains(
".add."))
3099 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3100 else if (Name.contains(
".sub."))
3101 EltOp = Builder.CreateFSub(Elt0, Elt1);
3102 else if (Name.contains(
".mul."))
3103 EltOp = Builder.CreateFMul(Elt0, Elt1);
3105 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3106 Rep = Builder.CreateInsertElement(CI->
getArgOperand(0), EltOp,
3107 ConstantInt::get(I32Ty, 0));
3108 }
else if (Name.starts_with(
"avx512.mask.pcmp")) {
3110 bool CmpEq = Name[16] ==
'e';
3112 }
else if (Name.starts_with(
"avx512.mask.vpshufbitqmb.")) {
3121 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3124 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3127 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3134 }
else if (Name.starts_with(
"avx512.mask.fpclass.p")) {
3139 if (VecWidth == 128 && EltWidth == 32)
3140 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3141 else if (VecWidth == 256 && EltWidth == 32)
3142 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3143 else if (VecWidth == 512 && EltWidth == 32)
3144 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3145 else if (VecWidth == 128 && EltWidth == 64)
3146 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3147 else if (VecWidth == 256 && EltWidth == 64)
3148 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3149 else if (VecWidth == 512 && EltWidth == 64)
3150 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3157 }
else if (Name.starts_with(
"avx512.cmp.p")) {
3159 Type *OpTy = Args[0]->getType();
3163 if (VecWidth == 128 && EltWidth == 32)
3164 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3165 else if (VecWidth == 256 && EltWidth == 32)
3166 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3167 else if (VecWidth == 512 && EltWidth == 32)
3168 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3169 else if (VecWidth == 128 && EltWidth == 64)
3170 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3171 else if (VecWidth == 256 && EltWidth == 64)
3172 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3173 else if (VecWidth == 512 && EltWidth == 64)
3174 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3179 if (VecWidth == 512)
3181 Args.push_back(Mask);
3183 Rep = Builder.CreateIntrinsic(IID, Args);
3184 }
else if (Name.starts_with(
"avx512.mask.cmp.")) {
3188 }
else if (Name.starts_with(
"avx512.mask.ucmp.")) {
3191 }
else if (Name.starts_with(
"avx512.cvtb2mask.") ||
3192 Name.starts_with(
"avx512.cvtw2mask.") ||
3193 Name.starts_with(
"avx512.cvtd2mask.") ||
3194 Name.starts_with(
"avx512.cvtq2mask.")) {
3199 }
else if (Name ==
"ssse3.pabs.b.128" || Name ==
"ssse3.pabs.w.128" ||
3200 Name ==
"ssse3.pabs.d.128" || Name.starts_with(
"avx2.pabs") ||
3201 Name.starts_with(
"avx512.mask.pabs")) {
3203 }
else if (Name ==
"sse41.pmaxsb" || Name ==
"sse2.pmaxs.w" ||
3204 Name ==
"sse41.pmaxsd" || Name.starts_with(
"avx2.pmaxs") ||
3205 Name.starts_with(
"avx512.mask.pmaxs")) {
3207 }
else if (Name ==
"sse2.pmaxu.b" || Name ==
"sse41.pmaxuw" ||
3208 Name ==
"sse41.pmaxud" || Name.starts_with(
"avx2.pmaxu") ||
3209 Name.starts_with(
"avx512.mask.pmaxu")) {
3211 }
else if (Name ==
"sse41.pminsb" || Name ==
"sse2.pmins.w" ||
3212 Name ==
"sse41.pminsd" || Name.starts_with(
"avx2.pmins") ||
3213 Name.starts_with(
"avx512.mask.pmins")) {
3215 }
else if (Name ==
"sse2.pminu.b" || Name ==
"sse41.pminuw" ||
3216 Name ==
"sse41.pminud" || Name.starts_with(
"avx2.pminu") ||
3217 Name.starts_with(
"avx512.mask.pminu")) {
3219 }
else if (Name ==
"sse2.pmulu.dq" || Name ==
"avx2.pmulu.dq" ||
3220 Name ==
"avx512.pmulu.dq.512" ||
3221 Name.starts_with(
"avx512.mask.pmulu.dq.")) {
3223 }
else if (Name ==
"sse41.pmuldq" || Name ==
"avx2.pmul.dq" ||
3224 Name ==
"avx512.pmul.dq.512" ||
3225 Name.starts_with(
"avx512.mask.pmul.dq.")) {
3227 }
else if (Name ==
"sse.cvtsi2ss" || Name ==
"sse2.cvtsi2sd" ||
3228 Name ==
"sse.cvtsi642ss" || Name ==
"sse2.cvtsi642sd") {
3233 }
else if (Name ==
"avx512.cvtusi2sd") {
3238 }
else if (Name ==
"sse2.cvtss2sd") {
3240 Rep = Builder.CreateFPExt(
3243 }
else if (Name ==
"sse2.cvtdq2pd" || Name ==
"sse2.cvtdq2ps" ||
3244 Name ==
"avx.cvtdq2.pd.256" || Name ==
"avx.cvtdq2.ps.256" ||
3245 Name.starts_with(
"avx512.mask.cvtdq2pd.") ||
3246 Name.starts_with(
"avx512.mask.cvtudq2pd.") ||
3247 Name.starts_with(
"avx512.mask.cvtdq2ps.") ||
3248 Name.starts_with(
"avx512.mask.cvtudq2ps.") ||
3249 Name.starts_with(
"avx512.mask.cvtqq2pd.") ||
3250 Name.starts_with(
"avx512.mask.cvtuqq2pd.") ||
3251 Name ==
"avx512.mask.cvtqq2ps.256" ||
3252 Name ==
"avx512.mask.cvtqq2ps.512" ||
3253 Name ==
"avx512.mask.cvtuqq2ps.256" ||
3254 Name ==
"avx512.mask.cvtuqq2ps.512" || Name ==
"sse2.cvtps2pd" ||
3255 Name ==
"avx.cvt.ps2.pd.256" ||
3256 Name ==
"avx512.mask.cvtps2pd.128" ||
3257 Name ==
"avx512.mask.cvtps2pd.256") {
3262 unsigned NumDstElts = DstTy->getNumElements();
3264 assert(NumDstElts == 2 &&
"Unexpected vector size");
3265 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1});
3268 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3269 bool IsUnsigned = Name.contains(
"cvtu");
3271 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtps2pd");
3275 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3276 : Intrinsic::x86_avx512_sitofp_round;
3277 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3280 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy,
"cvt")
3281 : Builder.CreateSIToFP(Rep, DstTy,
"cvt");
3287 }
else if (Name.starts_with(
"avx512.mask.vcvtph2ps.") ||
3288 Name.starts_with(
"vcvtph2ps.")) {
3292 unsigned NumDstElts = DstTy->getNumElements();
3293 if (NumDstElts != SrcTy->getNumElements()) {
3294 assert(NumDstElts == 4 &&
"Unexpected vector size");
3295 Rep = Builder.CreateShuffleVector(Rep, Rep,
ArrayRef<int>{0, 1, 2, 3});
3297 Rep = Builder.CreateBitCast(
3299 Rep = Builder.CreateFPExt(Rep, DstTy,
"cvtph2ps");
3303 }
else if (Name.starts_with(
"avx512.mask.load")) {
3305 bool Aligned = Name[16] !=
'u';
3308 }
else if (Name.starts_with(
"avx512.mask.expand.load.")) {
3312 ResultTy->getNumElements());
3313 Rep = Builder.CreateIntrinsic(
3314 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3316 }
else if (Name.starts_with(
"avx512.mask.compress.store.")) {
3322 Rep = Builder.CreateIntrinsic(
3323 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3325 }
else if (Name.starts_with(
"avx512.mask.compress.") ||
3326 Name.starts_with(
"avx512.mask.expand.")) {
3330 ResultTy->getNumElements());
3332 bool IsCompress = Name[12] ==
'c';
3333 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3334 : Intrinsic::x86_avx512_mask_expand;
3335 Rep = Builder.CreateIntrinsic(
3337 }
else if (Name.starts_with(
"xop.vpcom")) {
3339 if (Name.ends_with(
"ub") || Name.ends_with(
"uw") || Name.ends_with(
"ud") ||
3340 Name.ends_with(
"uq"))
3342 else if (Name.ends_with(
"b") || Name.ends_with(
"w") ||
3343 Name.ends_with(
"d") || Name.ends_with(
"q"))
3352 Name = Name.substr(9);
3353 if (Name.starts_with(
"lt"))
3355 else if (Name.starts_with(
"le"))
3357 else if (Name.starts_with(
"gt"))
3359 else if (Name.starts_with(
"ge"))
3361 else if (Name.starts_with(
"eq"))
3363 else if (Name.starts_with(
"ne"))
3365 else if (Name.starts_with(
"false"))
3367 else if (Name.starts_with(
"true"))
3374 }
else if (Name.starts_with(
"xop.vpcmov")) {
3376 Value *NotSel = Builder.CreateNot(Sel);
3379 Rep = Builder.CreateOr(Sel0, Sel1);
3380 }
else if (Name.starts_with(
"xop.vprot") || Name.starts_with(
"avx512.prol") ||
3381 Name.starts_with(
"avx512.mask.prol")) {
3383 }
else if (Name.starts_with(
"avx512.pror") ||
3384 Name.starts_with(
"avx512.mask.pror")) {
3386 }
else if (Name.starts_with(
"avx512.vpshld.") ||
3387 Name.starts_with(
"avx512.mask.vpshld") ||
3388 Name.starts_with(
"avx512.maskz.vpshld")) {
3389 bool ZeroMask = Name[11] ==
'z';
3391 }
else if (Name.starts_with(
"avx512.vpshrd.") ||
3392 Name.starts_with(
"avx512.mask.vpshrd") ||
3393 Name.starts_with(
"avx512.maskz.vpshrd")) {
3394 bool ZeroMask = Name[11] ==
'z';
3396 }
else if (Name ==
"sse42.crc32.64.8") {
3399 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3401 Rep = Builder.CreateZExt(Rep, CI->
getType(),
"");
3402 }
else if (Name.starts_with(
"avx.vbroadcast.s") ||
3403 Name.starts_with(
"avx512.vbroadcast.s")) {
3406 Type *EltTy = VecTy->getElementType();
3407 unsigned EltNum = VecTy->getNumElements();
3411 for (
unsigned I = 0;
I < EltNum; ++
I)
3412 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty,
I));
3413 }
else if (Name.starts_with(
"sse41.pmovsx") ||
3414 Name.starts_with(
"sse41.pmovzx") ||
3415 Name.starts_with(
"avx2.pmovsx") ||
3416 Name.starts_with(
"avx2.pmovzx") ||
3417 Name.starts_with(
"avx512.mask.pmovsx") ||
3418 Name.starts_with(
"avx512.mask.pmovzx")) {
3420 unsigned NumDstElts = DstTy->getNumElements();
3424 for (
unsigned i = 0; i != NumDstElts; ++i)
3429 bool DoSext = Name.contains(
"pmovsx");
3431 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3436 }
else if (Name ==
"avx512.mask.pmov.qd.256" ||
3437 Name ==
"avx512.mask.pmov.qd.512" ||
3438 Name ==
"avx512.mask.pmov.wb.256" ||
3439 Name ==
"avx512.mask.pmov.wb.512") {
3444 }
else if (Name.starts_with(
"avx.vbroadcastf128") ||
3445 Name ==
"avx2.vbroadcasti128") {
3451 if (NumSrcElts == 2)
3452 Rep = Builder.CreateShuffleVector(Load,
ArrayRef<int>{0, 1, 0, 1});
3454 Rep = Builder.CreateShuffleVector(Load,
3456 }
else if (Name.starts_with(
"avx512.mask.shuf.i") ||
3457 Name.starts_with(
"avx512.mask.shuf.f")) {
3462 unsigned ControlBitsMask = NumLanes - 1;
3463 unsigned NumControlBits = NumLanes / 2;
3466 for (
unsigned l = 0; l != NumLanes; ++l) {
3467 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3469 if (l >= NumLanes / 2)
3470 LaneMask += NumLanes;
3471 for (
unsigned i = 0; i != NumElementsInLane; ++i)
3472 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3478 }
else if (Name.starts_with(
"avx512.mask.broadcastf") ||
3479 Name.starts_with(
"avx512.mask.broadcasti")) {
3482 unsigned NumDstElts =
3486 for (
unsigned i = 0; i != NumDstElts; ++i)
3487 ShuffleMask[i] = i % NumSrcElts;
3493 }
else if (Name.starts_with(
"avx2.pbroadcast") ||
3494 Name.starts_with(
"avx2.vbroadcast") ||
3495 Name.starts_with(
"avx512.pbroadcast") ||
3496 Name.starts_with(
"avx512.mask.broadcast.s")) {
3503 Rep = Builder.CreateShuffleVector(
Op, M);
3508 }
else if (Name.starts_with(
"sse2.padds.") ||
3509 Name.starts_with(
"avx2.padds.") ||
3510 Name.starts_with(
"avx512.padds.") ||
3511 Name.starts_with(
"avx512.mask.padds.")) {
3513 }
else if (Name.starts_with(
"sse2.psubs.") ||
3514 Name.starts_with(
"avx2.psubs.") ||
3515 Name.starts_with(
"avx512.psubs.") ||
3516 Name.starts_with(
"avx512.mask.psubs.")) {
3518 }
else if (Name.starts_with(
"sse2.paddus.") ||
3519 Name.starts_with(
"avx2.paddus.") ||
3520 Name.starts_with(
"avx512.mask.paddus.")) {
3522 }
else if (Name.starts_with(
"sse2.psubus.") ||
3523 Name.starts_with(
"avx2.psubus.") ||
3524 Name.starts_with(
"avx512.mask.psubus.")) {
3526 }
else if (Name.starts_with(
"avx512.mask.palignr.")) {
3531 }
else if (Name.starts_with(
"avx512.mask.valign.")) {
3535 }
else if (Name ==
"sse2.psll.dq" || Name ==
"avx2.psll.dq") {
3540 }
else if (Name ==
"sse2.psrl.dq" || Name ==
"avx2.psrl.dq") {
3545 }
else if (Name ==
"sse2.psll.dq.bs" || Name ==
"avx2.psll.dq.bs" ||
3546 Name ==
"avx512.psll.dq.512") {
3550 }
else if (Name ==
"sse2.psrl.dq.bs" || Name ==
"avx2.psrl.dq.bs" ||
3551 Name ==
"avx512.psrl.dq.512") {
3555 }
else if (Name ==
"sse41.pblendw" || Name.starts_with(
"sse41.blendp") ||
3556 Name.starts_with(
"avx.blend.p") || Name ==
"avx2.pblendw" ||
3557 Name.starts_with(
"avx2.pblendd.")) {
3562 unsigned NumElts = VecTy->getNumElements();
3565 for (
unsigned i = 0; i != NumElts; ++i)
3566 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3568 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3569 }
else if (Name.starts_with(
"avx.vinsertf128.") ||
3570 Name ==
"avx2.vinserti128" ||
3571 Name.starts_with(
"avx512.mask.insert")) {
3575 unsigned DstNumElts =
3577 unsigned SrcNumElts =
3579 unsigned Scale = DstNumElts / SrcNumElts;
3586 for (
unsigned i = 0; i != SrcNumElts; ++i)
3588 for (
unsigned i = SrcNumElts; i != DstNumElts; ++i)
3589 Idxs[i] = SrcNumElts;
3590 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3604 for (
unsigned i = 0; i != DstNumElts; ++i)
3607 for (
unsigned i = 0; i != SrcNumElts; ++i)
3608 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3609 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3615 }
else if (Name.starts_with(
"avx.vextractf128.") ||
3616 Name ==
"avx2.vextracti128" ||
3617 Name.starts_with(
"avx512.mask.vextract")) {
3620 unsigned DstNumElts =
3622 unsigned SrcNumElts =
3624 unsigned Scale = SrcNumElts / DstNumElts;
3631 for (
unsigned i = 0; i != DstNumElts; ++i) {
3632 Idxs[i] = i + (Imm * DstNumElts);
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3640 }
else if (Name.starts_with(
"avx512.mask.perm.df.") ||
3641 Name.starts_with(
"avx512.mask.perm.di.")) {
3645 unsigned NumElts = VecTy->getNumElements();
3648 for (
unsigned i = 0; i != NumElts; ++i)
3649 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3651 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3656 }
else if (Name.starts_with(
"avx.vperm2f128.") || Name ==
"avx2.vperm2i128") {
3668 unsigned HalfSize = NumElts / 2;
3680 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3681 for (
unsigned i = 0; i < HalfSize; ++i)
3682 ShuffleMask[i] = StartIndex + i;
3685 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3686 for (
unsigned i = 0; i < HalfSize; ++i)
3687 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3689 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3691 }
else if (Name.starts_with(
"avx.vpermil.") || Name ==
"sse2.pshuf.d" ||
3692 Name.starts_with(
"avx512.mask.vpermil.p") ||
3693 Name.starts_with(
"avx512.mask.pshuf.d.")) {
3697 unsigned NumElts = VecTy->getNumElements();
3699 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3700 unsigned IdxMask = ((1 << IdxSize) - 1);
3706 for (
unsigned i = 0; i != NumElts; ++i)
3707 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3709 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3714 }
else if (Name ==
"sse2.pshufl.w" ||
3715 Name.starts_with(
"avx512.mask.pshufl.w.")) {
3720 if (Name ==
"sse2.pshufl.w" && NumElts % 8 != 0)
3724 for (
unsigned l = 0; l != NumElts; l += 8) {
3725 for (
unsigned i = 0; i != 4; ++i)
3726 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3727 for (
unsigned i = 4; i != 8; ++i)
3728 Idxs[i + l] = i + l;
3731 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3736 }
else if (Name ==
"sse2.pshufh.w" ||
3737 Name.starts_with(
"avx512.mask.pshufh.w.")) {
3742 if (Name ==
"sse2.pshufh.w" && NumElts % 8 != 0)
3746 for (
unsigned l = 0; l != NumElts; l += 8) {
3747 for (
unsigned i = 0; i != 4; ++i)
3748 Idxs[i + l] = i + l;
3749 for (
unsigned i = 0; i != 4; ++i)
3750 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3753 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3758 }
else if (Name.starts_with(
"avx512.mask.shuf.p")) {
3765 unsigned HalfLaneElts = NumLaneElts / 2;
3768 for (
unsigned i = 0; i != NumElts; ++i) {
3770 Idxs[i] = i - (i % NumLaneElts);
3772 if ((i % NumLaneElts) >= HalfLaneElts)
3776 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3779 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3783 }
else if (Name.starts_with(
"avx512.mask.movddup") ||
3784 Name.starts_with(
"avx512.mask.movshdup") ||
3785 Name.starts_with(
"avx512.mask.movsldup")) {
3791 if (Name.starts_with(
"avx512.mask.movshdup."))
3795 for (
unsigned l = 0; l != NumElts; l += NumLaneElts)
3796 for (
unsigned i = 0; i != NumLaneElts; i += 2) {
3797 Idxs[i + l + 0] = i + l +
Offset;
3798 Idxs[i + l + 1] = i + l +
Offset;
3801 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3805 }
else if (Name.starts_with(
"avx512.mask.punpckl") ||
3806 Name.starts_with(
"avx512.mask.unpckl.")) {
3813 for (
int l = 0; l != NumElts; l += NumLaneElts)
3814 for (
int i = 0; i != NumLaneElts; ++i)
3815 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3817 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3821 }
else if (Name.starts_with(
"avx512.mask.punpckh") ||
3822 Name.starts_with(
"avx512.mask.unpckh.")) {
3829 for (
int l = 0; l != NumElts; l += NumLaneElts)
3830 for (
int i = 0; i != NumLaneElts; ++i)
3831 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3833 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3837 }
else if (Name.starts_with(
"avx512.mask.and.") ||
3838 Name.starts_with(
"avx512.mask.pand.")) {
3841 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3843 Rep = Builder.CreateBitCast(Rep, FTy);
3846 }
else if (Name.starts_with(
"avx512.mask.andn.") ||
3847 Name.starts_with(
"avx512.mask.pandn.")) {
3850 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->
getArgOperand(0), ITy));
3851 Rep = Builder.CreateAnd(Rep,
3853 Rep = Builder.CreateBitCast(Rep, FTy);
3856 }
else if (Name.starts_with(
"avx512.mask.or.") ||
3857 Name.starts_with(
"avx512.mask.por.")) {
3860 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3862 Rep = Builder.CreateBitCast(Rep, FTy);
3865 }
else if (Name.starts_with(
"avx512.mask.xor.") ||
3866 Name.starts_with(
"avx512.mask.pxor.")) {
3869 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->
getArgOperand(0), ITy),
3871 Rep = Builder.CreateBitCast(Rep, FTy);
3874 }
else if (Name.starts_with(
"avx512.mask.padd.")) {
3878 }
else if (Name.starts_with(
"avx512.mask.psub.")) {
3882 }
else if (Name.starts_with(
"avx512.mask.pmull.")) {
3886 }
else if (Name.starts_with(
"avx512.mask.add.p")) {
3887 if (Name.ends_with(
".512")) {
3889 if (Name[17] ==
's')
3890 IID = Intrinsic::x86_avx512_add_ps_512;
3892 IID = Intrinsic::x86_avx512_add_pd_512;
3894 Rep = Builder.CreateIntrinsic(
3902 }
else if (Name.starts_with(
"avx512.mask.div.p")) {
3903 if (Name.ends_with(
".512")) {
3905 if (Name[17] ==
's')
3906 IID = Intrinsic::x86_avx512_div_ps_512;
3908 IID = Intrinsic::x86_avx512_div_pd_512;
3910 Rep = Builder.CreateIntrinsic(
3918 }
else if (Name.starts_with(
"avx512.mask.mul.p")) {
3919 if (Name.ends_with(
".512")) {
3921 if (Name[17] ==
's')
3922 IID = Intrinsic::x86_avx512_mul_ps_512;
3924 IID = Intrinsic::x86_avx512_mul_pd_512;
3926 Rep = Builder.CreateIntrinsic(
3934 }
else if (Name.starts_with(
"avx512.mask.sub.p")) {
3935 if (Name.ends_with(
".512")) {
3937 if (Name[17] ==
's')
3938 IID = Intrinsic::x86_avx512_sub_ps_512;
3940 IID = Intrinsic::x86_avx512_sub_pd_512;
3942 Rep = Builder.CreateIntrinsic(
3950 }
else if ((Name.starts_with(
"avx512.mask.max.p") ||
3951 Name.starts_with(
"avx512.mask.min.p")) &&
3952 Name.drop_front(18) ==
".512") {
3953 bool IsDouble = Name[17] ==
'd';
3954 bool IsMin = Name[13] ==
'i';
3956 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3957 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3960 Rep = Builder.CreateIntrinsic(
3965 }
else if (Name.starts_with(
"avx512.mask.lzcnt.")) {
3967 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->
getType(),
3968 {CI->getArgOperand(0), Builder.getInt1(false)});
3971 }
else if (Name.starts_with(
"avx512.mask.psll")) {
3972 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
3973 bool IsVariable = Name[16] ==
'v';
3974 char Size = Name[16] ==
'.' ? Name[17]
3975 : Name[17] ==
'.' ? Name[18]
3976 : Name[18] ==
'.' ? Name[19]
3980 if (IsVariable && Name[17] !=
'.') {
3981 if (
Size ==
'd' && Name[17] ==
'2')
3982 IID = Intrinsic::x86_avx2_psllv_q;
3983 else if (
Size ==
'd' && Name[17] ==
'4')
3984 IID = Intrinsic::x86_avx2_psllv_q_256;
3985 else if (
Size ==
's' && Name[17] ==
'4')
3986 IID = Intrinsic::x86_avx2_psllv_d;
3987 else if (
Size ==
's' && Name[17] ==
'8')
3988 IID = Intrinsic::x86_avx2_psllv_d_256;
3989 else if (
Size ==
'h' && Name[17] ==
'8')
3990 IID = Intrinsic::x86_avx512_psllv_w_128;
3991 else if (
Size ==
'h' && Name[17] ==
'1')
3992 IID = Intrinsic::x86_avx512_psllv_w_256;
3993 else if (Name[17] ==
'3' && Name[18] ==
'2')
3994 IID = Intrinsic::x86_avx512_psllv_w_512;
3997 }
else if (Name.ends_with(
".128")) {
3999 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4000 : Intrinsic::x86_sse2_psll_d;
4001 else if (
Size ==
'q')
4002 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4003 : Intrinsic::x86_sse2_psll_q;
4004 else if (
Size ==
'w')
4005 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4006 : Intrinsic::x86_sse2_psll_w;
4009 }
else if (Name.ends_with(
".256")) {
4011 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4012 : Intrinsic::x86_avx2_psll_d;
4013 else if (
Size ==
'q')
4014 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4015 : Intrinsic::x86_avx2_psll_q;
4016 else if (
Size ==
'w')
4017 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4018 : Intrinsic::x86_avx2_psll_w;
4023 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4024 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4025 : Intrinsic::x86_avx512_psll_d_512;
4026 else if (
Size ==
'q')
4027 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4028 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4029 : Intrinsic::x86_avx512_psll_q_512;
4030 else if (
Size ==
'w')
4031 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4032 : Intrinsic::x86_avx512_psll_w_512;
4038 }
else if (Name.starts_with(
"avx512.mask.psrl")) {
4039 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4040 bool IsVariable = Name[16] ==
'v';
4041 char Size = Name[16] ==
'.' ? Name[17]
4042 : Name[17] ==
'.' ? Name[18]
4043 : Name[18] ==
'.' ? Name[19]
4047 if (IsVariable && Name[17] !=
'.') {
4048 if (
Size ==
'd' && Name[17] ==
'2')
4049 IID = Intrinsic::x86_avx2_psrlv_q;
4050 else if (
Size ==
'd' && Name[17] ==
'4')
4051 IID = Intrinsic::x86_avx2_psrlv_q_256;
4052 else if (
Size ==
's' && Name[17] ==
'4')
4053 IID = Intrinsic::x86_avx2_psrlv_d;
4054 else if (
Size ==
's' && Name[17] ==
'8')
4055 IID = Intrinsic::x86_avx2_psrlv_d_256;
4056 else if (
Size ==
'h' && Name[17] ==
'8')
4057 IID = Intrinsic::x86_avx512_psrlv_w_128;
4058 else if (
Size ==
'h' && Name[17] ==
'1')
4059 IID = Intrinsic::x86_avx512_psrlv_w_256;
4060 else if (Name[17] ==
'3' && Name[18] ==
'2')
4061 IID = Intrinsic::x86_avx512_psrlv_w_512;
4064 }
else if (Name.ends_with(
".128")) {
4066 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4067 : Intrinsic::x86_sse2_psrl_d;
4068 else if (
Size ==
'q')
4069 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4070 : Intrinsic::x86_sse2_psrl_q;
4071 else if (
Size ==
'w')
4072 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4073 : Intrinsic::x86_sse2_psrl_w;
4076 }
else if (Name.ends_with(
".256")) {
4078 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4079 : Intrinsic::x86_avx2_psrl_d;
4080 else if (
Size ==
'q')
4081 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4082 : Intrinsic::x86_avx2_psrl_q;
4083 else if (
Size ==
'w')
4084 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4085 : Intrinsic::x86_avx2_psrl_w;
4090 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4091 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4092 : Intrinsic::x86_avx512_psrl_d_512;
4093 else if (
Size ==
'q')
4094 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4095 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4096 : Intrinsic::x86_avx512_psrl_q_512;
4097 else if (
Size ==
'w')
4098 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4099 : Intrinsic::x86_avx512_psrl_w_512;
4105 }
else if (Name.starts_with(
"avx512.mask.psra")) {
4106 bool IsImmediate = Name[16] ==
'i' || (Name.size() > 18 && Name[18] ==
'i');
4107 bool IsVariable = Name[16] ==
'v';
4108 char Size = Name[16] ==
'.' ? Name[17]
4109 : Name[17] ==
'.' ? Name[18]
4110 : Name[18] ==
'.' ? Name[19]
4114 if (IsVariable && Name[17] !=
'.') {
4115 if (
Size ==
's' && Name[17] ==
'4')
4116 IID = Intrinsic::x86_avx2_psrav_d;
4117 else if (
Size ==
's' && Name[17] ==
'8')
4118 IID = Intrinsic::x86_avx2_psrav_d_256;
4119 else if (
Size ==
'h' && Name[17] ==
'8')
4120 IID = Intrinsic::x86_avx512_psrav_w_128;
4121 else if (
Size ==
'h' && Name[17] ==
'1')
4122 IID = Intrinsic::x86_avx512_psrav_w_256;
4123 else if (Name[17] ==
'3' && Name[18] ==
'2')
4124 IID = Intrinsic::x86_avx512_psrav_w_512;
4127 }
else if (Name.ends_with(
".128")) {
4129 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4130 : Intrinsic::x86_sse2_psra_d;
4131 else if (
Size ==
'q')
4132 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4133 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4134 : Intrinsic::x86_avx512_psra_q_128;
4135 else if (
Size ==
'w')
4136 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4137 : Intrinsic::x86_sse2_psra_w;
4140 }
else if (Name.ends_with(
".256")) {
4142 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4143 : Intrinsic::x86_avx2_psra_d;
4144 else if (
Size ==
'q')
4145 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4146 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4147 : Intrinsic::x86_avx512_psra_q_256;
4148 else if (
Size ==
'w')
4149 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4150 : Intrinsic::x86_avx2_psra_w;
4155 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4156 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4157 : Intrinsic::x86_avx512_psra_d_512;
4158 else if (
Size ==
'q')
4159 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4160 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4161 : Intrinsic::x86_avx512_psra_q_512;
4162 else if (
Size ==
'w')
4163 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4164 : Intrinsic::x86_avx512_psra_w_512;
4170 }
else if (Name.starts_with(
"avx512.mask.move.s")) {
4172 }
else if (Name.starts_with(
"avx512.cvtmask2")) {
4174 }
else if (Name.ends_with(
".movntdqa")) {
4178 LoadInst *LI = Builder.CreateAlignedLoad(
4183 }
else if (Name.starts_with(
"fma.vfmadd.") ||
4184 Name.starts_with(
"fma.vfmsub.") ||
4185 Name.starts_with(
"fma.vfnmadd.") ||
4186 Name.starts_with(
"fma.vfnmsub.")) {
4187 bool NegMul = Name[6] ==
'n';
4188 bool NegAcc = NegMul ? Name[8] ==
's' : Name[7] ==
's';
4189 bool IsScalar = NegMul ? Name[12] ==
's' : Name[11] ==
's';
4200 if (NegMul && !IsScalar)
4201 Ops[0] = Builder.CreateFNeg(
Ops[0]);
4202 if (NegMul && IsScalar)
4203 Ops[1] = Builder.CreateFNeg(
Ops[1]);
4205 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4207 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4211 }
else if (Name.starts_with(
"fma4.vfmadd.s")) {
4219 Rep = Builder.CreateIntrinsic(Intrinsic::fma,
Ops[0]->
getType(),
Ops);
4223 }
else if (Name.starts_with(
"avx512.mask.vfmadd.s") ||
4224 Name.starts_with(
"avx512.maskz.vfmadd.s") ||
4225 Name.starts_with(
"avx512.mask3.vfmadd.s") ||
4226 Name.starts_with(
"avx512.mask3.vfmsub.s") ||
4227 Name.starts_with(
"avx512.mask3.vfnmsub.s")) {
4228 bool IsMask3 = Name[11] ==
'3';
4229 bool IsMaskZ = Name[11] ==
'z';
4231 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4232 bool NegMul = Name[2] ==
'n';
4233 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4239 if (NegMul && (IsMask3 || IsMaskZ))
4240 A = Builder.CreateFNeg(
A);
4241 if (NegMul && !(IsMask3 || IsMaskZ))
4242 B = Builder.CreateFNeg(
B);
4244 C = Builder.CreateFNeg(
C);
4246 A = Builder.CreateExtractElement(
A, (
uint64_t)0);
4247 B = Builder.CreateExtractElement(
B, (
uint64_t)0);
4248 C = Builder.CreateExtractElement(
C, (
uint64_t)0);
4255 if (Name.back() ==
'd')
4256 IID = Intrinsic::x86_avx512_vfmadd_f64;
4258 IID = Intrinsic::x86_avx512_vfmadd_f32;
4259 Rep = Builder.CreateIntrinsic(IID,
Ops);
4261 Rep = Builder.CreateFMA(
A,
B,
C);
4270 if (NegAcc && IsMask3)
4275 Rep = Builder.CreateInsertElement(CI->
getArgOperand(IsMask3 ? 2 : 0), Rep,
4277 }
else if (Name.starts_with(
"avx512.mask.vfmadd.p") ||
4278 Name.starts_with(
"avx512.mask.vfnmadd.p") ||
4279 Name.starts_with(
"avx512.mask.vfnmsub.p") ||
4280 Name.starts_with(
"avx512.mask3.vfmadd.p") ||
4281 Name.starts_with(
"avx512.mask3.vfmsub.p") ||
4282 Name.starts_with(
"avx512.mask3.vfnmsub.p") ||
4283 Name.starts_with(
"avx512.maskz.vfmadd.p")) {
4284 bool IsMask3 = Name[11] ==
'3';
4285 bool IsMaskZ = Name[11] ==
'z';
4287 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4288 bool NegMul = Name[2] ==
'n';
4289 bool NegAcc = NegMul ? Name[4] ==
's' : Name[3] ==
's';
4295 if (NegMul && (IsMask3 || IsMaskZ))
4296 A = Builder.CreateFNeg(
A);
4297 if (NegMul && !(IsMask3 || IsMaskZ))
4298 B = Builder.CreateFNeg(
B);
4300 C = Builder.CreateFNeg(
C);
4307 if (Name[Name.size() - 5] ==
's')
4308 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4310 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4314 Rep = Builder.CreateFMA(
A,
B,
C);
4322 }
else if (Name.starts_with(
"fma.vfmsubadd.p")) {
4326 if (VecWidth == 128 && EltWidth == 32)
4327 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4328 else if (VecWidth == 256 && EltWidth == 32)
4329 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4330 else if (VecWidth == 128 && EltWidth == 64)
4331 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4332 else if (VecWidth == 256 && EltWidth == 64)
4333 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4339 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4340 Rep = Builder.CreateIntrinsic(IID,
Ops);
4341 }
else if (Name.starts_with(
"avx512.mask.vfmaddsub.p") ||
4342 Name.starts_with(
"avx512.mask3.vfmaddsub.p") ||
4343 Name.starts_with(
"avx512.maskz.vfmaddsub.p") ||
4344 Name.starts_with(
"avx512.mask3.vfmsubadd.p")) {
4345 bool IsMask3 = Name[11] ==
'3';
4346 bool IsMaskZ = Name[11] ==
'z';
4348 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4349 bool IsSubAdd = Name[3] ==
's';
4353 if (Name[Name.size() - 5] ==
's')
4354 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4356 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4361 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4363 Rep = Builder.CreateIntrinsic(IID,
Ops);
4372 Value *Odd = Builder.CreateCall(FMA,
Ops);
4373 Ops[2] = Builder.CreateFNeg(
Ops[2]);
4374 Value *Even = Builder.CreateCall(FMA,
Ops);
4380 for (
int i = 0; i != NumElts; ++i)
4381 Idxs[i] = i + (i % 2) * NumElts;
4383 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4391 }
else if (Name.starts_with(
"avx512.mask.pternlog.") ||
4392 Name.starts_with(
"avx512.maskz.pternlog.")) {
4393 bool ZeroMask = Name[11] ==
'z';
4397 if (VecWidth == 128 && EltWidth == 32)
4398 IID = Intrinsic::x86_avx512_pternlog_d_128;
4399 else if (VecWidth == 256 && EltWidth == 32)
4400 IID = Intrinsic::x86_avx512_pternlog_d_256;
4401 else if (VecWidth == 512 && EltWidth == 32)
4402 IID = Intrinsic::x86_avx512_pternlog_d_512;
4403 else if (VecWidth == 128 && EltWidth == 64)
4404 IID = Intrinsic::x86_avx512_pternlog_q_128;
4405 else if (VecWidth == 256 && EltWidth == 64)
4406 IID = Intrinsic::x86_avx512_pternlog_q_256;
4407 else if (VecWidth == 512 && EltWidth == 64)
4408 IID = Intrinsic::x86_avx512_pternlog_q_512;
4414 Rep = Builder.CreateIntrinsic(IID, Args);
4418 }
else if (Name.starts_with(
"avx512.mask.vpmadd52") ||
4419 Name.starts_with(
"avx512.maskz.vpmadd52")) {
4420 bool ZeroMask = Name[11] ==
'z';
4421 bool High = Name[20] ==
'h' || Name[21] ==
'h';
4424 if (VecWidth == 128 && !
High)
4425 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4426 else if (VecWidth == 256 && !
High)
4427 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4428 else if (VecWidth == 512 && !
High)
4429 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4430 else if (VecWidth == 128 &&
High)
4431 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4432 else if (VecWidth == 256 &&
High)
4433 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4434 else if (VecWidth == 512 &&
High)
4435 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4441 Rep = Builder.CreateIntrinsic(IID, Args);
4445 }
else if (Name.starts_with(
"avx512.mask.vpermi2var.") ||
4446 Name.starts_with(
"avx512.mask.vpermt2var.") ||
4447 Name.starts_with(
"avx512.maskz.vpermt2var.")) {
4448 bool ZeroMask = Name[11] ==
'z';
4449 bool IndexForm = Name[17] ==
'i';
4451 }
else if (Name.starts_with(
"avx512.mask.vpdpbusd.") ||
4452 Name.starts_with(
"avx512.maskz.vpdpbusd.") ||
4453 Name.starts_with(
"avx512.mask.vpdpbusds.") ||
4454 Name.starts_with(
"avx512.maskz.vpdpbusds.")) {
4455 bool ZeroMask = Name[11] ==
'z';
4456 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4459 if (VecWidth == 128 && !IsSaturating)
4460 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4461 else if (VecWidth == 256 && !IsSaturating)
4462 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4463 else if (VecWidth == 512 && !IsSaturating)
4464 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4465 else if (VecWidth == 128 && IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4467 else if (VecWidth == 256 && IsSaturating)
4468 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4469 else if (VecWidth == 512 && IsSaturating)
4470 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4480 if (Args[1]->
getType()->isVectorTy() &&
4483 ->isIntegerTy(32) &&
4484 Args[2]->
getType()->isVectorTy() &&
4487 ->isIntegerTy(32)) {
4488 Type *NewArgType =
nullptr;
4489 if (VecWidth == 128)
4491 else if (VecWidth == 256)
4493 else if (VecWidth == 512)
4499 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4500 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4503 Rep = Builder.CreateIntrinsic(IID, Args);
4507 }
else if (Name.starts_with(
"avx512.mask.vpdpwssd.") ||
4508 Name.starts_with(
"avx512.maskz.vpdpwssd.") ||
4509 Name.starts_with(
"avx512.mask.vpdpwssds.") ||
4510 Name.starts_with(
"avx512.maskz.vpdpwssds.")) {
4511 bool ZeroMask = Name[11] ==
'z';
4512 bool IsSaturating = Name[ZeroMask ? 21 : 20] ==
's';
4515 if (VecWidth == 128 && !IsSaturating)
4516 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4517 else if (VecWidth == 256 && !IsSaturating)
4518 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4519 else if (VecWidth == 512 && !IsSaturating)
4520 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4521 else if (VecWidth == 128 && IsSaturating)
4522 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4523 else if (VecWidth == 256 && IsSaturating)
4524 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4525 else if (VecWidth == 512 && IsSaturating)
4526 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4536 if (Args[1]->
getType()->isVectorTy() &&
4539 ->isIntegerTy(32) &&
4540 Args[2]->
getType()->isVectorTy() &&
4543 ->isIntegerTy(32)) {
4544 Type *NewArgType =
nullptr;
4545 if (VecWidth == 128)
4547 else if (VecWidth == 256)
4549 else if (VecWidth == 512)
4555 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4556 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4559 Rep = Builder.CreateIntrinsic(IID, Args);
4563 }
else if (Name ==
"addcarryx.u32" || Name ==
"addcarryx.u64" ||
4564 Name ==
"addcarry.u32" || Name ==
"addcarry.u64" ||
4565 Name ==
"subborrow.u32" || Name ==
"subborrow.u64") {
4567 if (Name[0] ==
'a' && Name.back() ==
'2')
4568 IID = Intrinsic::x86_addcarry_32;
4569 else if (Name[0] ==
'a' && Name.back() ==
'4')
4570 IID = Intrinsic::x86_addcarry_64;
4571 else if (Name[0] ==
's' && Name.back() ==
'2')
4572 IID = Intrinsic::x86_subborrow_32;
4573 else if (Name[0] ==
's' && Name.back() ==
'4')
4574 IID = Intrinsic::x86_subborrow_64;
4581 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4584 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
4587 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4591 }
else if (Name.starts_with(
"avx512.mask.") &&
4602 if (Name.starts_with(
"neon.bfcvt")) {
4603 if (Name.starts_with(
"neon.bfcvtn2")) {
4605 std::iota(LoMask.
begin(), LoMask.
end(), 0);
4607 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4608 Value *Inactive = Builder.CreateShuffleVector(CI->
getOperand(0), LoMask);
4611 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4612 }
else if (Name.starts_with(
"neon.bfcvtn")) {
4614 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
4618 dbgs() <<
"Trunc: " << *Trunc <<
"\n";
4619 return Builder.CreateShuffleVector(
4622 return Builder.CreateFPTrunc(CI->
getOperand(0),
4625 }
else if (Name.starts_with(
"sve.fcvt")) {
4628 .
Case(
"sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4629 .
Case(
"sve.fcvtnt.bf16f32",
4630 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4642 if (Args[1]->
getType() != BadPredTy)
4645 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4646 BadPredTy, Args[1]);
4647 Args[1] = Builder.CreateIntrinsic(
4648 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4650 return Builder.CreateIntrinsic(NewID, Args,
nullptr,
4659 if (Name ==
"mve.vctp64.old") {
4662 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4665 Value *C1 = Builder.CreateIntrinsic(
4666 Intrinsic::arm_mve_pred_v2i,
4668 return Builder.CreateIntrinsic(
4669 Intrinsic::arm_mve_pred_i2v,
4671 }
else if (Name ==
"mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4672 Name ==
"mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4673 Name ==
"mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4674 Name ==
"mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4676 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4677 Name ==
"mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4678 Name ==
"mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4679 Name ==
"mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4681 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4682 Name ==
"mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4683 Name ==
"cde.vcx1q.predicated.v2i64.v4i1" ||
4684 Name ==
"cde.vcx1qa.predicated.v2i64.v4i1" ||
4685 Name ==
"cde.vcx2q.predicated.v2i64.v4i1" ||
4686 Name ==
"cde.vcx2qa.predicated.v2i64.v4i1" ||
4687 Name ==
"cde.vcx3q.predicated.v2i64.v4i1" ||
4688 Name ==
"cde.vcx3qa.predicated.v2i64.v4i1") {
4689 std::vector<Type *> Tys;
4693 case Intrinsic::arm_mve_mull_int_predicated:
4694 case Intrinsic::arm_mve_vqdmull_predicated:
4695 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4698 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4699 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4700 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4704 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4708 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4712 case Intrinsic::arm_cde_vcx1q_predicated:
4713 case Intrinsic::arm_cde_vcx1qa_predicated:
4714 case Intrinsic::arm_cde_vcx2q_predicated:
4715 case Intrinsic::arm_cde_vcx2qa_predicated:
4716 case Intrinsic::arm_cde_vcx3q_predicated:
4717 case Intrinsic::arm_cde_vcx3qa_predicated:
4724 std::vector<Value *>
Ops;
4726 Type *Ty =
Op->getType();
4727 if (Ty->getScalarSizeInBits() == 1) {
4728 Value *C1 = Builder.CreateIntrinsic(
4729 Intrinsic::arm_mve_pred_v2i,
4731 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4736 return Builder.CreateIntrinsic(
ID, Tys,
Ops,
nullptr,
4751 auto UpgradeLegacyWMMAIUIntrinsicCall =
4756 Args.push_back(Builder.getFalse());
4760 F->getParent(),
F->getIntrinsicID(), OverloadTys);
4767 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4772 NewCall->copyMetadata(*CI);
4776 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4777 assert(CI->
arg_size() == 7 &&
"Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4778 "intrinsic should have 7 arguments");
4781 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2});
4783 if (
F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4784 assert(CI->
arg_size() == 8 &&
"Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4785 "intrinsic should have 8 arguments");
4790 return UpgradeLegacyWMMAIUIntrinsicCall(
F, CI, Builder, {
T1, T2, T3, T4});
4793 switch (
F->getIntrinsicID()) {
4796 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4797 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4798 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4799 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4800 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4801 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4816 if (
F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4819 F->getParent(),
F->getIntrinsicID(), Overloads);
4824 auto *NewCall =
cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4829 NewCall->copyMetadata(*CI);
4830 NewCall->takeName(CI);
4852 if (NumOperands < 3)
4865 bool IsVolatile =
false;
4869 if (NumOperands > 3)
4874 if (NumOperands > 5) {
4876 IsVolatile = !VolatileArg || !VolatileArg->
isZero();
4890 if (VT->getElementType()->isIntegerTy(16)) {
4893 Val = Builder.CreateBitCast(Val, AsBF16);
4901 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4903 unsigned AddrSpace = PtrTy->getAddressSpace();
4906 RMW->
setMetadata(
"amdgpu.no.fine.grained.memory", EmptyMD);
4908 RMW->
setMetadata(
"amdgpu.ignore.denormal.mode", EmptyMD);
4913 MDNode *RangeNotPrivate =
4916 RMW->
setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4922 return Builder.CreateBitCast(RMW, RetTy);
4943 return MAV->getMetadata();
4952 if (Name ==
"label") {
4954 }
else if (Name ==
"assign") {
4961 }
else if (Name ==
"declare") {
4965 }
else if (Name ==
"addr") {
4975 unwrapMAVOp(CI, 1), ExprNode,
nullptr,
nullptr,
nullptr);
4976 }
else if (Name ==
"value") {
4979 unsigned ExprOp = 2;
4994 assert(DR &&
"Unhandled intrinsic kind in upgrade to DbgRecord");
5002 int64_t OffsetVal =
Offset->getSExtValue();
5003 return Builder.CreateIntrinsic(OffsetVal >= 0
5004 ? Intrinsic::vector_splice_left
5005 : Intrinsic::vector_splice_right,
5007 {CI->getArgOperand(0), CI->getArgOperand(1),
5008 Builder.getInt32(std::abs(OffsetVal))});
5013 if (Name.starts_with(
"to.fp16")) {
5015 Builder.CreateFPTrunc(CI->
getArgOperand(0), Builder.getHalfTy());
5016 return Builder.CreateBitCast(Cast, CI->
getType());
5019 if (Name.starts_with(
"from.fp16")) {
5021 Builder.CreateBitCast(CI->
getArgOperand(0), Builder.getHalfTy());
5022 return Builder.CreateFPExt(Cast, CI->
getType());
5047 if (!Name.consume_front(
"llvm."))
5050 bool IsX86 = Name.consume_front(
"x86.");
5051 bool IsNVVM = Name.consume_front(
"nvvm.");
5052 bool IsAArch64 = Name.consume_front(
"aarch64.");
5053 bool IsARM = Name.consume_front(
"arm.");
5054 bool IsAMDGCN = Name.consume_front(
"amdgcn.");
5055 bool IsDbg = Name.consume_front(
"dbg.");
5057 (Name.consume_front(
"experimental.vector.splice") ||
5058 Name.consume_front(
"vector.splice")) &&
5059 !(Name.starts_with(
".left") || Name.starts_with(
".right"));
5060 Value *Rep =
nullptr;
5062 if (!IsX86 && Name ==
"stackprotectorcheck") {
5064 }
else if (IsNVVM) {
5068 }
else if (IsAArch64) {
5072 }
else if (IsAMDGCN) {
5076 }
else if (IsOldSplice) {
5078 }
else if (Name.consume_front(
"convert.")) {
5090 const auto &DefaultCase = [&]() ->
void {
5098 "Unknown function for CallBase upgrade and isn't just a name change");
5106 "Return type must have changed");
5107 assert(OldST->getNumElements() ==
5109 "Must have same number of elements");
5112 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5115 for (
unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5116 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5117 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5136 case Intrinsic::arm_neon_vst1:
5137 case Intrinsic::arm_neon_vst2:
5138 case Intrinsic::arm_neon_vst3:
5139 case Intrinsic::arm_neon_vst4:
5140 case Intrinsic::arm_neon_vst2lane:
5141 case Intrinsic::arm_neon_vst3lane:
5142 case Intrinsic::arm_neon_vst4lane: {
5144 NewCall = Builder.CreateCall(NewFn, Args);
5147 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5148 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5149 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5154 NewCall = Builder.CreateCall(NewFn, Args);
5157 case Intrinsic::aarch64_sve_ld3_sret:
5158 case Intrinsic::aarch64_sve_ld4_sret:
5159 case Intrinsic::aarch64_sve_ld2_sret: {
5167 Name = Name.substr(5);
5174 unsigned MinElts = RetTy->getMinNumElements() /
N;
5176 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5178 for (
unsigned I = 0;
I <
N;
I++) {
5179 Value *SRet = Builder.CreateExtractValue(NewLdCall,
I);
5180 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet,
I * MinElts);
5186 case Intrinsic::coro_end: {
5189 NewCall = Builder.CreateCall(NewFn, Args);
5193 case Intrinsic::vector_extract: {
5195 Name = Name.substr(5);
5196 if (!Name.starts_with(
"aarch64.sve.tuple.get")) {
5201 unsigned MinElts = RetTy->getMinNumElements();
5204 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0), NewIdx});
5208 case Intrinsic::vector_insert: {
5210 Name = Name.substr(5);
5211 if (!Name.starts_with(
"aarch64.sve.tuple")) {
5215 if (Name.starts_with(
"aarch64.sve.tuple.set")) {
5220 NewCall = Builder.CreateCall(
5224 if (Name.starts_with(
"aarch64.sve.tuple.create")) {
5230 assert(
N > 1 &&
"Create is expected to be between 2-4");
5233 unsigned MinElts = RetTy->getMinNumElements() /
N;
5234 for (
unsigned I = 0;
I <
N;
I++) {
5236 Ret = Builder.CreateInsertVector(RetTy, Ret, V,
I * MinElts);
5243 case Intrinsic::arm_neon_bfdot:
5244 case Intrinsic::arm_neon_bfmmla:
5245 case Intrinsic::arm_neon_bfmlalb:
5246 case Intrinsic::arm_neon_bfmlalt:
5247 case Intrinsic::aarch64_neon_bfdot:
5248 case Intrinsic::aarch64_neon_bfmmla:
5249 case Intrinsic::aarch64_neon_bfmlalb:
5250 case Intrinsic::aarch64_neon_bfmlalt: {
5253 "Mismatch between function args and call args");
5254 size_t OperandWidth =
5256 assert((OperandWidth == 64 || OperandWidth == 128) &&
5257 "Unexpected operand width");
5259 auto Iter = CI->
args().begin();
5260 Args.push_back(*Iter++);
5261 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5262 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5263 NewCall = Builder.CreateCall(NewFn, Args);
5267 case Intrinsic::bitreverse:
5268 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5271 case Intrinsic::ctlz:
5272 case Intrinsic::cttz: {
5279 Builder.CreateCall(NewFn, {CI->
getArgOperand(0), Builder.getFalse()});
5283 case Intrinsic::objectsize: {
5284 Value *NullIsUnknownSize =
5288 NewCall = Builder.CreateCall(
5293 case Intrinsic::ctpop:
5294 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(0)});
5296 case Intrinsic::dbg_value: {
5298 Name = Name.substr(5);
5300 if (Name.starts_with(
"dbg.addr")) {
5314 if (
Offset->isNullValue()) {
5315 NewCall = Builder.CreateCall(
5324 case Intrinsic::ptr_annotation:
5332 NewCall = Builder.CreateCall(
5341 case Intrinsic::var_annotation:
5348 NewCall = Builder.CreateCall(
5357 case Intrinsic::riscv_aes32dsi:
5358 case Intrinsic::riscv_aes32dsmi:
5359 case Intrinsic::riscv_aes32esi:
5360 case Intrinsic::riscv_aes32esmi:
5361 case Intrinsic::riscv_sm4ks:
5362 case Intrinsic::riscv_sm4ed: {
5372 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5373 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5379 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5380 Value *Res = NewCall;
5382 Res = Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5388 case Intrinsic::nvvm_mapa_shared_cluster: {
5392 Value *Res = NewCall;
5393 Res = Builder.CreateAddrSpaceCast(
5400 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5401 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5404 Args[0] = Builder.CreateAddrSpaceCast(
5407 NewCall = Builder.CreateCall(NewFn, Args);
5413 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5414 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5415 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5416 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5417 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5418 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5419 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5420 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5427 Args[0] = Builder.CreateAddrSpaceCast(
5436 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5438 NewCall = Builder.CreateCall(NewFn, Args);
5444 case Intrinsic::riscv_sha256sig0:
5445 case Intrinsic::riscv_sha256sig1:
5446 case Intrinsic::riscv_sha256sum0:
5447 case Intrinsic::riscv_sha256sum1:
5448 case Intrinsic::riscv_sm3p0:
5449 case Intrinsic::riscv_sm3p1: {
5456 Builder.CreateTrunc(CI->
getArgOperand(0), Builder.getInt32Ty());
5458 NewCall = Builder.CreateCall(NewFn, Arg);
5460 Builder.CreateIntCast(NewCall, CI->
getType(),
true);
5467 case Intrinsic::x86_xop_vfrcz_ss:
5468 case Intrinsic::x86_xop_vfrcz_sd:
5469 NewCall = Builder.CreateCall(NewFn, {CI->
getArgOperand(1)});
5472 case Intrinsic::x86_xop_vpermil2pd:
5473 case Intrinsic::x86_xop_vpermil2ps:
5474 case Intrinsic::x86_xop_vpermil2pd_256:
5475 case Intrinsic::x86_xop_vpermil2ps_256: {
5479 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5480 NewCall = Builder.CreateCall(NewFn, Args);
5484 case Intrinsic::x86_sse41_ptestc:
5485 case Intrinsic::x86_sse41_ptestz:
5486 case Intrinsic::x86_sse41_ptestnzc: {
5500 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy,
"cast");
5501 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy,
"cast");
5503 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5507 case Intrinsic::x86_rdtscp: {
5513 NewCall = Builder.CreateCall(NewFn);
5515 Value *
Data = Builder.CreateExtractValue(NewCall, 1);
5518 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5526 case Intrinsic::x86_sse41_insertps:
5527 case Intrinsic::x86_sse41_dppd:
5528 case Intrinsic::x86_sse41_dpps:
5529 case Intrinsic::x86_sse41_mpsadbw:
5530 case Intrinsic::x86_avx_dp_ps_256:
5531 case Intrinsic::x86_avx2_mpsadbw: {
5537 Args.back() = Builder.CreateTrunc(Args.back(),
Type::getInt8Ty(
C),
"trunc");
5538 NewCall = Builder.CreateCall(NewFn, Args);
5542 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5543 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5544 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5545 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5546 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5547 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5553 NewCall = Builder.CreateCall(NewFn, Args);
5562 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5563 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5564 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5565 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5566 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5567 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5571 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5572 Args[1] = Builder.CreateBitCast(
5575 NewCall = Builder.CreateCall(NewFn, Args);
5576 Value *Res = Builder.CreateBitCast(
5584 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5585 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5586 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5590 Args[1] = Builder.CreateBitCast(
5592 Args[2] = Builder.CreateBitCast(
5595 NewCall = Builder.CreateCall(NewFn, Args);
5599 case Intrinsic::thread_pointer: {
5600 NewCall = Builder.CreateCall(NewFn, {});
5604 case Intrinsic::memcpy:
5605 case Intrinsic::memmove:
5606 case Intrinsic::memset: {
5622 NewCall = Builder.CreateCall(NewFn, Args);
5624 AttributeList NewAttrs = AttributeList::get(
5625 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5626 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5627 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5632 MemCI->setDestAlignment(
Align->getMaybeAlignValue());
5635 MTI->setSourceAlignment(
Align->getMaybeAlignValue());
5639 case Intrinsic::masked_load:
5640 case Intrinsic::masked_gather:
5641 case Intrinsic::masked_store:
5642 case Intrinsic::masked_scatter: {
5648 auto GetMaybeAlign = [](
Value *
Op) {
5658 auto GetAlign = [&](
Value *
Op) {
5667 case Intrinsic::masked_load:
5668 NewCall = Builder.CreateMaskedLoad(
5672 case Intrinsic::masked_gather:
5673 NewCall = Builder.CreateMaskedGather(
5679 case Intrinsic::masked_store:
5680 NewCall = Builder.CreateMaskedStore(
5684 case Intrinsic::masked_scatter:
5685 NewCall = Builder.CreateMaskedScatter(
5687 DL.getValueOrABITypeAlignment(
5701 case Intrinsic::lifetime_start:
5702 case Intrinsic::lifetime_end: {
5714 NewCall = Builder.CreateLifetimeStart(Ptr);
5716 NewCall = Builder.CreateLifetimeEnd(Ptr);
5725 case Intrinsic::x86_avx512_vpdpbusd_128:
5726 case Intrinsic::x86_avx512_vpdpbusd_256:
5727 case Intrinsic::x86_avx512_vpdpbusd_512:
5728 case Intrinsic::x86_avx512_vpdpbusds_128:
5729 case Intrinsic::x86_avx512_vpdpbusds_256:
5730 case Intrinsic::x86_avx512_vpdpbusds_512:
5731 case Intrinsic::x86_avx2_vpdpbssd_128:
5732 case Intrinsic::x86_avx2_vpdpbssd_256:
5733 case Intrinsic::x86_avx10_vpdpbssd_512:
5734 case Intrinsic::x86_avx2_vpdpbssds_128:
5735 case Intrinsic::x86_avx2_vpdpbssds_256:
5736 case Intrinsic::x86_avx10_vpdpbssds_512:
5737 case Intrinsic::x86_avx2_vpdpbsud_128:
5738 case Intrinsic::x86_avx2_vpdpbsud_256:
5739 case Intrinsic::x86_avx10_vpdpbsud_512:
5740 case Intrinsic::x86_avx2_vpdpbsuds_128:
5741 case Intrinsic::x86_avx2_vpdpbsuds_256:
5742 case Intrinsic::x86_avx10_vpdpbsuds_512:
5743 case Intrinsic::x86_avx2_vpdpbuud_128:
5744 case Intrinsic::x86_avx2_vpdpbuud_256:
5745 case Intrinsic::x86_avx10_vpdpbuud_512:
5746 case Intrinsic::x86_avx2_vpdpbuuds_128:
5747 case Intrinsic::x86_avx2_vpdpbuuds_256:
5748 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5753 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5754 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5756 NewCall = Builder.CreateCall(NewFn, Args);
5759 case Intrinsic::x86_avx512_vpdpwssd_128:
5760 case Intrinsic::x86_avx512_vpdpwssd_256:
5761 case Intrinsic::x86_avx512_vpdpwssd_512:
5762 case Intrinsic::x86_avx512_vpdpwssds_128:
5763 case Intrinsic::x86_avx512_vpdpwssds_256:
5764 case Intrinsic::x86_avx512_vpdpwssds_512:
5765 case Intrinsic::x86_avx2_vpdpwsud_128:
5766 case Intrinsic::x86_avx2_vpdpwsud_256:
5767 case Intrinsic::x86_avx10_vpdpwsud_512:
5768 case Intrinsic::x86_avx2_vpdpwsuds_128:
5769 case Intrinsic::x86_avx2_vpdpwsuds_256:
5770 case Intrinsic::x86_avx10_vpdpwsuds_512:
5771 case Intrinsic::x86_avx2_vpdpwusd_128:
5772 case Intrinsic::x86_avx2_vpdpwusd_256:
5773 case Intrinsic::x86_avx10_vpdpwusd_512:
5774 case Intrinsic::x86_avx2_vpdpwusds_128:
5775 case Intrinsic::x86_avx2_vpdpwusds_256:
5776 case Intrinsic::x86_avx10_vpdpwusds_512:
5777 case Intrinsic::x86_avx2_vpdpwuud_128:
5778 case Intrinsic::x86_avx2_vpdpwuud_256:
5779 case Intrinsic::x86_avx10_vpdpwuud_512:
5780 case Intrinsic::x86_avx2_vpdpwuuds_128:
5781 case Intrinsic::x86_avx2_vpdpwuuds_256:
5782 case Intrinsic::x86_avx10_vpdpwuuds_512:
5787 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5788 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5790 NewCall = Builder.CreateCall(NewFn, Args);
5793 assert(NewCall &&
"Should have either set this variable or returned through "
5794 "the default case");
5801 assert(
F &&
"Illegal attempt to upgrade a non-existent intrinsic.");
5815 F->eraseFromParent();
5821 if (NumOperands == 0)
5829 if (NumOperands == 3) {
5833 Metadata *Elts2[] = {ScalarType, ScalarType,
5847 if (
Opc != Instruction::BitCast)
5851 Type *SrcTy = V->getType();
5868 if (
Opc != Instruction::BitCast)
5871 Type *SrcTy =
C->getType();
5898 if (
NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5899 auto OpIt =
find_if(ModFlags->operands(), [](
const MDNode *Flag) {
5900 if (Flag->getNumOperands() < 3)
5902 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5903 return K->getString() ==
"Debug Info Version";
5906 if (OpIt != ModFlags->op_end()) {
5907 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5914 bool BrokenDebugInfo =
false;
5917 if (!BrokenDebugInfo)
5923 M.getContext().diagnose(Diag);
5930 M.getContext().diagnose(DiagVersion);
5940 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5943 if (
F->hasFnAttribute(Attr)) {
5946 StringRef S =
F->getFnAttribute(Attr).getValueAsString();
5948 auto [Part, Rest] = S.
split(
',');
5954 const unsigned Dim = DimC -
'x';
5955 assert(Dim < 3 &&
"Unexpected dim char");
5965 F->addFnAttr(Attr, NewAttr);
5969 return S ==
"x" || S ==
"y" || S ==
"z";
5974 if (K ==
"kernel") {
5986 const unsigned Idx = (AlignIdxValuePair >> 16);
5987 const Align StackAlign =
Align(AlignIdxValuePair & 0xFFFF);
5992 if (K ==
"maxclusterrank" || K ==
"cluster_max_blocks") {
5997 if (K ==
"minctasm") {
6002 if (K ==
"maxnreg") {
6007 if (K.consume_front(
"maxntid") &&
isXYZ(K)) {
6011 if (K.consume_front(
"reqntid") &&
isXYZ(K)) {
6015 if (K.consume_front(
"cluster_dim_") &&
isXYZ(K)) {
6019 if (K ==
"grid_constant") {
6034 NamedMDNode *NamedMD = M.getNamedMetadata(
"nvvm.annotations");
6041 if (!SeenNodes.
insert(MD).second)
6048 assert((MD->getNumOperands() % 2) == 1 &&
"Invalid number of operands");
6055 for (
unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6057 const MDOperand &V = MD->getOperand(j + 1);
6060 NewOperands.
append({K, V});
6063 if (NewOperands.
size() > 1)
6076 const char *MarkerKey =
"clang.arc.retainAutoreleasedReturnValueMarker";
6077 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6078 if (ModRetainReleaseMarker) {
6084 ID->getString().split(ValueComp,
"#");
6085 if (ValueComp.
size() == 2) {
6086 std::string NewValue = ValueComp[0].str() +
";" + ValueComp[1].str();
6090 M.eraseNamedMetadata(ModRetainReleaseMarker);
6101 auto UpgradeToIntrinsic = [&](
const char *OldFunc,
6127 bool InvalidCast =
false;
6129 for (
unsigned I = 0, E = CI->
arg_size();
I != E; ++
I) {
6142 Arg = Builder.CreateBitCast(Arg, NewFuncTy->
getParamType(
I));
6144 Args.push_back(Arg);
6151 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6156 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->
getType());
6169 UpgradeToIntrinsic(
"clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6177 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6178 {
"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6179 {
"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6180 {
"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6181 {
"objc_autoreleaseReturnValue",
6182 llvm::Intrinsic::objc_autoreleaseReturnValue},
6183 {
"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6184 {
"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6185 {
"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6186 {
"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6187 {
"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6188 {
"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6189 {
"objc_release", llvm::Intrinsic::objc_release},
6190 {
"objc_retain", llvm::Intrinsic::objc_retain},
6191 {
"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6192 {
"objc_retainAutoreleaseReturnValue",
6193 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6194 {
"objc_retainAutoreleasedReturnValue",
6195 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6196 {
"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6197 {
"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6198 {
"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6199 {
"objc_unsafeClaimAutoreleasedReturnValue",
6200 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6201 {
"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6202 {
"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6203 {
"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6204 {
"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6205 {
"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6206 {
"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6207 {
"objc_arc_annotation_topdown_bbstart",
6208 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6209 {
"objc_arc_annotation_topdown_bbend",
6210 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6211 {
"objc_arc_annotation_bottomup_bbstart",
6212 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6213 {
"objc_arc_annotation_bottomup_bbend",
6214 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6216 for (
auto &
I : RuntimeFuncs)
6217 UpgradeToIntrinsic(
I.first,
I.second);
6221 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6225 bool HasObjCFlag =
false, HasClassProperties =
false,
Changed =
false;
6226 bool HasSwiftVersionFlag =
false;
6227 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6234 if (
Op->getNumOperands() != 3)
6248 if (
ID->getString() ==
"Objective-C Image Info Version")
6250 if (
ID->getString() ==
"Objective-C Class Properties")
6251 HasClassProperties =
true;
6253 if (
ID->getString() ==
"PIC Level") {
6254 if (
auto *Behavior =
6256 uint64_t V = Behavior->getLimitedValue();
6262 if (
ID->getString() ==
"PIE Level")
6263 if (
auto *Behavior =
6270 if (
ID->getString() ==
"branch-target-enforcement" ||
6271 ID->getString().starts_with(
"sign-return-address")) {
6272 if (
auto *Behavior =
6278 Op->getOperand(1),
Op->getOperand(2)};
6288 if (
ID->getString() ==
"Objective-C Image Info Section") {
6291 Value->getString().split(ValueComp,
" ");
6292 if (ValueComp.
size() != 1) {
6293 std::string NewValue;
6294 for (
auto &S : ValueComp)
6295 NewValue += S.str();
6306 if (
ID->getString() ==
"Objective-C Garbage Collection") {
6309 assert(Md->getValue() &&
"Expected non-empty metadata");
6310 auto Type = Md->getValue()->getType();
6313 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6314 if ((Val & 0xff) != Val) {
6315 HasSwiftVersionFlag =
true;
6316 SwiftABIVersion = (Val & 0xff00) >> 8;
6317 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6318 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6329 if (
ID->getString() ==
"amdgpu_code_object_version") {
6332 MDString::get(M.getContext(),
"amdhsa_code_object_version"),
6344 if (HasObjCFlag && !HasClassProperties) {
6350 if (HasSwiftVersionFlag) {
6354 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6356 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6364 NamedMDNode *CFIConsts = M.getNamedMetadata(
"cfi.functions");
6368 auto MatchesVersion = [](
const MDNode *
Op) {
6369 return Op->getNumOperands() >= 3 &&
6383 assert(!MatchesVersion(
Op) &&
"Unexpected mix of CFIConstant formats");
6384 assert(
Op->getNumOperands() >= 2 &&
6385 "Expected at least 2 operands - name and linkage type");
6397 for (
unsigned J = 2, EJ =
Op->getNumOperands(); J != EJ; ++J)
6408 auto TrimSpaces = [](
StringRef Section) -> std::string {
6410 Section.split(Components,
',');
6415 for (
auto Component : Components)
6416 OS <<
',' << Component.trim();
6421 for (
auto &GV : M.globals()) {
6422 if (!GV.hasSection())
6427 if (!Section.starts_with(
"__DATA, __objc_catlist"))
6432 GV.setSection(TrimSpaces(Section));
6448struct StrictFPUpgradeVisitor :
public InstVisitor<StrictFPUpgradeVisitor> {
6449 StrictFPUpgradeVisitor() =
default;
6452 if (!
Call.isStrictFP())
6458 Call.removeFnAttr(Attribute::StrictFP);
6459 Call.addFnAttr(Attribute::NoBuiltin);
6464struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6465 :
public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6466 AMDGPUUnsafeFPAtomicsUpgradeVisitor() =
default;
6468 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6483 if (!
F.isDeclaration() && !
F.hasFnAttribute(Attribute::StrictFP)) {
6484 StrictFPUpgradeVisitor SFPV;
6489 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6490 F.getReturnType(),
F.getAttributes().getRetAttrs()));
6491 for (
auto &Arg :
F.args())
6493 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6495 bool AddingAttrs =
false, RemovingAttrs =
false;
6496 AttrBuilder AttrsToAdd(
F.getContext());
6501 if (
Attribute A =
F.getFnAttribute(
"implicit-section-name");
6502 A.isValid() &&
A.isStringAttribute()) {
6503 F.setSection(
A.getValueAsString());
6505 RemovingAttrs =
true;
6509 A.isValid() &&
A.isStringAttribute()) {
6512 AddingAttrs = RemovingAttrs =
true;
6515 if (
Attribute A =
F.getFnAttribute(
"uniform-work-group-size");
6516 A.isValid() &&
A.isStringAttribute() && !
A.getValueAsString().empty()) {
6518 RemovingAttrs =
true;
6519 if (
A.getValueAsString() ==
"true") {
6520 AttrsToAdd.addAttribute(
"uniform-work-group-size");
6529 if (
Attribute A =
F.getFnAttribute(
"amdgpu-unsafe-fp-atomics");
6532 if (
A.getValueAsBool()) {
6533 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6539 AttrsToRemove.
addAttribute(
"amdgpu-unsafe-fp-atomics");
6540 RemovingAttrs =
true;
6547 bool HandleDenormalMode =
false;
6549 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math"); Attr.isValid()) {
6552 DenormalFPMath = ParsedMode;
6554 AddingAttrs = RemovingAttrs =
true;
6555 HandleDenormalMode =
true;
6559 if (
Attribute Attr =
F.getFnAttribute(
"denormal-fp-math-f32");
6563 DenormalFPMathF32 = ParsedMode;
6565 AddingAttrs = RemovingAttrs =
true;
6566 HandleDenormalMode =
true;
6570 if (HandleDenormalMode)
6571 AttrsToAdd.addDenormalFPEnvAttr(
6575 F.removeFnAttrs(AttrsToRemove);
6578 F.addFnAttrs(AttrsToAdd);
6584 if (!
F.hasFnAttribute(FnAttrName))
6585 F.addFnAttr(FnAttrName,
Value);
6592 if (!
F.hasFnAttribute(FnAttrName)) {
6594 F.addFnAttr(FnAttrName);
6596 auto A =
F.getFnAttribute(FnAttrName);
6597 if (
"false" ==
A.getValueAsString())
6598 F.removeFnAttr(FnAttrName);
6599 else if (
"true" ==
A.getValueAsString()) {
6600 F.removeFnAttr(FnAttrName);
6601 F.addFnAttr(FnAttrName);
6607 Triple T(M.getTargetTriple());
6608 if (!
T.isThumb() && !
T.isARM() && !
T.isAArch64())
6618 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6622 if (
Op->getNumOperands() != 3)
6631 uint64_t *ValPtr = IDStr ==
"branch-target-enforcement" ? &BTEValue
6632 : IDStr ==
"branch-protection-pauth-lr" ? &BPPLRValue
6633 : IDStr ==
"guarded-control-stack" ? &GCSValue
6634 : IDStr ==
"sign-return-address" ? &SRAValue
6635 : IDStr ==
"sign-return-address-all" ? &SRAALLValue
6636 : IDStr ==
"sign-return-address-with-bkey"
6642 *ValPtr = CI->getZExtValue();
6648 bool BTE = BTEValue == 1;
6649 bool BPPLR = BPPLRValue == 1;
6650 bool GCS = GCSValue == 1;
6651 bool SRA = SRAValue == 1;
6654 if (SRA && SRAALLValue == 1)
6655 SignTypeValue =
"all";
6658 if (SRA && SRABKeyValue == 1)
6659 SignKeyValue =
"b_key";
6661 for (
Function &
F : M.getFunctionList()) {
6662 if (
F.isDeclaration())
6669 if (
auto A =
F.getFnAttribute(
"sign-return-address");
6670 A.isValid() &&
"none" ==
A.getValueAsString()) {
6671 F.removeFnAttr(
"sign-return-address");
6672 F.removeFnAttr(
"sign-return-address-key");
6688 if (SRAALLValue == 1)
6690 if (SRABKeyValue == 1)
6699 if (
T->getNumOperands() < 1)
6704 return S->getString().starts_with(
"llvm.vectorizer.");
6708 StringRef OldPrefix =
"llvm.vectorizer.";
6711 if (OldTag ==
"llvm.vectorizer.unroll")
6723 if (
T->getNumOperands() < 1)
6728 if (!OldTag->getString().starts_with(
"llvm.vectorizer."))
6733 Ops.reserve(
T->getNumOperands());
6735 for (
unsigned I = 1,
E =
T->getNumOperands();
I !=
E; ++
I)
6736 Ops.push_back(
T->getOperand(
I));
6750 Ops.reserve(
T->getNumOperands());
6761 if ((
T.isSPIR() || (
T.isSPIRV() && !
T.isSPIRVLogical())) &&
6762 !
DL.contains(
"-G") && !
DL.starts_with(
"G")) {
6763 return DL.empty() ? std::string(
"G1") : (
DL +
"-G1").str();
6766 if (
T.isLoongArch64() ||
T.isRISCV64()) {
6768 auto I =
DL.find(
"-n64-");
6770 return (
DL.take_front(
I) +
"-n32:64-" +
DL.drop_front(
I + 5)).str();
6775 std::string Res =
DL.str();
6778 if (!
DL.contains(
"-G") && !
DL.starts_with(
"G"))
6779 Res.append(Res.empty() ?
"G1" :
"-G1");
6787 if (!
DL.contains(
"-ni") && !
DL.starts_with(
"ni"))
6788 Res.append(
"-ni:7:8:9");
6790 if (
DL.ends_with(
"ni:7"))
6792 if (
DL.ends_with(
"ni:7:8"))
6797 if (!
DL.contains(
"-p7") && !
DL.starts_with(
"p7"))
6798 Res.append(
"-p7:160:256:256:32");
6799 if (!
DL.contains(
"-p8") && !
DL.starts_with(
"p8"))
6800 Res.append(
"-p8:128:128:128:48");
6801 constexpr StringRef OldP8(
"-p8:128:128-");
6802 if (
DL.contains(OldP8))
6803 Res.replace(Res.find(OldP8), OldP8.
size(),
"-p8:128:128:128:48-");
6804 if (!
DL.contains(
"-p9") && !
DL.starts_with(
"p9"))
6805 Res.append(
"-p9:192:256:256:32");
6809 if (!
DL.contains(
"m:e"))
6810 Res = Res.empty() ?
"m:e" :
"m:e-" + Res;
6815 if (
T.isSystemZ() && !
DL.empty()) {
6817 if (!
DL.contains(
"-S64"))
6818 return "E-S64" +
DL.drop_front(1).str();
6822 auto AddPtr32Ptr64AddrSpaces = [&
DL, &Res]() {
6825 StringRef AddrSpaces{
"-p270:32:32-p271:32:32-p272:64:64"};
6826 if (!
DL.contains(AddrSpaces)) {
6828 Regex R(
"^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6829 if (R.match(Res, &
Groups))
6835 if (
T.isAArch64()) {
6837 if (!
DL.empty() && !
DL.contains(
"-Fn32"))
6838 Res.append(
"-Fn32");
6839 AddPtr32Ptr64AddrSpaces();
6843 if (
T.isSPARC() || (
T.isMIPS64() && !
DL.contains(
"m:m")) ||
T.isPPC64() ||
6847 std::string I64 =
"-i64:64";
6848 std::string I128 =
"-i128:128";
6850 size_t Pos = Res.find(I64);
6851 if (Pos !=
size_t(-1))
6852 Res.insert(Pos + I64.size(), I128);
6856 if (
T.isPPC() &&
T.isOSAIX() && !
DL.contains(
"f64:32:64") && !
DL.empty()) {
6857 size_t Pos = Res.find(
"-S128");
6860 Res.insert(Pos,
"-f64:32:64");
6866 AddPtr32Ptr64AddrSpaces();
6874 if (!
T.isOSIAMCU()) {
6875 std::string I128 =
"-i128:128";
6878 Regex R(
"^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6879 if (R.match(Res, &
Groups))
6887 if (
T.isWindowsMSVCEnvironment() && !
T.isArch64Bit()) {
6889 auto I =
Ref.find(
"-f80:32-");
6891 Res = (
Ref.take_front(
I) +
"-f80:128-" +
Ref.drop_front(
I + 8)).str();
6899 Attribute A =
B.getAttribute(
"no-frame-pointer-elim");
6902 FramePointer =
A.getValueAsString() ==
"true" ?
"all" :
"none";
6903 B.removeAttribute(
"no-frame-pointer-elim");
6905 if (
B.contains(
"no-frame-pointer-elim-non-leaf")) {
6907 if (FramePointer !=
"all")
6908 FramePointer =
"non-leaf";
6909 B.removeAttribute(
"no-frame-pointer-elim-non-leaf");
6911 if (!FramePointer.
empty())
6912 B.addAttribute(
"frame-pointer", FramePointer);
6914 A =
B.getAttribute(
"null-pointer-is-valid");
6917 bool NullPointerIsValid =
A.getValueAsString() ==
"true";
6918 B.removeAttribute(
"null-pointer-is-valid");
6919 if (NullPointerIsValid)
6920 B.addAttribute(Attribute::NullPointerIsValid);
6923 A =
B.getAttribute(
"uniform-work-group-size");
6927 bool IsTrue = Val ==
"true";
6928 B.removeAttribute(
"uniform-work-group-size");
6930 B.addAttribute(
"uniform-work-group-size");
6941 return OBD.
getTag() ==
"clang.arc.attachedcall" &&
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
LLVM_ABI StringRef getString() const
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
@ ADDRESS_SPACE_SHARED_CLUSTER
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ Default
The result value is uniform if and only if all operands are uniform.
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.