43 cl::desc(
"Force a specific generic_v<N> flag to be "
44 "added. For testing purposes only."),
49 if (!HSAMetadataDoc.
fromYAML(HSAMetadataString))
250 OS <<
"\t.amdgcn_target \"" << *
getTargetID() <<
"\"\n";
256 OS <<
"\t.amdhsa_code_object_version " << COV <<
'\n';
265 OS <<
"\t.amd_kernel_code_t\n";
266 Header.EmitKernelCodeT(OS,
getContext(), FoldAndPrint);
267 OS <<
"\t.end_amd_kernel_code_t\n";
275 OS <<
"\t.amdgpu_hsa_kernel " << SymbolName <<
'\n' ;
282 OS <<
"\t.amdgpu_lds " << Symbol->getName() <<
", " <<
Size <<
", "
283 << Alignment.
value() <<
'\n';
292#define PRINT_RES_INFO(ARG) \
294 ARG->print(OS, getContext().getAsmInfo()); \
296 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
297 Streamer.addBlankLine();
315#define PRINT_RES_INFO(ARG) \
317 ARG->print(OS, getContext().getAsmInfo()); \
319 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
320 Streamer.addBlankLine();
330 OS <<
"\t.amd_amdgpu_isa \"" <<
getTargetID() <<
"\"\n";
337 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
340 std::string HSAMetadataString;
342 HSAMetadataDoc.
toYAML(StrOS);
345 OS << StrOS.
str() <<
'\n';
351 const uint32_t Encoded_s_code_end = 0xbf9f0000;
352 const uint32_t Encoded_s_nop = 0xbf800000;
353 uint32_t Encoded_pad = Encoded_s_code_end;
363 Encoded_pad = Encoded_s_nop;
367 OS <<
"\t.p2alignl " << Log2CacheLineSize <<
", " << Encoded_pad <<
'\n';
368 OS <<
"\t.fill " << (FillSize / 4) <<
", 4, " << Encoded_pad <<
'\n';
376 const MCExpr *ReserveFlatScr) {
380 OS <<
"\t.amdhsa_kernel " << KernelName <<
'\n';
385 const MCExpr *ShiftedAndMaskedExpr =
397 OS <<
"\t\t.amdhsa_group_segment_fixed_size ";
401 OS <<
"\t\t.amdhsa_private_segment_fixed_size ";
405 OS <<
"\t\t.amdhsa_kernarg_size ";
411 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
412 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
413 ".amdhsa_user_sgpr_count");
416 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
417 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
418 ".amdhsa_user_sgpr_count");
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
425 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
426 ".amdhsa_user_sgpr_private_segment_buffer");
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
429 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
430 ".amdhsa_user_sgpr_dispatch_ptr");
432 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
433 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
434 ".amdhsa_user_sgpr_queue_ptr");
436 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
437 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
438 ".amdhsa_user_sgpr_kernarg_segment_ptr");
440 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
442 ".amdhsa_user_sgpr_dispatch_id");
445 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
446 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
447 ".amdhsa_user_sgpr_flat_scratch_init");
450 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
451 ".amdhsa_user_sgpr_kernarg_preload_length");
453 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
454 ".amdhsa_user_sgpr_kernarg_preload_offset");
458 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
459 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
460 ".amdhsa_user_sgpr_private_segment_size");
461 if (IVersion.
Major >= 10)
463 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
464 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
465 ".amdhsa_wavefront_size32");
468 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
469 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
470 ".amdhsa_uses_dynamic_stack");
472 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
473 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
475 ?
".amdhsa_enable_private_segment"
476 :
".amdhsa_system_sgpr_private_segment_wavefront_offset"));
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
479 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
480 ".amdhsa_system_sgpr_workgroup_id_x");
482 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
483 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
484 ".amdhsa_system_sgpr_workgroup_id_y");
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
487 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
488 ".amdhsa_system_sgpr_workgroup_id_z");
490 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
491 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
492 ".amdhsa_system_sgpr_workgroup_info");
494 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
495 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
496 ".amdhsa_system_vgpr_workitem_id");
499 OS <<
"\t\t.amdhsa_next_free_vgpr ";
500 EmitMCExpr(NextVGPR);
503 OS <<
"\t\t.amdhsa_next_free_sgpr ";
504 EmitMCExpr(NextSGPR);
511 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
512 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
getContext());
517 OS <<
"\t\t.amdhsa_accum_offset ";
525 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
526 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
527 ".amdhsa_named_barrier_count");
529 OS <<
"\t\t.amdhsa_reserve_vcc ";
530 EmitMCExpr(ReserveVCC);
534 OS <<
"\t\t.amdhsa_reserve_flat_scratch ";
535 EmitMCExpr(ReserveFlatScr);
545 OS <<
"\t\t.amdhsa_reserve_xnack_mask " <<
getTargetID()->isXnackOnOrAny() <<
'\n';
550 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
551 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
552 ".amdhsa_float_round_mode_32");
554 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
555 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
556 ".amdhsa_float_round_mode_16_64");
558 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
559 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
560 ".amdhsa_float_denorm_mode_32");
562 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
563 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
564 ".amdhsa_float_denorm_mode_16_64");
565 if (STI.
hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
567 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
568 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
569 ".amdhsa_dx10_clamp");
571 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
572 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
573 ".amdhsa_ieee_mode");
575 if (IVersion.
Major >= 9) {
577 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
578 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
579 ".amdhsa_fp16_overflow");
583 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
584 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
".amdhsa_tg_split");
587 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
588 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
589 ".amdhsa_workgroup_processor_mode");
590 if (IVersion.
Major >= 10) {
592 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
593 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
594 ".amdhsa_memory_ordered");
596 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
597 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
598 ".amdhsa_forward_progress");
600 if (IVersion.
Major >= 10 && IVersion.
Major < 12) {
602 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
603 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
604 ".amdhsa_shared_vgpr_count");
606 if (IVersion.
Major == 11) {
608 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
609 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
610 ".amdhsa_inst_pref_size");
612 if (IVersion.
Major >= 12) {
614 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
615 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
616 ".amdhsa_inst_pref_size");
618 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
619 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
620 ".amdhsa_round_robin_scheduling");
625 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
626 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
627 ".amdhsa_exception_fp_ieee_invalid_op");
630 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
631 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
632 ".amdhsa_exception_fp_denorm_src");
636 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
637 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
638 ".amdhsa_exception_fp_ieee_div_zero");
641 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
642 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
643 ".amdhsa_exception_fp_ieee_overflow");
646 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
647 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
648 ".amdhsa_exception_fp_ieee_underflow");
651 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
652 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
653 ".amdhsa_exception_fp_ieee_inexact");
656 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
657 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
658 ".amdhsa_exception_int_div_zero");
660 OS <<
"\t.end_amdhsa_kernel\n";
680 W.setELFHeaderEFlags(getEFlags());
681 W.setOverrideABIVersion(
698void AMDGPUTargetELFStreamer::EmitNote(
702 auto &Context = S.getContext();
704 auto NameSZ = Name.size() + 1;
706 unsigned NoteFlags = 0;
716 S.emitValue(DescSZ, 4);
717 S.emitInt32(NoteType);
719 S.emitValueToAlignment(
Align(4), 0, 1, 0);
721 S.emitValueToAlignment(
Align(4), 0, 1, 0);
725unsigned AMDGPUTargetELFStreamer::getEFlags() {
730 return getEFlagsR600();
732 return getEFlagsAMDGCN();
736unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
742unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
743 assert(STI.getTargetTriple().isAMDGCN());
745 switch (STI.getTargetTriple().getOS()) {
750 return getEFlagsUnknownOS();
752 return getEFlagsAMDHSA();
754 return getEFlagsAMDPAL();
756 return getEFlagsMesa3D();
760unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
764 return getEFlagsV3();
767unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
771 return getEFlagsV6();
772 return getEFlagsV4();
775unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
778 return getEFlagsV3();
781unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
784 return getEFlagsV3();
787unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
788 unsigned EFlagsV3 = 0;
803unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
804 unsigned EFlagsV4 = 0;
843unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
844 unsigned Flags = getEFlagsV4();
880 " - no ELF flag can represent this version!");
905 auto *SymbolELF =
static_cast<MCSymbolELF *
>(Symbol);
908 if (!SymbolELF->isBindingSet())
911 if (SymbolELF->declareCommon(
Size, Alignment)) {
913 " redeclared as different type");
924 auto *DescBegin = Context.createTempSymbol();
925 auto *DescEnd = Context.createTempSymbol();
947 if (!Verifier.verify(HSAMetadataDoc.
getRoot()))
950 std::string HSAMetadataString;
956 auto *DescBegin = Context.createTempSymbol();
957 auto *DescEnd = Context.createTempSymbol();
972 const uint32_t Encoded_s_code_end = 0xbf9f0000;
973 const uint32_t Encoded_s_nop = 0xbf800000;
974 uint32_t Encoded_pad = Encoded_s_code_end;
984 Encoded_pad = Encoded_s_nop;
991 for (
unsigned I = 0;
I < FillSize;
I += 4)
1001 const MCExpr *ReserveFlatScr) {
1003 auto &Context = Streamer.getContext();
1005 auto *KernelCodeSymbol =
1007 auto *KernelDescriptorSymbol =
static_cast<MCSymbolELF *
>(
1008 Context.getOrCreateSymbol(
Twine(KernelName) +
Twine(
".kd")));
1012 KernelDescriptorSymbol->
setBinding(KernelCodeSymbol->getBinding());
1013 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
1014 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
1017 KernelDescriptorSymbol->setSize(
1025 Streamer.emitLabel(KernelDescriptorSymbol);
1036 Streamer.emitInt8(0u);
1049 Streamer.emitInt8(0u);
1062 Streamer.emitInt8(0u);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
Enums and constants for AMDGPU PT_NOTE sections.
static cl::opt< unsigned > ForceGenericVersion("amdgpu-force-generic-version", cl::desc("Force a specific generic_v<N> flag to be " "added. For testing purposes only."), cl::ReallyHidden, cl::init(0))
#define PRINT_RES_INFO(ARG)
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
verify safepoint Safepoint IR Verifier
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override
void EmitDirectiveAMDGCNTarget() override
void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitDirectiveAMDGCNTarget() override
bool EmitCodeEnd(const MCSubtargetInfo &STI) override
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override
bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, Align Alignment) override
void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr) override
MCELFStreamer & getStreamer()
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override
bool EmitISAVersion() override
virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)
Emit HSA Metadata.
AMDGPUPALMetadata * getPALMetadata()
AMDGPUTargetStreamer(MCStreamer &S)
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString)
static unsigned getElfMach(StringRef GPU)
MCContext & getContext() const
static StringRef getArchNameFromElfMach(unsigned ElfMach)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
unsigned CodeObjectVersion
This class is intended to be used as a base class for asm properties and features specific to the tar...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
const MCAsmInfo * getAsmInfo() const
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
ELFObjectWriter & getWriter()
void emitLabel(MCSymbol *Symbol, SMLoc Loc=SMLoc()) override
Emit a label for Symbol into the current section.
Base class for the full range of assembler expressions which are needed for parsing.
void emitBytes(StringRef Data) override
Emit the bytes in Data into the output.
Streaming machine code generation interface.
virtual bool popSection()
Restore the current and previous section from the section stack.
MCContext & getContext() const
virtual void emitValueToAlignment(Align Alignment, int64_t Fill=0, uint8_t FillLen=1, unsigned MaxBytesToEmit=0)
Emit some number of copies of Value until the byte alignment ByteAlignment is reached.
void pushSection()
Save the current and previous section on the section stack.
void emitInt32(uint64_t Value)
Generic base class for all target subtargets.
bool hasFeature(unsigned Feature) const
const Triple & getTargetTriple() const
void setBinding(unsigned Binding) const
void setType(unsigned Type) const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
StringRef - Represent a constant reference to a string, i.e.
ArchType getArch() const
Get the parsed architecture type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
An efficient, type-erasing, non-owning reference to a callable.
Simple in-memory representation of a document of msgpack objects with ability to find and create arra...
DocNode & getRoot()
Get ref to the document's root element.
LLVM_ABI void toYAML(raw_ostream &OS)
Convert MsgPack Document to YAML text.
LLVM_ABI void writeToBlob(std::string &Blob)
Write a MsgPack document to a binary MsgPack blob.
LLVM_ABI bool fromYAML(StringRef S)
Read YAML text into the MsgPack document. Returns false on failure.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
LLVM_ABI StringRef getArchNameR600(GPUKind AK)
GPUKind
GPU kinds supported by the AMDGPU target.
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
bool isHsaAbi(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_ABI GPUKind parseArchAMDGCN(StringRef CPU)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
LLVM_ABI StringRef getArchNameAMDGCN(GPUKind AK)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
LLVM_ABI GPUKind parseArchR600(StringRef CPU)
@ EF_AMDGPU_GENERIC_VERSION_MAX
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX703
@ EF_AMDGPU_MACH_AMDGCN_GFX1035
@ EF_AMDGPU_FEATURE_SRAMECC_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX1031
@ EF_AMDGPU_GENERIC_VERSION_OFFSET
@ EF_AMDGPU_MACH_R600_CAYMAN
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX704
@ EF_AMDGPU_MACH_AMDGCN_GFX902
@ EF_AMDGPU_MACH_AMDGCN_GFX810
@ EF_AMDGPU_MACH_AMDGCN_GFX950
@ EF_AMDGPU_MACH_AMDGCN_GFX1036
@ EF_AMDGPU_MACH_AMDGCN_GFX1102
@ EF_AMDGPU_MACH_R600_RV730
@ EF_AMDGPU_MACH_R600_RV710
@ EF_AMDGPU_MACH_AMDGCN_GFX908
@ EF_AMDGPU_MACH_AMDGCN_GFX1011
@ EF_AMDGPU_MACH_R600_CYPRESS
@ EF_AMDGPU_MACH_AMDGCN_GFX1032
@ EF_AMDGPU_MACH_R600_R600
@ EF_AMDGPU_MACH_AMDGCN_GFX1250
@ EF_AMDGPU_MACH_R600_TURKS
@ EF_AMDGPU_MACH_R600_JUNIPER
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX601
@ EF_AMDGPU_MACH_AMDGCN_GFX942
@ EF_AMDGPU_MACH_AMDGCN_GFX1152
@ EF_AMDGPU_MACH_R600_R630
@ EF_AMDGPU_MACH_R600_REDWOOD
@ EF_AMDGPU_MACH_R600_RV770
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX600
@ EF_AMDGPU_FEATURE_XNACK_V3
@ EF_AMDGPU_MACH_AMDGCN_GFX602
@ EF_AMDGPU_MACH_AMDGCN_GFX1101
@ EF_AMDGPU_MACH_AMDGCN_GFX1100
@ EF_AMDGPU_MACH_AMDGCN_GFX1310
@ EF_AMDGPU_MACH_AMDGCN_GFX1033
@ EF_AMDGPU_MACH_AMDGCN_GFX801
@ EF_AMDGPU_MACH_AMDGCN_GFX705
@ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1153
@ EF_AMDGPU_MACH_AMDGCN_GFX1170
@ EF_AMDGPU_MACH_AMDGCN_GFX1010
@ EF_AMDGPU_MACH_R600_RV670
@ EF_AMDGPU_MACH_AMDGCN_GFX701
@ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1012
@ EF_AMDGPU_MACH_AMDGCN_GFX1151
@ EF_AMDGPU_MACH_AMDGCN_GFX1030
@ EF_AMDGPU_MACH_R600_CEDAR
@ EF_AMDGPU_MACH_AMDGCN_GFX1200
@ EF_AMDGPU_MACH_AMDGCN_GFX700
@ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX803
@ EF_AMDGPU_MACH_AMDGCN_GFX802
@ EF_AMDGPU_MACH_AMDGCN_GFX90C
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX900
@ EF_AMDGPU_MACH_AMDGCN_GFX909
@ EF_AMDGPU_MACH_AMDGCN_GFX906
@ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX1103
@ EF_AMDGPU_MACH_R600_CAICOS
@ EF_AMDGPU_MACH_AMDGCN_GFX90A
@ EF_AMDGPU_MACH_AMDGCN_GFX1034
@ EF_AMDGPU_MACH_AMDGCN_GFX1013
@ EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
@ EF_AMDGPU_MACH_AMDGCN_GFX904
@ EF_AMDGPU_MACH_AMDGCN_GFX1251
@ EF_AMDGPU_MACH_R600_RS880
@ EF_AMDGPU_MACH_AMDGCN_GFX805
@ EF_AMDGPU_MACH_AMDGCN_GFX1201
@ EF_AMDGPU_MACH_AMDGCN_GFX1150
@ EF_AMDGPU_MACH_R600_SUMO
@ EF_AMDGPU_MACH_R600_BARTS
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
@ EF_AMDGPU_MACH_AMDGCN_GFX702
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Instruction set architecture version.
const MCExpr * compute_pgm_rsrc2
const MCExpr * kernarg_size
const MCExpr * kernarg_preload
const MCExpr * compute_pgm_rsrc3
const MCExpr * private_segment_fixed_size
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
const MCExpr * compute_pgm_rsrc1
const MCExpr * group_segment_fixed_size
const MCExpr * kernel_code_properties
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
uint32_t group_segment_fixed_size
uint32_t compute_pgm_rsrc1
uint32_t private_segment_fixed_size
uint32_t compute_pgm_rsrc2
uint16_t kernel_code_properties
uint32_t compute_pgm_rsrc3
int64_t kernel_code_entry_byte_offset