doxygen/AMDGPUSubtarget_8h_source.html

//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//==-----------------------------------------------------------------------===//

//

/// \file

/// Base class for AMDGPU specific classes of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H

#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H


#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <optional>
#include <utility>


namespace llvm {


// Forward declarations. This header only names these types in declarations
// (by reference, by pointer, or as the return type of a function that is
// declared but not defined here), so their full definitions are not required.
enum AMDGPUDwarfFlavour : unsigned;

class Function;

class Instruction;

class MachineFunction;

class TargetMachine;


/// Abstract base class for AMDGPU subtargets.
///
/// Stores the target triple together with a small set of feature flags and
/// occupancy-related hardware parameters, and declares the queries (work group
/// sizes, waves per EU, LDS limits, kernel argument layout, ...) that are
/// answered from them. The class is abstract: several limits are pure virtual
/// and must be supplied by a derived subtarget.
class AMDGPUSubtarget {
public:
  /// Hardware generations, explicitly numbered in ascending order starting at
  /// INVALID = 0.
  enum Generation {
    INVALID = 0,
    R600 = 1,
    R700 = 2,
    EVERGREEN = 3,
    NORTHERN_ISLANDS = 4,
    SOUTHERN_ISLANDS = 5,
    SEA_ISLANDS = 6,
    VOLCANIC_ISLANDS = 7,
    GFX9 = 8,
    GFX10 = 9,
    GFX11 = 10,
    GFX12 = 11,
    GFX13 = 12,
  };

private:
  /// Triple this subtarget was created for. Held by reference, so the Triple
  /// handed to the constructor must outlive this object.
  const Triple &TargetTriple;

protected:
  // Feature flags with their base-class defaults. They are protected,
  // presumably so that derived subtargets can adjust them while initializing.
  bool HasMulI24 = true;
  bool HasMulU24 = true;
  bool HasSMulHi = false;
  bool HasFminFmaxLegacy = true;

  // Hardware parameters with their base-class defaults.
  unsigned EUsPerCU = 4;
  unsigned MaxWavesPerEU = 10;
  unsigned LocalMemorySize = 0;
  unsigned AddressableLocalMemorySize = 0;
  char WavefrontSizeLog2 = 0;
  unsigned FlatOffsetBitWidth = 0;

public:
  /// Construct a subtarget for triple \p TT. Only a reference to \p TT is
  /// stored; the caller keeps ownership and must keep it alive.
  AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}

  /// \returns The AMDGPU subtarget associated with machine function \p MF.
  static const AMDGPUSubtarget &get(const MachineFunction &MF);

  /// \returns The AMDGPU subtarget that target machine \p TM uses for
  /// function \p F.
  static const AMDGPUSubtarget &get(const TargetMachine &TM,
                                    const Function &F);

  /// \returns Default range flat work group size for a calling convention.
  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;

  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
  /// for function \p F, or minimum/maximum flat work group sizes explicitly
  /// requested using "amdgpu-flat-work-group-size" attribute attached to
  /// function \p F.
  ///
  /// \returns Subtarget's default values if explicitly requested values cannot
  /// be converted to integer, or violate subtarget's specifications.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;

  /// \returns true if the maximum flat work-group size for \p F is at most the
  /// wavefront size, so a work-group may fit in a single wavefront.
  bool isSingleWavefrontWorkgroup(const Function &F) const;

  /// \returns The required size of workgroups that will be used to execute \p F
  /// in the \p Dim dimension, if it is known (from `!reqd_work_group_size`
  /// metadata). Otherwise, returns std::nullopt.
  std::optional<unsigned> getReqdWorkGroupSize(const Function &F,
                                               unsigned Dim) const;

  /// \returns true if \p F will execute in a manner that leaves the X
  /// dimensions of the workitem ID evenly tiling wavefronts - that is, if X /
  /// wavefrontsize is uniform. This is true if either the Y and Z block
  /// dimensions are known to always be 1 or if the X dimension will always be a
  /// power of 2. If \p RequiresUniformYZ is true, it also ensures that the Y
  /// and Z workitem IDs will be uniform (so, while a (32, 2, 1) launch with
  /// wavesize64 would ordinarily pass this test, it won't with
  /// \p RequiresUniformYZ).
  ///
  /// This information is currently only gathered from the !reqd_work_group_size
  /// metadata on \p F, but this may be improved in the future.
  bool hasWavefrontsEvenlySplittingXDim(const Function &F,
                                        bool RequiresUniformYZ = false) const;

  /// \returns Subtarget's default pair of minimum/maximum number of waves per
  /// execution unit for function \p F, or minimum/maximum number of waves per
  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
  /// attached to function \p F.
  ///
  /// \returns Subtarget's default values if explicitly requested values cannot
  /// be converted to integer, violate subtarget's specifications, or are not
  /// compatible with minimum/maximum number of waves limited by flat work group
  /// size, register usage, and/or lds usage.
  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;

  /// Overload which uses the specified values for the flat workgroup sizes and
  /// LDS space rather than querying the function itself. \p FlatWorkGroupSizes
  /// should correspond to the function's value for getFlatWorkGroupSizes and
  /// \p LDSBytes to the per-workgroup LDS allocation.
  std::pair<unsigned, unsigned>
  getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
                unsigned LDSBytes, const Function &F) const;

  /// Returns the target minimum/maximum number of waves per EU. This is based
  /// on the minimum/maximum number of \p RequestedWavesPerEU and further
  /// limited by the maximum achievable occupancy derived from the range of
  /// \p FlatWorkGroupSizes and number of \p LDSBytes per workgroup.
  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(std::pair<unsigned, unsigned> RequestedWavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSizes,
                         unsigned LDSBytes) const;

  /// Return the amount of LDS that can be used that will not restrict the
  /// occupancy lower than WaveCount.
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

  /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
  /// be achieved when the only function running on a CU is \p F and each
  /// workgroup running the function requires \p LDSBytes bytes of LDS space.
  /// This notably depends on the range of allowed flat group sizes for the
  /// function and hardware characteristics.
  ///
  /// Convenience wrapper: forwards to the overload below using
  /// getFlatWorkGroupSizes(F) as the work group size range.
  std::pair<unsigned, unsigned>
  getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const {
    return getOccupancyWithWorkGroupSizes(LDSBytes, getFlatWorkGroupSizes(F));
  }

  /// Overload which uses the specified values for the flat work group sizes,
  /// rather than querying the function itself. \p FlatWorkGroupSizes should
  /// correspond to the function's value for getFlatWorkGroupSizes.
  std::pair<unsigned, unsigned> getOccupancyWithWorkGroupSizes(
      uint32_t LDSBytes,
      std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;

  /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can
  /// be achieved when the only function running on a CU is \p MF. This notably
  /// depends on the range of allowed flat group sizes for the function, the
  /// amount of per-workgroup LDS space required by the function, and hardware
  /// characteristics.
  std::pair<unsigned, unsigned>
  getOccupancyWithWorkGroupSizes(const MachineFunction &MF) const;

  /// \returns true if the OS component of the target triple is AMDHSA.
  bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; }

  /// \returns true if the OS component of the target triple is AMDPAL.
  bool isAmdPalOS() const { return TargetTriple.getOS() == Triple::AMDPAL; }

  /// \returns true if the OS component of the target triple is Mesa3D.
  bool isMesa3DOS() const { return TargetTriple.getOS() == Triple::Mesa3D; }

  /// \returns true if \p F is a Mesa kernel. NOTE(review): the exact criteria
  /// live in the out-of-line definition; confirm there.
  bool isMesaKernel(const Function &F) const;

  /// \returns true if the target OS is AMDHSA or \p F is a Mesa kernel.
  bool isAmdHsaOrMesa(const Function &F) const {
    return isAmdHsaOS() || isMesaKernel(F);
  }

  /// \returns true if the target triple reports an AMDGCN architecture.
  bool isGCN() const { return TargetTriple.isAMDGCN(); }

  //==---------------------------------------------------------------------===//
  // TableGen-generated feature getters.
  //
  // Every GET_SUBTARGETINFO_MACRO entry in the generated .inc file expands to
  // a virtual getter that returns false in this base class; the ATTRIBUTE and
  // DEFAULT arguments are ignored by this expansion. Derived subtargets are
  // presumably expected to override the getters with real feature values.
  //==---------------------------------------------------------------------===//
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  virtual bool GETTER() const { return false; }
#include "AMDGPUGenSubtargetInfo.inc"
  //==---------------------------------------------------------------------===//

  /// Return true if real (non-fake) variants of True16 instructions using
  /// 16-bit registers should be code-generated. Fake True16 instructions are
  /// identical to non-fake ones except that they take 32-bit registers as
  /// operands and always use their low halves.
  // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
  // supported and the support for fake True16 instructions is removed.
  bool useRealTrue16Insts() const {
    return hasTrue16BitInsts() && enableRealTrue16Insts();
  }

  /// \returns The HasMulI24 feature flag.
  bool hasMulI24() const { return HasMulI24; }

  /// \returns The HasMulU24 feature flag.
  bool hasMulU24() const { return HasMulU24; }

  /// \returns The HasSMulHi feature flag.
  bool hasSMulHi() const { return HasSMulHi; }

  /// \returns The HasFminFmaxLegacy feature flag.
  bool hasFminFmaxLegacy() const { return HasFminFmaxLegacy; }

  /// \returns The wavefront size, i.e. 2 raised to getWavefrontSizeLog2().
  unsigned getWavefrontSize() const {
    // Shift an unsigned one so the arithmetic matches the return type.
    return 1u << WavefrontSizeLog2;
  }

  /// \returns The base-2 logarithm of the wavefront size.
  unsigned getWavefrontSizeLog2() const { return WavefrontSizeLog2; }

  /// Return the maximum number of bytes of LDS available for all workgroups
  /// running on the same WGP or CU.
  /// For GFX10-GFX12 in WGP mode this is 128k even though each workgroup is
  /// limited to 64k.
  unsigned getLocalMemorySize() const { return LocalMemorySize; }

  /// Return the maximum number of bytes of LDS that can be allocated to a
  /// single workgroup.
  /// For GFX10-GFX12 in WGP mode this is limited to 64k even though the WGP has
  /// 128k in total.
  unsigned getAddressableLocalMemorySize() const {
    return AddressableLocalMemorySize;
  }

  /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
  /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
  /// CU mode into account.
  unsigned getEUsPerCU() const { return EUsPerCU; }

  /// \returns The alignment of the implicit argument pointer: 8 bytes when the
  /// target OS is AMDHSA, 4 bytes otherwise.
  Align getAlignmentForImplicitArgPtr() const {
    return isAmdHsaOS() ? Align(8) : Align(4);
  }

  /// \returns The offset in bytes from the start of the input buffer of the
  /// first explicit kernel argument: 0 for AMDHSA, AMDPAL and Mesa3D, and 36
  /// for every other OS.
  unsigned getExplicitKernelArgOffset() const {
    // Every path through the switch returns (the default label covers all
    // remaining OS values), so nothing may follow it.
    switch (TargetTriple.getOS()) {
    case Triple::AMDHSA:
    case Triple::AMDPAL:
    case Triple::Mesa3D:
      return 0;
    case Triple::UnknownOS:
    default:
      // For legacy reasons unknown/other is treated as a different version of
      // mesa.
      return 36;
    }
  }

  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;

  /// \returns Minimum flat work group size supported by the subtarget.
  virtual unsigned getMinFlatWorkGroupSize() const = 0;

  /// \returns Maximum flat work group size supported by the subtarget.
  virtual unsigned getMaxFlatWorkGroupSize() const = 0;

  /// \returns Number of waves per execution unit required to support the given
  /// \p FlatWorkGroupSize.
  virtual unsigned
  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;

  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget.
  virtual unsigned getMinWavesPerEU() const = 0;

  /// \returns Maximum number of waves per execution unit supported by the
  /// subtarget without any kind of limitation.
  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }

  /// Return the maximum workitem ID value in the function, for the given (0, 1,
  /// 2) dimension.
  unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;

  /// Return the maximum number of work groups for function \p F.
  /// NOTE(review): the number and order of the returned elements are defined
  /// by the out-of-line implementation; confirm there before relying on them.
  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const;

  /// Return true if only a single workitem can be active in a wave.
  bool isSingleLaneExecution(const Function &Kernel) const;

  /// Creates value range metadata on a workitemid.* intrinsic call or load.
  bool makeLIDRangeMetadata(Instruction *I) const;

  /// \returns Number of bytes of arguments that are passed to a shader or
  /// kernel in addition to the explicit ones declared for the function.
  unsigned getImplicitArgNumBytes(const Function &F) const;

  /// \returns The size of the explicit kernel arguments of \p F.
  /// \p MaxAlign is taken by non-const reference and is presumably updated
  /// with the largest argument alignment seen - confirm in the implementation.
  uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;

  /// \returns The size of the kernel argument segment of \p F.
  /// \p MaxAlign is taken by non-const reference; see the implementation for
  /// how it is updated.
  unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;

  /// \returns Corresponding DWARF register number mapping flavour for this
  /// subtarget's wavefront size.
  AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;

  virtual ~AMDGPUSubtarget() = default;
};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H

Alignment.h

CallingConv.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

SmallVector.h
This file defines the SmallVector class.

Triple.h

llvm::AMDGPUSubtarget
Definition AMDGPUSubtarget.h:30

llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition AMDGPUSubtarget.h:173

llvm::AMDGPUSubtarget::hasFminFmaxLegacy
bool hasFminFmaxLegacy() const
Definition AMDGPUSubtarget.h:215

llvm::AMDGPUSubtarget::HasFminFmaxLegacy
bool HasFminFmaxLegacy
Definition AMDGPUSubtarget.h:55

llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition AMDGPUSubtarget.cpp:139

llvm::AMDGPUSubtarget::FlatOffsetBitWidth
unsigned FlatOffsetBitWidth
Definition AMDGPUSubtarget.h:62

llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition AMDGPUSubtarget.h:169

llvm::AMDGPUSubtarget::WavefrontSizeLog2
char WavefrontSizeLog2
Definition AMDGPUSubtarget.h:61

llvm::AMDGPUSubtarget::getReqdWorkGroupSize
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Definition AMDGPUSubtarget.cpp:231

llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition AMDGPUSubtarget.h:248

llvm::AMDGPUSubtarget::getEUsPerCU
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
Definition AMDGPUSubtarget.h:246

llvm::AMDGPUSubtarget::hasSMulHi
bool hasSMulHi() const
Definition AMDGPUSubtarget.h:211

llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition AMDGPUSubtarget.cpp:257

llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition AMDGPUSubtarget.cpp:207

llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition AMDGPUSubtarget.h:146

llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0

llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition AMDGPUSubtarget.cpp:153

llvm::AMDGPUSubtarget::Generation
Generation
Definition AMDGPUSubtarget.h:32

llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition AMDGPUSubtarget.h:42

llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition AMDGPUSubtarget.h:41

llvm::AMDGPUSubtarget::EVERGREEN
@ EVERGREEN
Definition AMDGPUSubtarget.h:36

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition AMDGPUSubtarget.h:44

llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition AMDGPUSubtarget.h:33

llvm::AMDGPUSubtarget::R700
@ R700
Definition AMDGPUSubtarget.h:35

llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition AMDGPUSubtarget.h:39

llvm::AMDGPUSubtarget::NORTHERN_ISLANDS
@ NORTHERN_ISLANDS
Definition AMDGPUSubtarget.h:37

llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition AMDGPUSubtarget.h:38

llvm::AMDGPUSubtarget::R600
@ R600
Definition AMDGPUSubtarget.h:34

llvm::AMDGPUSubtarget::GFX13
@ GFX13
Definition AMDGPUSubtarget.h:45

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition AMDGPUSubtarget.h:40

llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition AMDGPUSubtarget.h:43

llvm::AMDGPUSubtarget::EUsPerCU
unsigned EUsPerCU
Definition AMDGPUSubtarget.h:57

llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition AMDGPUSubtarget.cpp:283

llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition AMDGPUSubtarget.cpp:261

llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition AMDGPUSubtarget.cpp:351

llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
Definition AMDGPUSubtarget.h:231

llvm::AMDGPUSubtarget::getAddressableLocalMemorySize
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
Definition AMDGPUSubtarget.h:239

llvm::AMDGPUSubtarget::getMaxNumWorkGroups
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
Definition AMDGPUSubtarget.cpp:436

llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0

llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0

llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition AMDGPUSubtarget.h:223

llvm::AMDGPUSubtarget::HasSMulHi
bool HasSMulHi
Definition AMDGPUSubtarget.h:54

llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition AMDGPUSubtarget.cpp:394

llvm::AMDGPUSubtarget::~AMDGPUSubtarget
virtual ~AMDGPUSubtarget()=default

llvm::AMDGPUSubtarget::isAmdHsaOrMesa
bool isAmdHsaOrMesa(const Function &F) const
Definition AMDGPUSubtarget.h:179

llvm::AMDGPUSubtarget::LocalMemorySize
unsigned LocalMemorySize
Definition AMDGPUSubtarget.h:59

llvm::AMDGPUSubtarget::MaxWavesPerEU
unsigned MaxWavesPerEU
Definition AMDGPUSubtarget.h:58

llvm::AMDGPUSubtarget::HasMulU24
bool HasMulU24
Definition AMDGPUSubtarget.h:53

llvm::AMDGPUSubtarget::HasMulI24
bool HasMulI24
Definition AMDGPUSubtarget.h:52

llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(const Triple &TT)
Definition AMDGPUSubtarget.h:65

llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition AMDGPUSubtarget.cpp:416

llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition AMDGPUSubtarget.cpp:39

llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0

llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition AMDGPUSubtarget.h:254

llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition AMDGPUSubtarget.h:291

llvm::AMDGPUSubtarget::hasWavefrontsEvenlySplittingXDim
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
Definition AMDGPUSubtarget.cpp:239

llvm::AMDGPUSubtarget::hasMulU24
bool hasMulU24() const
Definition AMDGPUSubtarget.h:207

llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition AMDGPUSubtarget.cpp:369

llvm::AMDGPUSubtarget::AddressableLocalMemorySize
unsigned AddressableLocalMemorySize
Definition AMDGPUSubtarget.h:60

llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition AMDGPUSubtarget.h:165

llvm::AMDGPUSubtarget::isSingleLaneExecution
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
Definition AMDGPUSubtarget.cpp:269

llvm::AMDGPUSubtarget::isGCN
bool isGCN() const
Definition AMDGPUSubtarget.h:183

llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition AMDGPUSubtarget.cpp:421

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition AMDGPUSubtarget.h:219

llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0

llvm::AMDGPUSubtarget::getEffectiveWavesPerEU
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
Definition AMDGPUSubtarget.cpp:180

llvm::AMDGPUSubtarget::isSingleWavefrontWorkgroup
bool isSingleWavefrontWorkgroup(const Function &F) const
Definition AMDGPUSubtarget.cpp:176

llvm::AMDGPUSubtarget::hasMulI24
bool hasMulI24() const
Definition AMDGPUSubtarget.h:203

llvm::AMDGPUSubtarget::useRealTrue16Insts
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Definition AMDGPUSubtarget.h:199

llvm::Function
Definition Function.h:65

llvm::Instruction
Definition Instruction.h:70

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::Triple::AMDHSA
@ AMDHSA
Definition Triple.h:236

llvm::Triple::UnknownOS
@ UnknownOS
Definition Triple.h:213

llvm::Triple::AMDPAL
@ AMDPAL
Definition Triple.h:246

llvm::Triple::Mesa3D
@ Mesa3D
Definition Triple.h:245

uint32_t

uint64_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition AMDGPUMCTargetDesc.h:33

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39