tilelang.jit.adapter.wrapper¶

Attributes¶

Classes¶

BaseWrapper

Helper class that provides a standard way to create an ABC using inheritance.

TLCUDASourceWrapper

TLNVRTCSourceWrapper

A wrapper class for the TileLang NVRTC backend.

TLHIPSourceWrapper

A wrapper class for the TileLang HIP backend.

TLCPUSourceWrapper

TLWrapper

A wrapper class for the TileLang backend.

TLPyWrapper

A wrapper class for the TileLang backend.

Module Contents¶

tilelang.jit.adapter.wrapper.PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY = Multiline-String¶
Show Value
"""
    cudaError_t result_{0} = cudaFuncSetAttribute({0}, cudaFuncAttributeMaxDynamicSharedMemorySize, {1});
    if (result_{0} != CUDA_SUCCESS) {{
        snprintf(error_buf, ERROR_BUF_SIZE, "Failed to set the allowed dynamic shared memory size to %d with error: %s", {1}, cudaGetErrorString(result_{0}));
        return -1;
    }}
"""
tilelang.jit.adapter.wrapper.PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY_HIP = Multiline-String¶
Show Value
"""
    if ({1} > 65536) {{
        snprintf(error_buf, ERROR_BUF_SIZE, "Failed to set the allowed dynamic shared memory size for {0} to %d", {1});
        return -1;
    }}
    return 0;
"""
tilelang.jit.adapter.wrapper.PREDEF_INIT_FUNC = Multiline-String¶
Show Value
"""
#define ERROR_BUF_SIZE 1024
static char error_buf[ERROR_BUF_SIZE];

extern "C" const char* get_last_error() {{
    return error_buf;
}}

extern "C" int init() {{
    error_buf[0] = '\0';
    {0}
    return 0;
}}
"""
tilelang.jit.adapter.wrapper.PREDEF_HOST_FUNC = Multiline-String¶
Show Value
"""
extern "C" int call({}) {{
{}
  return 0;
}}
"""
tilelang.jit.adapter.wrapper.PREDEF_HOST_FUNC_PY = Multiline-String¶
Show Value
"""
import cuda.bindings.driver
import ctypes

_function_names = {}

def call({}):
    {}
"""
tilelang.jit.adapter.wrapper.L2_PERSISTENT_MAP_CREATE_HANDLE = Multiline-String¶
Show Value
"""
  cudaStreamAttrValue stream_attribute;
  size_t init_persisting_l2_cache_size;
  cudaDeviceGetLimit(&init_persisting_l2_cache_size, cudaLimitPersistingL2CacheSize);
"""
tilelang.jit.adapter.wrapper.L2_PERSISTENT_MAP_INIT_FUNC = Multiline-String¶
Show Value
"""
  stream_attribute.accessPolicyWindow.hitRatio = {1};
  stream_attribute.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting;
  stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming;
  cudaDeviceSetLimit(cudaLimitPersistingL2CacheSize, {2});
  stream_attribute.accessPolicyWindow.base_ptr = (void*)({0});
  stream_attribute.accessPolicyWindow.num_bytes = {2};
  cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &stream_attribute);
"""
tilelang.jit.adapter.wrapper.L2_PERSISTENT_MAP_RESET_HANDLE = Multiline-String¶
Show Value
"""
  stream_attribute.accessPolicyWindow.num_bytes = 0;
  cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &stream_attribute);
  cudaCtxResetPersistingL2Cache();
  cudaDeviceSetLimit(cudaLimitPersistingL2CacheSize, init_persisting_l2_cache_size);
"""
tilelang.jit.adapter.wrapper.TMA_DESC_INIT_FUNC = Multiline-String¶
Show Value
"""
  CUtensorMap {0};
  CUtensorMapDataType {0}_type= (CUtensorMapDataType){1};
  cuuint32_t {0}_tensorRank= {2};
  void *{0}_globalAddress= {3};
  cuuint64_t {0}_globalDim[{2}]= {{{4}}};
  cuuint64_t {0}_globalStride[{2}]= {{{5}}};
  cuuint32_t {0}_boxDim[{2}]= {{{6}}};
  cuuint32_t {0}_elementStrides[{2}]= {{{7}}};
  CUtensorMapInterleave {0}_interleave= (CUtensorMapInterleave){8};
  CUtensorMapSwizzle {0}_swizzle= (CUtensorMapSwizzle){9};
  CUtensorMapL2promotion {0}_l2Promotion= (CUtensorMapL2promotion){10};
  CUtensorMapFloatOOBfill {0}_oobFill= (CUtensorMapFloatOOBfill){11};

  CUresult {0}_result = CUTLASS_CUDA_DRIVER_WRAPPER_CALL(cuTensorMapEncodeTiled)(
    &{0}, {0}_type, {0}_tensorRank, {0}_globalAddress, {0}_globalDim, {0}_globalStride + 1, {0}_boxDim, {0}_elementStrides, {0}_interleave, {0}_swizzle, {0}_l2Promotion, {0}_oobFill);

  if ({0}_result != CUDA_SUCCESS) {{
          std::stringstream ss;
          ss << "Error: Failed to initialize the TMA descriptor {0}";
          snprintf(error_buf, ERROR_BUF_SIZE, "%s", ss.str().c_str());
          return -1;
  }}
"""
tilelang.jit.adapter.wrapper.TMA_DESC_INIT_FUNC_PY = Multiline-String¶
Show Value
"""
  {0}_type = cuda.bindings.driver.CUtensorMapDataType({1})
  {0}_tensorRank = {2}
  {0}_globalAddress = {3}.data_ptr()
  {0}_globalDim = [{4}]
  {0}_globalStride = [{5}][1:]
  {0}_boxDim = [{6}]
  {0}_elementStrides = [{7}]
  {0}_interleave = cuda.bindings.driver.CUtensorMapInterleave({8})
  {0}_swizzle = cuda.bindings.driver.CUtensorMapSwizzle({9})
  {0}_l2Promotion = cuda.bindings.driver.CUtensorMapL2promotion({10})
  {0}_oobFill = cuda.bindings.driver.CUtensorMapFloatOOBfill({11})

  res, {0} = cuda.bindings.driver.cuTensorMapEncodeTiled(
          {0}_type,
          {0}_tensorRank,
          {0}_globalAddress,
          {0}_globalDim,
          {0}_globalStride,
          {0}_boxDim,
          {0}_elementStrides,
          {0}_interleave,
          {0}_swizzle,
          {0}_l2Promotion,
          {0}_oobFill,
  )

  if res != cuda.bindings.driver.CUresult.CUDA_SUCCESS:
          raise RuntimeError(f"Failed to initialize the TMA descriptor {0}: {{res}}")
"""
tilelang.jit.adapter.wrapper.KERNEL_LAUNCH_FUNC_PY = Multiline-String¶
Show Value
"""
  res = cuda.bindings.driver.cuKernelSetAttribute(
          cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
          {7},
          kernels["{0}"],
          cuda.bindings.driver.CUdevice({10})
  )[0]
  if res != cuda.bindings.driver.CUresult.CUDA_SUCCESS:
          raise RuntimeError(f"Failed to set max dynamic shared memory size to {7} for kernel {0}: {{res}}")

  config = cuda.bindings.driver.CUlaunchConfig()
  config.gridDimX = {1}
  config.gridDimY = {2}
  config.gridDimZ = {3}
  config.blockDimX = {4}
  config.blockDimY = {5}
  config.blockDimZ = {6}
  config.sharedMemBytes = {7}
  config.hStream = stream

  arg_values = {8}
  arg_types = {9}

  res = cuda.bindings.driver.cuLaunchKernelEx(config, kernels["{0}"], (arg_values, arg_types), 0)[0]
  if res != cuda.bindings.driver.CUresult.CUDA_SUCCESS:
          raise RuntimeError(f"Failed to launch kernel {0}: {{res}}")
"""
class tilelang.jit.adapter.wrapper.BaseWrapper¶

Bases: abc.ABC

Helper class that provides a standard way to create an ABC using inheritance.

abstract wrap(*args, **kwargs)¶
tilelang.jit.adapter.wrapper.logger¶
class tilelang.jit.adapter.wrapper.TLCUDASourceWrapper(scheduled_ir_module, source, target, device_mod=None, host_mod=None, pass_configs=None)¶

Bases: object

Parameters:
  • scheduled_ir_module (tvm.IRModule)

  • source (str)

  • target (tvm.target.Target)

  • device_mod (Optional[tvm.IRModule])

  • host_mod (Optional[tvm.IRModule])

  • pass_configs (Optional[Dict[str, Any]])

backend = 'tl'¶
device_mod: tvm.IRModule | None = None¶
host_mod: tvm.IRModule | None = None¶
pass_configs: Dict[str, Any] | None = None¶
mod¶
target¶
source¶
function_names: str | None = None¶
dynamic_smem_buf: int | None = None¶
block_info: List[int] | Dict = [1, 1, 1]¶
grid_info: List[int] | Dict = [1, 1, 1]¶
tma_descriptor_args: Dict | None = None¶
l2_persistent_map: Dict[str, Dict] | None¶
srcpath: str | None = None¶
libpath: str | None = None¶
lib_code: str | None¶
is_tma_descriptor_arg(arg_name)¶
Parameters:

arg_name (str)

Return type:

bool

create_dispatch_func(code, function_informations)¶
generate_l2_persistent_map(function_name)¶
Parameters:

function_name (str)

Return type:

str

generate_tma_descriptor_args(desc_name_map)¶
Parameters:

desc_name_map (Dict[str, str])

Return type:

str

parse_source_information()¶
get_dynamic_symbolic_set(prim_func)¶
get_init_func()¶
update_lib_code(code)¶
Parameters:

code (str)

get_stream_type()¶
Return type:

Dict[str, str]

property prim_func¶
class tilelang.jit.adapter.wrapper.TLNVRTCSourceWrapper(scheduled_ir_module, source, target, device_mod=None, host_mod=None, pass_configs=None)¶

Bases: TLCUDASourceWrapper

A wrapper class for the TileLang NVRTC backend.

Parameters:
  • scheduled_ir_module (tvm.IRModule)

  • source (str)

  • target (tvm.target.Target)

  • device_mod (Optional[tvm.IRModule])

  • host_mod (Optional[tvm.IRModule])

  • pass_configs (Optional[Dict[str, Any]])

create_dispatch_func(code, function_informations)¶
generate_tma_descriptor_args(desc_name_map)¶
Parameters:

desc_name_map (Dict[str, str])

Return type:

str

update_lib_code(code)¶
Parameters:

code (str)

get_stream_type()¶
Return type:

Dict[str, str]

class tilelang.jit.adapter.wrapper.TLHIPSourceWrapper(scheduled_ir_module, source, target, device_mod=None, host_mod=None, pass_configs=None)¶

Bases: TLCUDASourceWrapper

A wrapper class for the TileLang HIP backend.

Parameters:
  • scheduled_ir_module (tvm.IRModule)

  • source (str)

  • target (tvm.target.Target)

  • device_mod (Optional[tvm.IRModule])

  • host_mod (Optional[tvm.IRModule])

  • pass_configs (Optional[Dict[str, Any]])

get_init_func()¶
get_stream_type()¶
Return type:

Dict[str, str]

class tilelang.jit.adapter.wrapper.TLCPUSourceWrapper(scheduled_ir_module, source, target, device_mod=None, host_mod=None, pass_configs=None)¶

Bases: object

Parameters:
  • scheduled_ir_module (tvm.IRModule)

  • source (str)

  • target (tvm.target.Target)

  • device_mod (Optional[tvm.IRModule])

  • host_mod (Optional[tvm.IRModule])

  • pass_configs (Optional[Dict[str, Any]])

INIT_FUNC¶
CALL_PREFIX¶
backend = 'tl'¶
device_mod: tvm.IRModule | None = None¶
host_mod: tvm.IRModule | None = None¶
pass_configs: Dict[str, Any] | None = None¶
mod¶
target¶
source¶
function_names: str | None = None¶
dynamic_smem_buf: int | None = None¶
srcpath: str | None = None¶
libpath: str | None = None¶
lib_code: str | None¶
create_call_func(code, function_informations)¶
parse_source_information()¶
get_dynamic_symbolic_set(prim_func)¶
get_cpu_init_func()¶
update_lib_code(code)¶
Parameters:

code (str)

property prim_func¶
class tilelang.jit.adapter.wrapper.TLWrapper(target)¶

Bases: BaseWrapper

A wrapper class for the TileLang backend.

Parameters:

target (tvm.target.Target)

device_mod: tvm.IRModule | None = None¶
host_mod: tvm.IRModule | None = None¶
pass_configs: Dict[str, Any] | None = None¶
target: tvm.target.Target | None = None¶
lib: object | None = None¶
scheduled_ir_module = None¶
assign_optimized_module(scheduled_ir_module)¶
Parameters:

scheduled_ir_module (tvm.IRModule)

assign_pass_configs(pass_configs)¶
Parameters:

pass_configs (Dict[str, Any])

assign_host_module(host_mod)¶
Parameters:

host_mod (tvm.IRModule)

assign_device_module(device_mod)¶
Parameters:

device_mod (tvm.IRModule)

wrap(c_source)¶
Parameters:

c_source (str)

class tilelang.jit.adapter.wrapper.TLPyWrapper(target)¶

Bases: TLWrapper

A wrapper class for the TileLang backend.

Parameters:

target (tvm.target.Target)

wrap(c_source)¶
Parameters:

c_source (str)