tilelang.intrinsics.metal_macro_generator
=========================================

.. py:module:: tilelang.intrinsics.metal_macro_generator


Classes
-------

.. autoapisummary::

   tilelang.intrinsics.metal_macro_generator.MPSIntrinEmitter


Module Contents
---------------

.. py:class:: MPSIntrinEmitter(a_dtype = 'float16', b_dtype = 'float16', accum_dtype = 'float32', a_transposed = False, b_transposed = False, block_row_warps = 1, block_col_warps = 1, warp_row_tiles = 8, warp_col_tiles = 8, chunk = 32, thread_var = None)

   .. py:attribute:: WARP_SIZE
      :value: 32


   .. py:attribute:: a_dtype
      :value: 'float16'


   .. py:attribute:: b_dtype
      :value: 'float16'


   .. py:attribute:: accum_dtype
      :value: 'float32'


   .. py:attribute:: a_transposed
      :value: False


   .. py:attribute:: b_transposed
      :value: False


   .. py:attribute:: block_row_warps
      :value: 1


   .. py:attribute:: block_col_warps
      :value: 1


   .. py:attribute:: warp_row_tiles
      :value: 8


   .. py:attribute:: warp_col_tiles
      :value: 8


   .. py:attribute:: chunk
      :value: 32


   .. py:attribute:: thread_var
      :value: None


   .. py:attribute:: micro_size_x
      :value: 8


   .. py:attribute:: micro_size_y
      :value: 8


   .. py:attribute:: micro_size_k
      :value: 8


   .. py:attribute:: warp_rows
      :value: 1


   .. py:attribute:: warp_cols
      :value: 1


   .. py:method:: get_thread_binding()


   .. py:method:: ldmatrix_a(A_local_buf, A_shared_buf, ki)


   .. py:method:: ldmatrix_b(B_local_buf, B_shared_buf, ki)


   .. py:method:: mma(A_local_buf, B_local_buf, C_local_buf)


   .. py:method:: simdgroup_copy(C_simd_buf, C_dst, is_store=True)


   .. py:method:: simd_store(C_simd_buf, C_dst)


   .. py:method:: simd_load(C_simd_buf, C_src)