Index _ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z _ __bool__() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) __call__() (tilelang.autotuner.tuner.AutoTuner method) (tilelang.jit.adapter.base.BaseKernelAdapter method) (tilelang.jit.kernel.JITKernel method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) (tilelang.profiler.Profiler method) __class_getitem__() (tilelang.language.proxy.BaseTensor class method) __del__() (tilelang.jit.adapter.libgen.PyLibraryGenerator method) __enter__() (tilelang.autotuner.capture.AutotuneInputsCapture method) (tilelang.language.frame.LetFrame method) (tilelang.language.kernel.KernelLaunchFrame method) __exit__() (tilelang.autotuner.capture.AutotuneInputsCapture method) (tilelang.language.frame.LetFrame method) (tilelang.language.kernel.KernelLaunchFrame method) __getitem__() (tilelang.language.proxy.BaseTensor method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) __hash__() (tilelang.autotuner.param.CompileArgs method) (tilelang.autotuner.param.ProfileArgs method) (tilelang.carver.roller.hint.TileDict method) __len__() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) __post_init__() (tilelang.carver.template.base.BaseTemplate method) (tilelang.profiler.Profiler method) __repr__() (tilelang.carver.analysis.BlockInfo method) (tilelang.carver.analysis.IterInfo method) (tilelang.carver.arch.cuda.CUDA method) (tilelang.carver.roller.bestfit.Block method) (tilelang.carver.roller.hint.Hint method) (tilelang.carver.roller.hint.IntrinInfo method) (tilelang.carver.roller.hint.Stride method) (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.rasterization.NoRasterization method) (tilelang.carver.roller.rasterization.Rasterization2DColumn method) (tilelang.carver.roller.rasterization.Rasterization2DRow method) (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) (tilelang.jit.param.Kernel method) (tilelang.jit.param.Program method) (tilelang.layout.fragment.Fragment method) (tilelang.primitives.gemm.base.GemmBaseParams method) __setitem__() (tilelang.language.proxy.BaseTensor method) __slots__ (tilelang.autotuner.capture.AutotuneInputsCapture attribute) __str__() (tilelang.carver.analysis.BlockInfo method) (tilelang.carver.analysis.IterInfo method) (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) A A (tilelang.primitives.gemm.base.GemmBaseParams attribute) a_dtype (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) a_transposed (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) abs (in module tilelang.language.tir.ir) abs() (in module tilelang.language.tir.op) accum_dtype (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA property) acos (in module tilelang.language.tir.ir) acos() (in module tilelang.language.tir.op) acosh (in module tilelang.language.tir.ir) acosh() (in module tilelang.language.tir.op) ada_tensorcore_supported (in module tilelang.carver.arch.cuda) adapt_torch2tvm() (in module tilelang.utils.tensor) adapter (tilelang.jit.kernel.JITKernel attribute), [1] (tilelang.profiler.Profiler attribute), [1] add_next() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_prev() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_tag() (tilelang.carver.roller.node.Node method) address_of (in module tilelang.language.tir.ir) address_of() (in module tilelang.language.tir.op) AF_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) align (tilelang.carver.roller.bestfit.BestFit attribute) AlignDynamicSharedMemoryAllocations() (in module tilelang.transform) all() (in module tilelang.language.tir.op) all_of() (in module tilelang.language.logical) alloc_barrier() (in module tilelang.language.allocate) alloc_fragment() (in module tilelang.language.allocate) alloc_local() (in module tilelang.language.allocate) alloc_shared() (in module tilelang.language.allocate) alloc_var() (in module tilelang.language.allocate) allow_fence_proxy() (in module tilelang.engine.phase) allow_global_thread_synchronization() (in module tilelang.engine.phase) allow_tma_and_warp_specialized() (in module tilelang.engine.phase) allow_vectorize() (in module tilelang.engine.phase) allow_warp_specialized() (in module tilelang.engine.phase) ampere_tensorcore_supported (in module tilelang.carver.arch.cuda) ana (tilelang.carver.roller.node.PrimFuncNode attribute) analysis() (tilelang.tools.Analyzer.Analyzer class method) AnalysisResult (class in tilelang.tools.Analyzer) analyze() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) Analyzer (class in tilelang.tools.Analyzer) annotate_l2_hit_ratio() (in module tilelang.language) annotate_layout() (in module tilelang.language) annotate_padding() (in module tilelang.language) AnnotateDeviceRegions() (in module tilelang.transform) any() (in module tilelang.language.tir.op) any_of() (in module tilelang.language.logical) anylist_getitem (in module tilelang.language.tir.ir) anylist_getitem() (in module tilelang.language.tir.op) anylist_resetitem (in module tilelang.language.tir.ir) anylist_resetitem() (in module tilelang.language.tir.op) anylist_setitem_call_cpacked (in module tilelang.language.tir.ir) anylist_setitem_call_cpacked() (in module tilelang.language.tir.op) anylist_setitem_call_packed (in module tilelang.language.tir.ir) anylist_setitem_call_packed() (in module tilelang.language.tir.op) apply_simplify() (in module tilelang.transform.simplify) arch (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.carver.template.base.BaseTemplate property) ARCH_CONFIGS (in module tilelang.tools.Analyzer) args (tilelang.carver.roller.node.PrimFuncNode attribute) array_reduce() (in module tilelang.utils.language) artifact (tilelang.jit.kernel.JITKernel attribute), [1] AS_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) asin (in module tilelang.language.tir.ir) asin() (in module tilelang.language.tir.op) asinh (in module tilelang.language.tir.ir) asinh() (in module tilelang.language.tir.op) assert_allclose() (tilelang.profiler.Profiler method) assert_consistent() (tilelang.profiler.Profiler method) assign_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) assign_compile_flags() (tilelang.jit.adapter.libgen.LibraryGenerator method) assign_device_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_host_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_optimized_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_pass_configs() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.wrapper.TLWrapper method) assume (in module tilelang.language.tir.ir) assume() (in module tilelang.language.tir.op) atan (in module tilelang.language.tir.ir) atan() (in module tilelang.language.tir.op) atan2 (in module tilelang.language.tir.ir) atan2() (in module tilelang.language.tir.op) atanh (in module tilelang.language.tir.ir) atanh() (in module tilelang.language.tir.op) atol (tilelang.autotuner.param.ProfileArgs attribute), [1] atomic_add() (in module tilelang.language.customize) atomic_addx2() (in module tilelang.language.customize) atomic_addx4() (in module tilelang.language.customize) Auto (tilelang.utils.tensor.TensorSupplyType attribute) auto_infer_current_arch() (in module tilelang.carver.arch) auto_inline_consumer_chain() (in module tilelang.carver.matmul_analysis) auto_inline_consumers() (in module tilelang.carver.matmul_analysis) auto_inline_producers() (in module tilelang.carver.matmul_analysis) autotune() (in module tilelang.autotuner.tuner) AutotuneInputsCapture (class in tilelang.autotuner.capture) AutoTuner (class in tilelang.autotuner.tuner) AutotuneResult (class in tilelang.autotuner.param) available_tensor_instructions (tilelang.carver.arch.cuda.CUDA attribute) AVALIABLE_TARGETS (in module tilelang.utils.target) ax (tilelang.carver.roller.hint.Stride property) B B (tilelang.primitives.gemm.base.GemmBaseParams attribute) b_dtype (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) b_transposed (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) backend (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) bandwidth (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) barrier_arrive() (in module tilelang.language.builtin) barrier_wait() (in module tilelang.language.builtin) BaseKernelAdapter (class in tilelang.jit.adapter.base) BaseTemplate (class in tilelang.carver.template.base) BaseTensor (class in tilelang.language.proxy) BaseTensorProxy (class in tilelang.language.proxy) BaseWrapper (class in tilelang.jit.adapter.wrapper) batch_size (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) BEST_CONFIG_PATH (in module tilelang.autotuner.param) BestFit (class in tilelang.carver.roller.bestfit) BF_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) bitwise_and (in module tilelang.language.tir.ir) bitwise_and() (in module tilelang.language.tir.op) bitwise_not (in module tilelang.language.tir.ir) bitwise_not() (in module tilelang.language.tir.op) bitwise_or (in module tilelang.language.tir.ir) bitwise_or() (in module tilelang.language.tir.op) bitwise_xor (in module tilelang.language.tir.ir) bitwise_xor() (in module tilelang.language.tir.op) Block (class in tilelang.carver.roller.bestfit) block (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) block_analyzer (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) block_col_warps (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) block_counts (tilelang.tools.Analyzer.Analyzer attribute) block_info (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) block_infos (tilelang.carver.roller.node.BlockAnalyzer attribute) block_per_SM (tilelang.carver.roller.hint.TileDict attribute) block_reduction_depth (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) block_row_warps (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) block_rv (tilelang.carver.analysis.BlockInfo attribute) BlockAnalyzer (class in tilelang.carver.roller.node) BlockInfo (class in tilelang.carver.analysis) blocks (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.language.kernel.KernelLaunchFrame property) BS_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) Buffer (in module tilelang.language.proxy) buffer_device_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_dtype_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_load_to_tile_region() (in module tilelang.language.copy) buffer_mapping (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) buffer_region_to_tile_region() (in module tilelang.language.copy) buffer_to_tile_region() (in module tilelang.language.copy) BufferProxy (class in tilelang.language.proxy) buffers (tilelang.carver.roller.node.PrimFuncNode attribute) C C (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.primitives.gemm.base.GemmBaseParams attribute) c2d_im2col() (in module tilelang.language.copy) cache_dir (tilelang.autotuner.tuner.AutoTuner attribute) cache_input_tensors (tilelang.autotuner.param.ProfileArgs attribute), [1] cached() (in module tilelang.cache) (tilelang.cache.kernel_cache.KernelCache method) cached_tensors (tilelang.carver.roller.hint.Hint attribute) cached_tensors_map (tilelang.carver.roller.hint.TileDict attribute) calculate() (tilelang.tools.Analyzer.Analyzer method) call_cpacked (in module tilelang.language.tir.ir) call_cpacked() (in module tilelang.language.tir.op) call_cpacked_lowered (in module tilelang.language.tir.ir) call_cpacked_lowered() (in module tilelang.language.tir.op) call_extern (in module tilelang.language.tir.ir) call_extern() (in module tilelang.language.tir.op) call_intrin (in module tilelang.language.tir.ir) call_intrin() (in module tilelang.language.tir.op) call_llvm_intrin (in module tilelang.language.tir.ir) call_llvm_intrin() (in module tilelang.language.tir.op) call_llvm_pure_intrin (in module tilelang.language.tir.ir) call_llvm_pure_intrin() (in module tilelang.language.tir.op) call_packed (in module tilelang.language.tir.ir) call_packed() (in module tilelang.language.tir.op) call_packed_lowered (in module tilelang.language.tir.ir) call_packed_lowered() (in module tilelang.language.tir.op) CALL_PREFIX (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) call_pure_extern (in module tilelang.language.tir.ir) call_pure_extern() (in module tilelang.language.tir.op) call_tir() (in module tilelang.language.tir.op) callback_libdevice_path() (in module tilelang.contrib.nvcc) callback_rocm_bitcode_path() (in module tilelang.contrib.rocm) callback_rocm_link() (in module tilelang.contrib.rocm) canon_target_host() (in module tilelang.engine.lower) CaptureStack (class in tilelang.autotuner.capture) cdiv() (in module tilelang.math) CDNA (class in tilelang.carver.arch.cdna) ceil (in module tilelang.language.tir.ir) ceil() (in module tilelang.language.tir.op) ceildiv (in module tilelang.language.tir.ir) ceildiv() (in module tilelang.language.tir.op) check_cuda_availability() (in module tilelang.utils.target) check_func_with_dynamic() (in module tilelang.carver.analysis) check_hip_availability() (in module tilelang.utils.target) check_sm_version() (in module tilelang.carver.arch.cuda) check_tile_shape_isvalid() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) chunk (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) clamp() (in module tilelang.language.customize) class_attributes (tilelang.carver.template.conv.ConvTemplate property) (tilelang.carver.template.elementwise.ElementwiseTemplate property) (tilelang.carver.template.flashattention.FlashAttentionTemplate property) (tilelang.carver.template.gemv.GEMVTemplate property) (tilelang.carver.template.general_reduce.GeneralReductionTemplate property) (tilelang.carver.template.matmul.MatmulTemplate property) (tilelang.primitives.gemm.base.GemmBaseParams property) clear() (in module tilelang.language.fill) clear_cache() (in module tilelang.cache) (tilelang.cache.kernel_cache.KernelCache method) ClusterPlanning() (in module tilelang.transform) clz (in module tilelang.language.tir.ir) clz() (in module tilelang.language.tir.op) coalesced_factor() (in module tilelang.carver.roller.policy.common) coalesced_tensor_shape() (in module tilelang.carver.roller.policy.common) collect_block_iter_vars_used_in_access_region() (in module tilelang.carver.analysis) collect_vars_from_expr() (in module tilelang.carver.matmul_analysis) collect_vars_used_in_prim_expr() (in module tilelang.carver.analysis) comm_reducer() (in module tilelang.language.tir.op) compile() (in module tilelang.jit) compile_args (tilelang.autotuner.tuner.AutoTuner attribute) compile_cuda() (in module tilelang.contrib.nvcc) (in module tilelang.contrib.nvrtc) compile_flags (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.kernel.JITKernel attribute) compile_hip() (in module tilelang.contrib.hipcc) compile_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.libgen.PyLibraryGenerator method) compile_program() (tilelang.autotuner.param.CompileArgs method) CompileArgs (class in tilelang.autotuner.param) CompiledArtifact (class in tilelang.engine.param) complete_config() (tilelang.carver.roller.hint.Hint method) COMPOSABLE_KERNEL_INCLUDE_DIR (in module tilelang.env) compress_sm90() (in module tilelang.utils.sparse) compress_util (in module tilelang.utils.sparse) compute_capability (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) compute_elements_from_shape() (tilelang.carver.roller.hint.Stride method) compute_max_core (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) compute_node_stride_map() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) compute_strides_from_shape() (tilelang.carver.roller.hint.Stride method) compute_tile_dict() (tilelang.carver.roller.policy.default.DefaultPolicy method) compute_warp_partition() (tilelang.primitives.gemm.base.GemmWarpPolicy method) compute_workload_per_item() (tilelang.carver.roller.policy.default.DefaultPolicy method) condense_rep_var() (tilelang.layout.fragment.Fragment method) config (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) ConfigIndexBitwidth() (in module tilelang.transform) configs (tilelang.autotuner.tuner.AutoTuner attribute) construct_dependency_target() (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) convert_func() (in module tilelang.contrib.dlpack) ConvTemplate (class in tilelang.carver.template.conv) copy() (in module tilelang.language.copy) copysign (in module tilelang.language.tir.ir) copysign() (in module tilelang.language.tir.op) cos (in module tilelang.language.tir.ir) cos() (in module tilelang.language.tir.op) cosh (in module tilelang.language.tir.ir) cosh() (in module tilelang.language.tir.op) CPU (class in tilelang.carver.arch.cpu) create_barriers (in module tilelang.language.tir.ir) create_barriers() (in module tilelang.language.tir.op) create_call_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) create_dispatch_func() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLNVRTCSourceWrapper method) create_executable() (in module tilelang.contrib.cc) create_list_of_mbarrier() (in module tilelang.language.builtin) create_shared() (in module tilelang.contrib.cc) create_staticlib() (in module tilelang.contrib.cc) create_tma_descriptor() (in module tilelang.language.builtin) cross_compiler() (in module tilelang.contrib.cc) CtypesKernelAdapter (class in tilelang.jit.adapter.ctypes.adapter) CUDA (class in tilelang.carver.arch.cuda) CUDA_HOME (in module tilelang.env) CUDA_KERNELS_OUTPUT_DIR (tilelang.transform.pass_config.PassConfigKey attribute) cudaDeviceProp (class in tilelang.carver.arch.driver.cuda_driver) culib (tilelang.jit.adapter.libgen.PyLibraryGenerator attribute) cumsum() (in module tilelang.language.reduce) cumsum_fragment() (in module tilelang.language.reduce) Current() (tilelang.language.frame.LetFrame class method) (tilelang.language.kernel.KernelLaunchFrame class method) current_dir (in module tilelang.jit.adapter.cython.adapter) CUTLASS_INCLUDE_DIR (in module tilelang.env) cython_wrapper (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) cython_wrapper_code (in module tilelang.jit.adapter.cython.adapter) cython_wrapper_path (in module tilelang.jit.adapter.cython.adapter) CythonKernelAdapter (class in tilelang.jit.adapter.cython.adapter) D D (tilelang.carver.template.conv.ConvTemplate attribute), [1] data (tilelang.jit.param.Program attribute) dec_max_nreg() (in module tilelang.language.builtin) decode_i1_to_f16 (in module tilelang.quantize.lop3) decode_i1_to_f16_scale (in module tilelang.quantize.lop3) decode_i1_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i1_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i1s_to_i8s (in module tilelang.quantize.lop3) decode_i2_to_f16 (in module tilelang.quantize.lop3) decode_i2_to_f16_scale (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_original_offset (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_quantized (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i2s_to_i4s (in module tilelang.quantize.lop3) decode_i2s_to_i8s (in module tilelang.quantize.lop3) decode_i4_to_f16 (in module tilelang.quantize.lop3) decode_i4_to_f16_scale (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_original_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_quantized (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_quantized_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_rescale_offset (in module tilelang.quantize.lop3) decode_i4s_to_i8s (in module tilelang.quantize.lop3) decompose_col_major() (in module tilelang.layout.gemm_sp) deduplicate() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) default_align (tilelang.language.proxy.BaseTensorProxy attribute) default_offset_factor (tilelang.language.proxy.BaseTensorProxy attribute) default_scope (tilelang.language.proxy.BaseTensorProxy attribute) (tilelang.language.proxy.FragmentBufferProxy attribute) (tilelang.language.proxy.LocalBufferProxy attribute) (tilelang.language.proxy.SharedBufferProxy attribute) DefaultPolicy (class in tilelang.carver.roller.policy.default) dep_analysis (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) dep_name (tilelang.carver.roller.shape_inference.tir.Statement attribute) DependencyAnalysis (class in tilelang.carver.roller.shape_inference.tir) dependent_region (tilelang.carver.roller.shape_inference.common.Statement attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) deprecated() (in module tilelang.utils.deprecated) deps (tilelang.carver.roller.shape_inference.common.InputShapeInference attribute) (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) detect_dominant_read() (in module tilelang.carver.analysis) detect_iter_traits() (in module tilelang.carver.matmul_analysis) determine_profiler() (tilelang.profiler.Profiler method) determine_target() (in module tilelang.utils.target) device (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.tools.Analyzer.Analyzer attribute) device_codegen() (in module tilelang.engine.lower) device_codegen_without_compile() (in module tilelang.engine.lower) device_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) dfs_smem_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) disable_cache (in module tilelang.env) div() (in module tilelang.language.tir.op) do_bench() (in module tilelang.profiler.bench) (tilelang.profiler.Profiler method) dom (tilelang.carver.analysis.IterInfo property) dom() (tilelang.carver.analysis.BlockInfo method) dom_kind() (tilelang.carver.analysis.BlockInfo method) dp4a() (in module tilelang.language.customize) dst_id (tilelang.carver.roller.node.Edge attribute) dst_node (tilelang.carver.roller.node.Edge attribute) dtype (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) dtype_abbrv (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) dynamic_smem_buf (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) dynamic_symbolic_map (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) E Edge (class in tilelang.carver.roller.node) ElementwiseTemplate (class in tilelang.carver.template.elementwise) EliminateStorageSyncForMBarrier() (in module tilelang.transform) emit() (tilelang.TqdmLoggingHandler method) emit_config() (tilelang.carver.roller.policy.default.DefaultPolicy method) enable_cache (in module tilelang.env) end (tilelang.carver.roller.bestfit.Block attribute) end_profile_intrinsic (in module tilelang.language.tir.ir) end_profile_intrinsic() (in module tilelang.language.tir.op) equivalent_function() (tilelang.carver.template.base.BaseTemplate method) erf (in module tilelang.language.tir.ir) erf() (in module tilelang.language.tir.op) estimated_time (tilelang.tools.Analyzer.AnalysisResult attribute), [1] execution_backend (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.cache.kernel_cache.KernelCache attribute) (tilelang.jit.kernel.JITKernel attribute) exp (in module tilelang.language.tir.ir) exp() (in module tilelang.language.tir.op) exp10 (in module tilelang.language.tir.ir) exp10() (in module tilelang.language.tir.op) exp2 (in module tilelang.language.tir.ir) exp2() (in module tilelang.language.tir.op) expected_bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) expected_tflops (tilelang.tools.Analyzer.AnalysisResult attribute) export_library() (tilelang.jit.kernel.JITKernel method) extent (tilelang.carver.matmul_analysis.IterTrait attribute) extent_wrapper() (tilelang.carver.roller.node.PrimFuncNode method) extrac_params() (in module tilelang.engine.lower) extract_thread_binding() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) F F (tilelang.carver.template.conv.ConvTemplate attribute), [1] factorize() (in module tilelang.carver.roller.policy.common) fence_proxy_async() (in module tilelang.language.builtin) fill() (in module tilelang.language.fill) find_arg_idx_from_buffer_chain() (in module tilelang.carver.matmul_analysis) find_cuda_path() (in module tilelang.contrib.nvcc) find_first_similar_buffer() (in module tilelang.carver.matmul_analysis) find_first_similar_region() (in module tilelang.carver.matmul_analysis) find_last_producer_from_buffer() (in module tilelang.carver.matmul_analysis) find_libdevice_path() (in module tilelang.contrib.nvcc) find_lld() (in module tilelang.contrib.rocm) find_path_from_source() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) find_rocm_path() (in module tilelang.contrib.rocm) find_topo_sort() (in module tilelang.carver.roller.node) find_topo_sort_priority() (in module tilelang.carver.roller.node) find_var_from_func() (in module tilelang.carver.analysis) FlashAttentionTemplate (class in tilelang.carver.template.flashattention) FlattenBuffer() (in module tilelang.transform) floor (in module tilelang.language.tir.ir) floor() (in module tilelang.language.tir.op) floordiv (in module tilelang.language.tir.ir) floordiv() (in module tilelang.language.tir.op) floormod (in module tilelang.language.tir.ir) floormod() (in module tilelang.language.tir.op) fmod (in module tilelang.language.tir.ir) fmod() (in module tilelang.language.tir.op) fn (tilelang.autotuner.tuner.AutoTuner attribute) footprint() (tilelang.carver.roller.node.PrimFuncNode method) Fragment (class in tilelang.layout.fragment) FragmentBufferProxy (class in tilelang.language.proxy) FrameStack (class in tilelang.language.frame) (class in tilelang.language.kernel) free() (tilelang.carver.roller.bestfit.BestFit method) from_buffer() (tilelang.engine.param.KernelParam class method) from_database() (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter class method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter class method) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter class method) (tilelang.jit.kernel.JITKernel class method) from_dict() (tilelang.carver.roller.hint.Hint class method) from_kernel() (tilelang.autotuner.tuner.AutoTuner class method) from_output_nodes() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_prim_func() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_ptr() (tilelang.language.proxy.BaseTensor class method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) from_tilelang_function() (tilelang.jit.kernel.JITKernel class method) from_var() (tilelang.engine.param.KernelParam class method) from_warp_partition() (tilelang.primitives.gemm.base.GemmWarpPolicy class method) FrontendLegalize() (in module tilelang.transform) FullCol (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) FullRow (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) func (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.profiler.Profiler property) function_names (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) FUNCTION_PATH (in module tilelang.autotuner.param) G gemm() (in module tilelang.language.gemm) (in module tilelang.primitives.gemm) gemm_rrr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_rsr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_sp() (in module tilelang.language.experimental.gemm_sp) gemm_srr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_ssr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) GemmBaseParams (class in tilelang.primitives.gemm.base) GemmPrimitiveMMA (class in tilelang.primitives.gemm.gemm_mma) GemmWarpPolicy (class in tilelang.primitives.gemm.base) GEMVTemplate (class in tilelang.carver.template.gemv) gen_quant4() (in module tilelang.quantize.utils) general_compress() (in module tilelang.quantize.utils) GeneralReductionTemplate (class in tilelang.carver.template.general_reduce) generate_cache_key() (tilelang.autotuner.tuner.AutoTuner method) generate_l2_persistent_map() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) generate_tma_descriptor_args() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLNVRTCSourceWrapper method) get_all_factors() (in module tilelang.carver.roller.policy.common) get_analyzer_by_tir() (in module tilelang.carver.roller.shape_inference.tir) get_annotated_mod() (in module tilelang.jit.adapter.utils) get_arch() (in module tilelang.carver.arch) get_autotune_inputs() (in module tilelang.autotuner.capture) get_available_cpu_count() (in module tilelang.autotuner.tuner) get_avaliable_tensorintrin_shapes() (tilelang.carver.arch.arch_base.TileDevice method) (tilelang.carver.arch.cuda.CUDA method) get_base_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_block() (in module tilelang.carver.common_schedules) get_block_binding() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_extent() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_extents() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_info() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_name() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_buffer_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_buffer_elems() (in module tilelang.utils.language) get_buffer_region_from_load() (in module tilelang.utils.language) get_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_cache_dir() (in module tilelang.jit.adapter.cython.adapter) get_cached_lib() (in module tilelang.jit.adapter.cython.adapter) get_cc() (in module tilelang.contrib.cc) get_coalesced_veclen() (in module tilelang.carver.analysis) get_code() (tilelang.carver.roller.rasterization.NoRasterization method) (tilelang.carver.roller.rasterization.Rasterization method) (tilelang.carver.roller.rasterization.Rasterization2DColumn method) (tilelang.carver.roller.rasterization.Rasterization2DRow method) get_consumer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_cplus_compiler() (in module tilelang.contrib.cc) get_cpu_init_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) get_cuda_device_properties() (in module tilelang.carver.arch.driver.cuda_driver) get_cuda_version() (in module tilelang.contrib.nvcc) get_cython_compiler() (in module tilelang.jit.adapter.cython.adapter) get_dequantize_block() (in module tilelang.carver.matmul_analysis) get_device_attribute() (in module tilelang.carver.arch.driver.cuda_driver) get_device_call() (in module tilelang.engine.lower) get_device_function() (tilelang.carver.roller.rasterization.Rasterization2DColumn method) get_device_name() (in module tilelang.carver.arch.driver.cuda_driver) get_dtype() (tilelang.carver.roller.node.Node method) get_dynamic_symbolic_set() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) get_forward_vars() (tilelang.layout.layout.Layout method) get_global_symbol_section_map() (in module tilelang.contrib.cc) get_hardware_aware_configs() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) get_host_call() (in module tilelang.engine.lower) get_host_source() (tilelang.jit.kernel.JITKernel method) get_in_out_dtypes() (in module tilelang.carver.matmul_analysis) get_index_map() (in module tilelang.carver.matmul_analysis) get_init_func() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_input_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) (tilelang.carver.roller.node.PrimFuncNode method) get_input_exprs() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) get_input_shape() (tilelang.layout.layout.Layout method) get_ir() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) (tilelang.carver.roller.node.PlaceHolderNode method) get_kernel_source() (tilelang.jit.adapter.base.BaseKernelAdapter method) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter method) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter method) (tilelang.jit.kernel.JITKernel method) (tilelang.jit.param.Kernel method) get_ladder_stage3_map() (in module tilelang.carver.matmul_analysis) get_ldmatrix_index_map() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) get_ldmatrix_offset() (in module tilelang.intrinsics.utils) get_let_value() (in module tilelang.language.frame) get_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_lop3_intrin_group() (in module tilelang.quantize.lop3) get_max_dynamic_shared_size_bytes() (in module tilelang.carver.arch.driver.cuda_driver) get_max_shared_memory_per_block() (in module tilelang.carver.analysis) get_max_threads_per_block() (in module tilelang.carver.analysis) get_mbarrier() (in module tilelang.language.builtin) get_mma_micro_size() (in module tilelang.intrinsics.utils) get_node_reduce_step_candidates() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) get_num_sms() (in module tilelang.carver.arch.driver.cuda_driver) get_num_threads() (tilelang.language.kernel.KernelLaunchFrame method) get_nvcc_compiler() (in module tilelang.contrib.nvcc) get_nvrtc_version() (in module tilelang.contrib.nvrtc) get_opt_shape() (tilelang.carver.roller.node.PrimFuncNode method) get_or_create_node() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) get_output_blocks() (in module tilelang.carver.common_schedules) get_output_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_output_shape() (tilelang.layout.layout.Layout method) get_pass_context() (in module tilelang.transform) get_persisting_l2_cache_max_size() (in module tilelang.carver.arch.driver.cuda_driver) get_producer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_profiler() (tilelang.jit.kernel.JITKernel method) get_propagate_map() (in module tilelang.carver.matmul_analysis) get_rasterization_code() (in module tilelang.carver.utils) get_reduce_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_reduce_inputs_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_reduction_blocks() (in module tilelang.carver.analysis) get_registers_per_block() (in module tilelang.carver.arch.driver.cuda_driver) get_rocm_arch() (in module tilelang.contrib.rocm) get_roller_hints_from_func() (in module tilelang.carver.utils) get_roller_hints_from_output_nodes() (in module tilelang.carver.utils) get_root_block() (in module tilelang.carver.analysis) get_rstep() (tilelang.carver.roller.hint.TileDict method) get_shape() (tilelang.carver.roller.node.Node method) get_shared_memory_per_block() (in module tilelang.carver.arch.driver.cuda_driver) get_source_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_space_dim() (tilelang.carver.roller.node.PrimFuncNode method) get_spatial_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_store_index_map() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) get_stream_type() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) (tilelang.jit.adapter.wrapper.TLNVRTCSourceWrapper method) get_swizzle_layout() (in module tilelang.intrinsics.mma_layout) get_tag() (tilelang.carver.roller.node.Node method) get_target_by_dump_machine() (in module tilelang.contrib.cc) get_target_compute_version() (in module tilelang.contrib.nvcc) get_tensor_supply() (in module tilelang.utils.tensor) get_tensorized_func_and_tags() (in module tilelang.carver.matmul_analysis) get_thread_binding() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_extent() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_extents() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_size() (tilelang.layout.fragment.Fragment method) get_tile() (tilelang.carver.roller.hint.TileDict method) get_tuner_result() (tilelang.jit.kernel.JITKernel method) get_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) get_warp_size() (tilelang.primitives.gemm.base.GemmBaseParams method) global_buffers (tilelang.tools.Analyzer.Analyzer attribute) grid() (in module tilelang.language.tir.ir) grid_info (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) grid_size (tilelang.carver.roller.hint.TileDict attribute) H H (tilelang.carver.template.conv.ConvTemplate attribute), [1] has_arch() (tilelang.carver.template.base.BaseTemplate method) has_device_kernel_launch() (in module tilelang.engine.lower) has_let_value() (in module tilelang.language.frame) has_mma_support() (in module tilelang.carver.arch.cuda) has_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) have_bf16() (in module tilelang.contrib.nvcc) have_cudagraph() (in module tilelang.contrib.nvcc) have_fp16() (in module tilelang.contrib.nvcc) have_fp8() (in module tilelang.contrib.nvcc) have_int8() (in module tilelang.contrib.nvcc) have_matrixcore() (in module tilelang.contrib.rocm) have_tensorcore() (in module tilelang.contrib.nvcc) have_tma() (in module tilelang.contrib.nvcc) head_dim (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) Hint (class in tilelang.carver.roller.hint) hopper_tensorcore_supported (in module tilelang.carver.arch.cuda) host_codegen() (in module tilelang.engine.lower) host_func (tilelang.jit.adapter.libgen.PyLibraryGenerator attribute) host_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) host_source (tilelang.jit.kernel.JITKernel property) hypot (in module tilelang.language.tir.ir) hypot() (in module tilelang.language.tir.op) I if_then_else (in module tilelang.language.tir.ir) if_then_else() (in module tilelang.language.tir.op) IfStmtBinding() (in module tilelang.transform) import_from_file() (tilelang.jit.adapter.libgen.PyLibraryGenerator static method) import_source() (in module tilelang.language) in_dtype (tilelang.carver.roller.hint.IntrinInfo attribute) (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA property) inc_max_nreg() (in module tilelang.language.builtin) index (tilelang.layout.layout.Layout property) index_to_coordinates() (in module tilelang.intrinsics.utils) indexdiv() (in module tilelang.language.tir.op) indexmod() (in module tilelang.language.tir.op) infer() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) infer_block_partition() (tilelang.primitives.gemm.base.GemmBaseParams method) infer_node_smem_usage() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) infer_tensorcore_axis() (tilelang.carver.roller.node.PrimFuncNode method) infinity (in module tilelang.language.tir.ir) infinity() (in module tilelang.language.tir.op) INIT_FUNC (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) initialize_function() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) InjectFenceProxy() (in module tilelang.transform) InjectPTXAsyncCopy() (in module tilelang.transform) InjectSoftwarePipeline() (in module tilelang.transform) InjectTmaBarrier() (in module tilelang.transform) inline_transpose_block() (in module tilelang.carver.matmul_analysis) input_buffers (tilelang.carver.roller.node.PrimFuncNode attribute) input_transform_kind (tilelang.carver.roller.hint.IntrinInfo attribute) inputs (tilelang.carver.roller.node.Node property) InputShapeInference (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) INT4TensorCoreIntrinEmitter (class in tilelang.intrinsics.mma_macro_generator) INT4TensorCoreIntrinEmitterWithLadderTransform (class in tilelang.intrinsics.mma_macro_generator) Integer (tilelang.utils.tensor.TensorSupplyType attribute) inter_transform_a (tilelang.carver.roller.hint.IntrinInfo property) inter_transform_b (tilelang.carver.roller.hint.IntrinInfo property) interleave_weight() (in module tilelang.quantize.utils) InterWarpTransform (tilelang.common.transform_kind.TransformKind attribute) IntraWarpTransform (tilelang.common.transform_kind.TransformKind attribute) intrin_info (tilelang.carver.roller.hint.Hint attribute) IntrinInfo (class in tilelang.carver.roller.hint) inverse() (tilelang.layout.layout.Layout method) invoke() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) ir_module (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) ir_pass() (tilelang.tools.Analyzer.Analyzer method) is_ada_arch() (in module tilelang.carver.arch.cuda) is_ampere_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) is_boolean() (tilelang.engine.param.KernelParam method) is_broadcast_epilogue() (in module tilelang.carver.analysis) is_cache_enabled (in module tilelang.env) is_causal (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) is_cdna_arch() (in module tilelang.carver.arch.cdna) (tilelang.carver.template.base.BaseTemplate method) is_cpu_arch() (in module tilelang.carver.arch.cpu) is_cpu_device_backend() (in module tilelang.engine.lower) is_cpu_target() (in module tilelang.jit.adapter.utils) is_cuda_arch() (in module tilelang.carver.arch.cuda) is_cuda_target() (in module tilelang.jit.adapter.utils) is_device_call() (in module tilelang.engine.lower) is_device_call_c_device() (in module tilelang.engine.lower) is_dynamic (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) is_elementwise() (tilelang.carver.analysis.BlockInfo method) is_float8() (tilelang.engine.param.KernelParam method) is_fragment() (in module tilelang.utils.language) is_free (tilelang.carver.roller.bestfit.Block attribute) is_full_col() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_full_row() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_gemm() (tilelang.carver.analysis.BlockInfo method) is_gemv() (tilelang.carver.analysis.BlockInfo method) is_global() (in module tilelang.utils.language) is_hip_target() (in module tilelang.jit.adapter.utils) is_hopper_arch() (in module tilelang.carver.arch.cuda) is_identity_block() (in module tilelang.carver.matmul_analysis) is_identity_or_transpose_block() (in module tilelang.carver.matmul_analysis) is_injective() (tilelang.carver.analysis.BlockInfo method) is_input_8bit() (tilelang.carver.roller.hint.IntrinInfo method) is_inter_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_intra_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_ld_matrix_transform() (tilelang.common.transform_kind.TransformKind method) is_local() (in module tilelang.utils.language) is_m_first (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) is_non_transform() (tilelang.common.transform_kind.TransformKind method) is_nvrtc_available (in module tilelang.jit.adapter.libgen), [1] (in module tilelang.jit.adapter.nvrtc.adapter), [1] is_output() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) is_placeholder() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PlaceHolderNode method) is_reduction() (tilelang.carver.analysis.BlockInfo method) is_scalar() (tilelang.engine.param.KernelParam method) is_shared() (in module tilelang.utils.language) is_shared_dynamic() (in module tilelang.utils.language) is_square() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_tensorcore_supported_precision() (in module tilelang.carver.arch.cuda) is_tma_descriptor_arg() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) is_transpose_block() (in module tilelang.carver.matmul_analysis) is_unsigned() (tilelang.engine.param.KernelParam method) is_valid() (tilelang.carver.roller.hint.Stride method) is_volta_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) isfinite (in module tilelang.language.tir.ir) isfinite() (in module tilelang.language.tir.op) isinf (in module tilelang.language.tir.ir) isinf() (in module tilelang.language.tir.op) isnan (in module tilelang.language.tir.ir) isnan() (in module tilelang.language.tir.op) isnullptr (in module tilelang.language.tir.ir) isnullptr() (in module tilelang.language.tir.op) IterInfo (class in tilelang.carver.analysis) IterKind (class in tilelang.carver.matmul_analysis) iters (tilelang.carver.analysis.BlockInfo attribute) IterTrait (class in tilelang.carver.matmul_analysis) J jit() (in module tilelang.jit) jit_compile (tilelang.autotuner.tuner.AutoTuner attribute) jit_input_tensors (tilelang.autotuner.tuner.AutoTuner attribute) JITKernel (class in tilelang.jit.kernel) K K (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] k_pack (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) Kernel (class in tilelang.jit.param) kernel (tilelang.autotuner.param.AutotuneResult attribute), [1] Kernel() (in module tilelang.language.kernel) KERNEL_CUBIN_PATH (in module tilelang.cache.kernel_cache) kernel_global_source (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) KERNEL_LAUNCH_FUNC_PY (in module tilelang.jit.adapter.wrapper) KERNEL_LIB_PATH (in module tilelang.autotuner.param) (in module tilelang.cache.kernel_cache) KERNEL_PATH (in module tilelang.autotuner.param) (in module tilelang.cache.kernel_cache) KERNEL_PY_PATH (in module tilelang.cache.kernel_cache) kernel_source (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.kernel.JITKernel property) KernelCache (class in tilelang.cache.kernel_cache) KernelLaunchFrame (class in tilelang.language.kernel) KernelParam (class in tilelang.engine.param) kernels (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) kind (tilelang.carver.analysis.IterInfo attribute) (tilelang.carver.matmul_analysis.IterTrait attribute) kIter_I (tilelang.carver.matmul_analysis.IterKind attribute) kIter_J (tilelang.carver.matmul_analysis.IterKind attribute) kIter_K (tilelang.carver.matmul_analysis.IterKind attribute) kIter_S (tilelang.carver.matmul_analysis.IterKind attribute) kIter_T (tilelang.carver.matmul_analysis.IterKind attribute) L l2_cache_size_bytes (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) l2_persistent_map (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) L2_PERSISTENT_MAP_CREATE_HANDLE (in module tilelang.jit.adapter.wrapper) L2_PERSISTENT_MAP_INIT_FUNC (in module tilelang.jit.adapter.wrapper) L2_PERSISTENT_MAP_RESET_HANDLE (in module tilelang.jit.adapter.wrapper) latency (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) LATENCY_PATH (in module tilelang.autotuner.param) Layout (class in tilelang.layout.layout) layout_propagate_chain() (in module tilelang.carver.matmul_analysis) LayoutInference() (in module tilelang.transform) ldexp (in module tilelang.language.tir.ir) ldexp() (in module tilelang.language.tir.op) ldmatrix_16x32_to_shared_16x32_layout_a() (in module tilelang.intrinsics.mma_layout) ldmatrix_16x32_to_shared_16x32_layout_b() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x16_to_shared_16x32_layout_a() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x16_to_shared_16x32_layout_b() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) ldmatrix_a() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) ldmatrix_b() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) ldmatrix_trans_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) LDMatrixTransform (tilelang.common.transform_kind.TransformKind attribute) LegalizeSafeMemoryAccess() (in module tilelang.transform) LegalizeVectorizedLoop() (in module tilelang.transform) LetFrame (class in tilelang.language.frame) lib (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) lib_code (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) lib_generator (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) libcode (tilelang.autotuner.param.AutotuneResult attribute), [1] libpath (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) LibraryGenerator (class in tilelang.jit.adapter.libgen) lift (in module tilelang.intrinsics.mfma_macro_generator) (in module tilelang.intrinsics.mma_macro_generator) likely (in module tilelang.language.tir.ir) likely() (in module tilelang.language.tir.op) limit (tilelang.carver.roller.bestfit.BestFit attribute) list (tilelang.carver.roller.bestfit.BestFit attribute) load_from_disk() (tilelang.autotuner.param.AutotuneResult class method) load_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.libgen.PyLibraryGenerator method) LocalBufferProxy (class in tilelang.language.proxy) log (in module tilelang.language.tir.ir) log() (in module tilelang.language.tir.op) log10 (in module tilelang.language.tir.ir) log10() (in module tilelang.language.tir.op) log1p (in module tilelang.language.tir.ir) log1p() (in module tilelang.language.tir.op) log2 (in module tilelang.language.tir.ir) log2() (in module tilelang.language.tir.op) logger (in module tilelang) (in module tilelang.autotuner.tuner) (in module tilelang.carver.matmul_analysis) (in module tilelang.carver.roller.policy.tensorcore) (in module tilelang.carver.utils) (in module tilelang.jit) (in module tilelang.jit.adapter.cython.adapter) (in module tilelang.jit.adapter.libgen) (in module tilelang.jit.adapter.nvrtc.adapter) (in module tilelang.jit.adapter.wrapper) (in module tilelang.tools.Analyzer) lookup_param (in module tilelang.language.tir.ir) lookup_param() (in module tilelang.language.tir.op) loop_rv (tilelang.carver.analysis.IterInfo attribute) loop_stack (tilelang.tools.Analyzer.Analyzer attribute) LoopVectorizeDynamic() (in module tilelang.transform) lower() (in module tilelang.engine.lower) LowerAndLegalize() (in module tilelang.engine.phase) LowerDeviceKernelLaunch() (in module tilelang.transform) LowerDeviceStorageAccessInfo() (in module tilelang.transform) LowerHopperIntrin() (in module tilelang.transform) LowerL2Persistent() (in module tilelang.transform) LowerOpaqueBlock() (in module tilelang.transform) LowerSharedBarrier() (in module tilelang.transform) LowerThreadAllreduce() (in module tilelang.transform) LowerTileOp() (in module tilelang.transform) M M (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] M_DIM (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) macro() (in module tilelang.language.tir.entry) main() (in module tilelang.testing) make_iter_fusion_index_map() (in module tilelang.carver.matmul_analysis) make_metadata_layout() (in module tilelang.layout.gemm_sp) make_mfma_swizzle_layout() (in module tilelang.intrinsics.mfma_layout) make_mma_load_layout() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) make_mma_store_layout() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) make_mma_swizzle_layout() (in module tilelang.intrinsics.mma_layout) make_reverse() (tilelang.carver.roller.shape_inference.tir.Statement method) make_swizzled_layout() (in module tilelang.layout.fragment) (in module tilelang.layout.swizzle) make_tensor() (in module tilelang.language.proxy) MakePackedAPI() (in module tilelang.transform) malloc() (tilelang.carver.roller.bestfit.BestFit method) manual_assert_close() (tilelang.profiler.Profiler method) manual_check_prog (tilelang.autotuner.param.ProfileArgs attribute), [1] map_forward_index() (tilelang.layout.layout.Layout method) map_forward_thread() (tilelang.layout.fragment.Fragment method) map_torch_type() (in module tilelang.utils.tensor) mapping (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) match_declare_kernel() (in module tilelang.jit.adapter.utils) match_declare_kernel_cpu() (in module tilelang.jit.adapter.utils) match_global_kernel() (in module tilelang.jit.adapter.utils) MatmulTemplate (class in tilelang.carver.template.matmul) MatrixCoreIntrinEmitter (class in tilelang.intrinsics.mfma_macro_generator) max (in module tilelang.language.tir.op) max_mismatched_ratio (tilelang.autotuner.param.ProfileArgs attribute), [1] max_smem_usage (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) max_value (in module tilelang.language.tir.ir) max_value() (in module tilelang.language.tir.op) mbarrier_arrive() (in module tilelang.language.builtin) mbarrier_expect_tx() (in module tilelang.language.builtin) mbarrier_wait_parity() (in module tilelang.language.builtin) mem_info_local_var() (in module tilelang.language.memscope) merge() (tilelang.carver.roller.bestfit.Block method) MergeIfStmt() (in module tilelang.transform) MergeSharedMemoryAllocations() (in module tilelang.transform) mfma() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) mfma_store_index_map() (in module tilelang.intrinsics.utils) min (in module tilelang.language.tir.op) min_value (in module tilelang.language.tir.ir) min_value() (in module tilelang.language.tir.op) mma() (tilelang.intrinsics.mma_macro_generator.INT4TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.INT4TensorCoreIntrinEmitterWithLadderTransform method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) mma_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) mma_fill (in module tilelang.language.tir.ir) mma_fill() (in module tilelang.language.tir.op) mma_store (in module tilelang.language.tir.ir) mma_store() (in module tilelang.language.tir.op) mma_store_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) mma_store_index_map() (in module tilelang.intrinsics.utils) mod (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) module tilelang tilelang.autotuner tilelang.autotuner.capture tilelang.autotuner.param tilelang.autotuner.tuner tilelang.cache tilelang.cache.kernel_cache tilelang.carver tilelang.carver.analysis tilelang.carver.arch tilelang.carver.arch.arch_base tilelang.carver.arch.cdna tilelang.carver.arch.cpu tilelang.carver.arch.cuda tilelang.carver.arch.driver tilelang.carver.arch.driver.cuda_driver tilelang.carver.common_schedules tilelang.carver.matmul_analysis tilelang.carver.roller tilelang.carver.roller.bestfit tilelang.carver.roller.hint tilelang.carver.roller.node tilelang.carver.roller.policy tilelang.carver.roller.policy.common tilelang.carver.roller.policy.default tilelang.carver.roller.policy.tensorcore tilelang.carver.roller.rasterization tilelang.carver.roller.shape_inference tilelang.carver.roller.shape_inference.common tilelang.carver.roller.shape_inference.tir tilelang.carver.template tilelang.carver.template.base tilelang.carver.template.conv tilelang.carver.template.elementwise tilelang.carver.template.flashattention tilelang.carver.template.gemv tilelang.carver.template.general_reduce tilelang.carver.template.matmul tilelang.carver.utils tilelang.common tilelang.common.transform_kind tilelang.contrib tilelang.contrib.cc tilelang.contrib.dlpack tilelang.contrib.hipcc tilelang.contrib.nvcc tilelang.contrib.nvrtc tilelang.contrib.rocm tilelang.engine tilelang.engine.callback tilelang.engine.lower tilelang.engine.param tilelang.engine.phase tilelang.env tilelang.intrinsics tilelang.intrinsics.mfma_layout tilelang.intrinsics.mfma_macro_generator tilelang.intrinsics.mma_layout tilelang.intrinsics.mma_macro_generator tilelang.intrinsics.utils tilelang.jit tilelang.jit.adapter tilelang.jit.adapter.base tilelang.jit.adapter.ctypes tilelang.jit.adapter.ctypes.adapter tilelang.jit.adapter.cython tilelang.jit.adapter.cython.adapter tilelang.jit.adapter.dlpack tilelang.jit.adapter.libgen tilelang.jit.adapter.nvrtc tilelang.jit.adapter.nvrtc.adapter tilelang.jit.adapter.utils tilelang.jit.adapter.wrapper tilelang.jit.env tilelang.jit.kernel tilelang.jit.param tilelang.language tilelang.language.allocate tilelang.language.builtin tilelang.language.copy tilelang.language.customize tilelang.language.experimental tilelang.language.experimental.gemm_sp tilelang.language.fill tilelang.language.frame tilelang.language.gemm tilelang.language.kernel tilelang.language.logical tilelang.language.memscope tilelang.language.parallel tilelang.language.persistent tilelang.language.pipeline tilelang.language.print tilelang.language.proxy tilelang.language.reduce tilelang.language.tir tilelang.language.tir.entry tilelang.language.tir.ir tilelang.language.tir.op tilelang.language.warpgroup tilelang.layout tilelang.layout.fragment tilelang.layout.gemm_sp tilelang.layout.layout tilelang.layout.swizzle tilelang.math tilelang.primitives tilelang.primitives.gemm tilelang.primitives.gemm.base tilelang.primitives.gemm.gemm_mma tilelang.profiler tilelang.profiler.bench tilelang.quantize tilelang.quantize.lop3 tilelang.quantize.quantization tilelang.quantize.utils tilelang.testing tilelang.tools tilelang.tools.Analyzer tilelang.tools.plot_layout tilelang.transform tilelang.transform.pass_config tilelang.transform.simplify tilelang.utils tilelang.utils.deprecated tilelang.utils.language tilelang.utils.sparse tilelang.utils.target tilelang.utils.tensor MultiVersionBuffer() (in module tilelang.transform) N N (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] N_DIM (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) name (tilelang.carver.analysis.BlockInfo attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.cuda.TensorInstruction attribute) (tilelang.carver.roller.node.Node attribute) (tilelang.carver.roller.shape_inference.tir.TensorDepNode attribute) name2dep (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) nearbyint (in module tilelang.language.tir.ir) nearbyint() (in module tilelang.language.tir.op) next_power_of_2() (in module tilelang.math) nextafter (in module tilelang.language.tir.ir) nextafter() (in module tilelang.language.tir.op) no_set_max_nreg() (in module tilelang.language.builtin) Node (class in tilelang.carver.roller.node) nodes (tilelang.carver.roller.policy.default.DefaultPolicy attribute) NonTransform (tilelang.common.transform_kind.TransformKind attribute) NoRasterization (class in tilelang.carver.roller.rasterization) Normal (tilelang.utils.tensor.TensorSupplyType attribute) normalize_prim_func() (in module tilelang.carver.analysis) normalize_to_matmul() (in module tilelang.carver.matmul_analysis) num_elems_per_byte (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) num_heads (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) num_outputs() (tilelang.carver.roller.node.Node method) num_threads (tilelang.language.kernel.KernelLaunchFrame property) num_wave (tilelang.carver.roller.hint.TileDict attribute) NVRTC_UNAVAILABLE_WARNING (in module tilelang.jit.adapter.libgen) (in module tilelang.jit.adapter.nvrtc.adapter) NVRTCKernelAdapter (class in tilelang.jit.adapter.nvrtc.adapter) O One (tilelang.utils.tensor.TensorSupplyType attribute) opt_shapes (tilelang.carver.roller.hint.Hint attribute) OptimizeForTarget() (in module tilelang.engine.phase) out_dtype (tilelang.carver.roller.hint.IntrinInfo attribute) (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] out_idx (tilelang.autotuner.param.CompileArgs attribute) (tilelang.autotuner.param.ProfileArgs attribute) (tilelang.jit.kernel.JITKernel property) (tilelang.jit.param.Kernel attribute) output (tilelang.carver.roller.shape_inference.common.Statement attribute) output_blocks (tilelang.carver.roller.node.PrimFuncNode attribute) output_buffers (tilelang.carver.roller.node.PrimFuncNode attribute) output_nodes (tilelang.carver.template.base.BaseTemplate property) output_strides (tilelang.carver.roller.hint.Hint attribute) output_strides_map (tilelang.carver.roller.hint.TileDict attribute) output_tile (tilelang.carver.roller.hint.TileDict attribute) OutputNode (class in tilelang.carver.roller.node) outputs (tilelang.carver.roller.node.Node property) P P (tilelang.carver.template.conv.ConvTemplate attribute), [1] panel_width (tilelang.carver.roller.rasterization.Rasterization property) panel_width_ (tilelang.carver.roller.rasterization.Rasterization attribute) (tilelang.carver.roller.rasterization.Rasterization2DColumn attribute) (tilelang.carver.roller.rasterization.Rasterization2DRow attribute) Parallel() (in module tilelang.language.parallel) parallel() (in module tilelang.language.tir.ir) param_dtypes (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) param_shapes (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) params (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.kernel.JITKernel property) (tilelang.profiler.Profiler attribute), [1] params_as_dict() (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) (tilelang.primitives.gemm.base.GemmBaseParams method) PARAMS_PATH (in module tilelang.autotuner.param) (in module tilelang.cache.kernel_cache) parse_compute_version() (in module tilelang.contrib.nvcc) (in module tilelang.contrib.rocm) parse_source_information() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) pass_configs (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) (tilelang.jit.kernel.JITKernel attribute) pass_context (tilelang.carver.roller.hint.Hint attribute) PassConfigKey (class in tilelang.transform.pass_config) Persistent() (in module tilelang.language.persistent) PersistThreadblock() (in module tilelang.transform) pipeline_stage (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) Pipelined() (in module tilelang.language.pipeline) PipelinePlanning() (in module tilelang.transform) PlaceHolderNode (class in tilelang.carver.roller.node) plan_rasterization() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) platform (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) plot_layout() (in module tilelang.tools.plot_layout) policy (tilelang.primitives.gemm.base.GemmBaseParams attribute) pop() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) popcount (in module tilelang.language.tir.ir) popcount() (in module tilelang.language.tir.op) pow (in module tilelang.language.tir.ir) pow() (in module tilelang.language.tir.op) pow_of_int() (in module tilelang.language.tir.op) power() (in module tilelang.language.tir.op) pre_order_traverse() (in module tilelang.carver.roller.node) PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY (in module tilelang.jit.adapter.wrapper) PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY_HIP (in module tilelang.jit.adapter.wrapper) PREDEF_HOST_FUNC (in module tilelang.jit.adapter.wrapper) PREDEF_HOST_FUNC_PY (in module tilelang.jit.adapter.wrapper) PREDEF_INIT_FUNC (in module tilelang.jit.adapter.wrapper) prim_func (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter property) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper property) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper property) (tilelang.jit.kernel.JITKernel attribute) prim_func() (in module tilelang.language.tir.entry) PrimFuncNode (class in tilelang.carver.roller.node) print() (in module tilelang.language.print) print_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) print_fragment_buffer_with_condition() (in module tilelang.language.print) print_global_buffer_with_condition() (in module tilelang.language.print) print_local_buffer_with_condition() (in module tilelang.language.print) print_shared_buffer_with_condition() (in module tilelang.language.print) print_var() (in module tilelang.language.print) print_var_with_condition() (in module tilelang.language.print) profile_args (tilelang.autotuner.tuner.AutoTuner attribute) ProfileArgs (class in tilelang.autotuner.param) Profiler (class in tilelang.profiler) Program (class in tilelang.jit.param) propagate() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs_on_reduction() (tilelang.carver.roller.node.PrimFuncNode method) propagate_outputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_reduction_inputs() (tilelang.carver.roller.node.PrimFuncNode method) ptr() (in module tilelang.language.proxy) ptr_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) ptx_arrive_barrier (in module tilelang.language.tir.ir) ptx_arrive_barrier() (in module tilelang.language.tir.op) ptx_arrive_barrier_expect_tx (in module tilelang.language.tir.ir) ptx_arrive_barrier_expect_tx() (in module tilelang.language.tir.op) ptx_commit_group (in module tilelang.language.tir.ir) ptx_commit_group() (in module tilelang.language.tir.op) ptx_cp_async (in module tilelang.language.tir.ir) ptx_cp_async() (in module tilelang.language.tir.op) ptx_cp_async_barrier (in module tilelang.language.tir.ir) ptx_cp_async_barrier() (in module tilelang.language.tir.op) ptx_cp_async_bulk (in module tilelang.language.tir.ir) ptx_cp_async_bulk() (in module tilelang.language.tir.op) ptx_init_barrier_thread_count (in module tilelang.language.tir.ir) ptx_init_barrier_thread_count() (in module tilelang.language.tir.op) ptx_ldmatrix (in module tilelang.language.tir.ir) ptx_ldmatrix() (in module tilelang.language.tir.op) ptx_mma (in module tilelang.language.tir.ir) ptx_mma() (in module tilelang.language.tir.op) ptx_mma_sp (in module tilelang.language.tir.ir) ptx_mma_sp() (in module tilelang.language.tir.op) ptx_wait_barrier (in module tilelang.language.tir.ir) ptx_wait_barrier() (in module tilelang.language.tir.op) ptx_wait_group (in module tilelang.language.tir.ir) ptx_wait_group() (in module tilelang.language.tir.op) push() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) PyLibraryGenerator (class in tilelang.jit.adapter.libgen) pymodule (tilelang.jit.adapter.libgen.PyLibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) pythonic_expr() (in module tilelang.jit.adapter.utils) Q q_multiply_shift (in module tilelang.language.tir.ir) q_multiply_shift() (in module tilelang.language.tir.op) q_multiply_shift_per_axis (in module tilelang.language.tir.ir) q_multiply_shift_per_axis() (in module tilelang.language.tir.op) R Randn (tilelang.utils.tensor.TensorSupplyType attribute) range_map (tilelang.carver.roller.shape_inference.common.Statement attribute) Rasterization (class in tilelang.carver.roller.rasterization) rasterization (tilelang.carver.roller.policy.default.DefaultPolicy attribute) Rasterization2DColumn (class in tilelang.carver.roller.rasterization) Rasterization2DRow (class in tilelang.carver.roller.rasterization) rasterization_plan (tilelang.carver.roller.hint.Hint attribute) raxis (tilelang.carver.roller.node.PrimFuncNode attribute) raxis_order (tilelang.carver.roller.hint.Hint property) recommend_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) recommend_hints() (tilelang.carver.template.base.BaseTemplate method) reduce() (in module tilelang.language.reduce) reduce_absmax() (in module tilelang.language.reduce) reduce_abssum() (in module tilelang.language.reduce) reduce_axes (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) reduce_k (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) reduce_max() (in module tilelang.language.reduce) reduce_min() (in module tilelang.language.reduce) reduce_sum() (in module tilelang.language.reduce) reduce_thread (tilelang.carver.roller.hint.Hint attribute) reduction_block (tilelang.carver.roller.node.PrimFuncNode attribute) ref_input_tensors (tilelang.autotuner.tuner.AutoTuner attribute) ref_latency (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) ref_latency_cache (tilelang.autotuner.tuner.AutoTuner attribute) ref_prog (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] reg_cap (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) region() (in module tilelang.language.copy) region_exist_in_list() (in module tilelang.carver.roller.shape_inference.tir) register_cuda_postproc() (in module tilelang.engine.callback) register_cuda_postproc_callback() (in module tilelang.engine.callback) register_hip_postproc() (in module tilelang.engine.callback) register_hip_postproc_callback() (in module tilelang.engine.callback) reinterpret (in module tilelang.language.tir.ir) reinterpret() (in module tilelang.language.tir.op) remove_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) rep (tilelang.autotuner.param.ProfileArgs attribute), [1] repeat() (tilelang.layout.fragment.Fragment method) replicate() (tilelang.layout.fragment.Fragment method) requires_cuda_compute_version() (in module tilelang.testing) requires_cuda_compute_version_eq() (in module tilelang.testing) requires_cuda_compute_version_ge() (in module tilelang.testing) requires_cuda_compute_version_gt() (in module tilelang.testing) requires_cuda_compute_version_le() (in module tilelang.testing) requires_cuda_compute_version_lt() (in module tilelang.testing) reshape() (in module tilelang.language.customize) result_idx (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.profiler.Profiler attribute), [1] ret (in module tilelang.language.tir.ir) ret() (in module tilelang.language.tir.op) retrieve_func_from_module() (in module tilelang.carver.utils) (in module tilelang.utils.language) reverse_bound_inference (tilelang.carver.roller.shape_inference.tir.Statement attribute) RewriteWgmmaSync() (in module tilelang.transform) ROCM_HOME (in module tilelang.env) rocm_link() (in module tilelang.contrib.rocm) round (in module tilelang.language.tir.ir) round() (in module tilelang.language.tir.op) rsqrt (in module tilelang.language.tir.ir) rsqrt() (in module tilelang.language.tir.op) rstep (tilelang.carver.roller.hint.Hint attribute) rstep_map (tilelang.carver.roller.hint.TileDict attribute) rt_mod (tilelang.engine.param.CompiledArtifact attribute) rtol (tilelang.autotuner.param.ProfileArgs attribute), [1] run() (tilelang.autotuner.tuner.AutoTuner method) run_once() (tilelang.jit.kernel.JITKernel method) (tilelang.profiler.Profiler method) run_with_timeout() (in module tilelang.autotuner.tuner) S S (tilelang.carver.template.conv.ConvTemplate attribute), [1] save_to_disk() (tilelang.autotuner.param.AutotuneResult method) sch (tilelang.carver.roller.node.BlockAnalyzer attribute) (tilelang.carver.roller.node.PrimFuncNode attribute) schedule_stages (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.node.PrimFuncNode attribute) scheduled_ir_module (tilelang.jit.adapter.wrapper.TLWrapper attribute) score_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) seq_kv_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) seq_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) serial() (in module tilelang.language.tir.ir) set_autotune_inputs() (in module tilelang.autotuner.capture) set_compile_args() (tilelang.autotuner.tuner.AutoTuner method) set_dtype() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PrimFuncNode method) set_function() (tilelang.carver.template.base.BaseTemplate method) set_inputs() (tilelang.carver.roller.node.Node method) set_kernel_parameters() (tilelang.autotuner.tuner.AutoTuner method) set_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_log_level() (in module tilelang) set_max_nreg() (in module tilelang.language.builtin) set_output_nodes() (tilelang.carver.template.base.BaseTemplate method) set_outputs() (tilelang.carver.roller.node.Node method) set_profile_args() (tilelang.autotuner.tuner.AutoTuner method) set_random_seed() (in module tilelang.testing) set_shape() (tilelang.carver.roller.node.Node method) set_src_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_tag() (tilelang.carver.roller.node.Node method) shape (tilelang.carver.arch.cuda.TensorInstruction attribute) (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) shared_16x16_to_ldmatrix_64x4_layout() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_C() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_k_n (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_m_n (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_n_k (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_n_m (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_mma_32x8_layout (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_layout() (in module tilelang.intrinsics.utils) shared_16x16_to_mma_32x8_layout_rs() (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_layout_sr() (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_layout_trans (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_16x32_to_local_64x8_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x32_to_local_64x8_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_16x32_to_mma_32x16_layout() (in module tilelang.intrinsics.mma_layout) (in module tilelang.intrinsics.utils) shared_16x32_to_mma_32x16_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_16x4_to_local_64x1_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x64_to_local_64x16_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x64_to_local_64x16_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_32x16_to_mma_32x16_layout() (in module tilelang.intrinsics.mma_layout) (in module tilelang.intrinsics.utils) shared_32x16_to_mma_32x16_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_4x16_to_local_64x1_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_scope (tilelang.carver.roller.hint.Hint attribute) SharedBufferProxy (class in tilelang.language.proxy) shfl_down() (in module tilelang.language.builtin) shfl_up() (in module tilelang.language.builtin) shfl_xor() (in module tilelang.language.builtin) shift_left (in module tilelang.language.tir.ir) shift_left() (in module tilelang.language.tir.op) shift_right (in module tilelang.language.tir.ir) shift_right() (in module tilelang.language.tir.op) should_enable_aggressive_merge() (in module tilelang.engine.phase) sigmoid (in module tilelang.language.tir.ir) sigmoid() (in module tilelang.language.tir.op) Simplify() (in module tilelang.transform.simplify) simplify_prim_func() (in module tilelang.transform.simplify) sin (in module tilelang.language.tir.ir) sin() (in module tilelang.language.tir.op) sinh (in module tilelang.language.tir.ir) sinh() (in module tilelang.language.tir.op) size() (tilelang.autotuner.capture.CaptureStack method) (tilelang.carver.roller.bestfit.Block method) (tilelang.language.kernel.FrameStack method) skip_check (tilelang.autotuner.param.ProfileArgs attribute), [1] sm_partition (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) sm_version (tilelang.carver.arch.cuda.CUDA attribute) smem_cap (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) smem_cost (tilelang.carver.roller.hint.TileDict attribute) smooth_a (tilelang.carver.roller.hint.IntrinInfo property) smooth_b (tilelang.carver.roller.hint.IntrinInfo property) source (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) source_code (tilelang.jit.param.Kernel attribute) split_k_factor (tilelang.carver.roller.hint.Hint attribute) sqrt (in module tilelang.language.tir.ir) sqrt() (in module tilelang.language.tir.op) Square (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) src_id (tilelang.carver.roller.node.Edge attribute) src_node (tilelang.carver.roller.node.Edge attribute) srcpath (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) stack (tilelang.autotuner.capture.CaptureStack attribute) start (tilelang.carver.roller.bestfit.Block attribute) start_profile_intrinsic (in module tilelang.language.tir.ir) start_profile_intrinsic() (in module tilelang.language.tir.op) Statement (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) static_shape_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) step (tilelang.carver.roller.hint.Hint property) stmatrix() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) StorageRewrite() (in module tilelang.transform) Stride (class in tilelang.carver.roller.hint) stride (tilelang.carver.roller.hint.Stride property) structure (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) sum (in module tilelang.language.tir.op) supply_prog (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] supply_type (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] (tilelang.profiler.Profiler attribute), [1] symbolic() (in module tilelang.language) sync_global() (in module tilelang.language.builtin) sync_grid() (in module tilelang.language.builtin) sync_thread_partial() (in module tilelang.language.builtin) sync_threads() (in module tilelang.language.builtin) T tags (tilelang.carver.roller.policy.default.DefaultPolicy attribute) tan (in module tilelang.language.tir.ir) tan() (in module tilelang.language.tir.op) tanh (in module tilelang.language.tir.ir) tanh() (in module tilelang.language.tir.op) target (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) (tilelang.jit.kernel.JITKernel attribute) target_host (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.kernel.JITKernel attribute) target_mapping (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) tc_axis (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) tensor_strides_map (tilelang.carver.roller.hint.TileDict attribute) tensorcore_legalization() (tilelang.carver.roller.hint.Hint method) TensorCoreExtraConfig (class in tilelang.carver.roller.hint) TensorCoreIntrinEmitter (class in tilelang.intrinsics.mma_macro_generator) TensorCoreIntrinEmitterWithLadderTransform (class in tilelang.intrinsics.mma_macro_generator) TensorCorePolicy (class in tilelang.carver.roller.policy.tensorcore) TensorDepNode (class in tilelang.carver.roller.shape_inference.tir) TensorInstruction (class in tilelang.carver.arch.cuda) TensorProxy (class in tilelang.language.proxy) tensors (tilelang.autotuner.capture.AutotuneInputsCapture attribute) TensorSupplyType (class in tilelang.utils.tensor) tflops (tilelang.tools.Analyzer.AnalysisResult attribute) thread (tilelang.carver.roller.hint.Hint attribute) (tilelang.layout.fragment.Fragment property) thread_binding() (in module tilelang.language.tir.ir) thread_id_shared_access_64x16_to_16x64_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_16x64_layout_B() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x1_to_16x4_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x1_to_4x16_layout_B() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_B() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_m_n() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_n_m() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_B() (in module tilelang.intrinsics.mfma_layout) ThreadPartialSync() (in module tilelang.transform) threads (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.language.kernel.KernelLaunchFrame property) ThreadSync() (in module tilelang.transform) tile_map (tilelang.carver.roller.hint.TileDict attribute) TileDevice (class in tilelang.carver.arch.arch_base) TileDict (class in tilelang.carver.roller.hint) tilelang module tilelang.autotuner module tilelang.autotuner.capture module tilelang.autotuner.param module tilelang.autotuner.tuner module tilelang.cache module tilelang.cache.kernel_cache module tilelang.carver module tilelang.carver.analysis module tilelang.carver.arch module tilelang.carver.arch.arch_base module tilelang.carver.arch.cdna module tilelang.carver.arch.cpu module tilelang.carver.arch.cuda module tilelang.carver.arch.driver module tilelang.carver.arch.driver.cuda_driver module tilelang.carver.common_schedules module tilelang.carver.matmul_analysis module tilelang.carver.roller module tilelang.carver.roller.bestfit module tilelang.carver.roller.hint module tilelang.carver.roller.node module tilelang.carver.roller.policy module tilelang.carver.roller.policy.common module tilelang.carver.roller.policy.default module tilelang.carver.roller.policy.tensorcore module tilelang.carver.roller.rasterization module tilelang.carver.roller.shape_inference module tilelang.carver.roller.shape_inference.common module tilelang.carver.roller.shape_inference.tir module tilelang.carver.template module tilelang.carver.template.base module tilelang.carver.template.conv module tilelang.carver.template.elementwise module tilelang.carver.template.flashattention module tilelang.carver.template.gemv module tilelang.carver.template.general_reduce module tilelang.carver.template.matmul module tilelang.carver.utils module tilelang.common module tilelang.common.transform_kind module tilelang.contrib module tilelang.contrib.cc module tilelang.contrib.dlpack module tilelang.contrib.hipcc module tilelang.contrib.nvcc module tilelang.contrib.nvrtc module tilelang.contrib.rocm module tilelang.engine module tilelang.engine.callback module tilelang.engine.lower module tilelang.engine.param module tilelang.engine.phase module tilelang.env module tilelang.intrinsics module tilelang.intrinsics.mfma_layout module tilelang.intrinsics.mfma_macro_generator module tilelang.intrinsics.mma_layout module tilelang.intrinsics.mma_macro_generator module tilelang.intrinsics.utils module tilelang.jit module tilelang.jit.adapter module tilelang.jit.adapter.base module tilelang.jit.adapter.ctypes module tilelang.jit.adapter.ctypes.adapter module tilelang.jit.adapter.cython module tilelang.jit.adapter.cython.adapter module tilelang.jit.adapter.dlpack module tilelang.jit.adapter.libgen module tilelang.jit.adapter.nvrtc module tilelang.jit.adapter.nvrtc.adapter module tilelang.jit.adapter.utils module tilelang.jit.adapter.wrapper module tilelang.jit.env module tilelang.jit.kernel module tilelang.jit.param module tilelang.language module tilelang.language.allocate module tilelang.language.builtin module tilelang.language.copy module tilelang.language.customize module tilelang.language.experimental module tilelang.language.experimental.gemm_sp module tilelang.language.fill module tilelang.language.frame module tilelang.language.gemm module tilelang.language.kernel module tilelang.language.logical module tilelang.language.memscope module tilelang.language.parallel module tilelang.language.persistent module tilelang.language.pipeline module tilelang.language.print module tilelang.language.proxy module tilelang.language.reduce module tilelang.language.tir module tilelang.language.tir.entry module tilelang.language.tir.ir module tilelang.language.tir.op module tilelang.language.warpgroup module tilelang.layout module tilelang.layout.fragment module tilelang.layout.gemm_sp module tilelang.layout.layout module tilelang.layout.swizzle module tilelang.math module tilelang.primitives module tilelang.primitives.gemm module tilelang.primitives.gemm.base module tilelang.primitives.gemm.gemm_mma module tilelang.profiler module tilelang.profiler.bench module tilelang.quantize module tilelang.quantize.lop3 module tilelang.quantize.quantization module tilelang.quantize.utils module tilelang.testing module tilelang.tools module tilelang.tools.Analyzer module tilelang.tools.plot_layout module tilelang.transform module tilelang.transform.pass_config module tilelang.transform.simplify module tilelang.utils module tilelang.utils.deprecated module tilelang.utils.language module tilelang.utils.sparse module tilelang.utils.target module tilelang.utils.tensor module TILELANG_CACHE_DIR (in module tilelang.env) tilelang_callback_cuda_compile() (in module tilelang.contrib.nvcc) (in module tilelang.engine.lower) tilelang_callback_hip_compile() (in module tilelang.contrib.hipcc) (in module tilelang.engine.lower) TILELANG_GEN_SRC_DIR (in module tilelang.jit.env) TILELANG_JIT_DIR (in module tilelang.jit.env) TILELANG_JIT_WORKSPACE_DIR (in module tilelang.jit.env) TILELANG_TEMPLATE_PATH (in module tilelang.env) timeout (tilelang.autotuner.param.ProfileArgs attribute), [1] timeout_handler() (in module tilelang.autotuner.tuner) TimeoutException TIR_ADD_LOWER_PASS (tilelang.transform.pass_config.PassConfigKey attribute) TIR_DISABLE_CSE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_DISABLE_STORAGE_REWRITE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_DISABLE_VECTORIZE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_ENABLE_DEBUG (tilelang.transform.pass_config.PassConfigKey attribute) TIR_ENABLE_EQUIV_TERMS_IN_CSE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_MERGE_STATIC_SMEM (tilelang.transform.pass_config.PassConfigKey attribute) TIR_NOALIAS (tilelang.transform.pass_config.PassConfigKey attribute) TIR_SIMPLIFY (tilelang.transform.pass_config.PassConfigKey attribute) TIR_USE_ASYNC_COPY (tilelang.transform.pass_config.PassConfigKey attribute) TL_CONFIG_INDEX_BITWIDTH (tilelang.transform.pass_config.PassConfigKey attribute) TL_DEBUG_MERGE_SHARED_MEMORY_ALLOCATIONS (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_DYNAMIC_TAIL_SPLIT (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_FAST_MATH (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_SAFE_MEMORY_ACCESS (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_TMA_LOWER (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_WARP_SPECIALIZED (tilelang.transform.pass_config.PassConfigKey attribute) TL_DYNAMIC_ALIGNMENT (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_PTXAS_VERBOSE_OUTPUT (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY (tilelang.transform.pass_config.PassConfigKey attribute) TLCPUSourceWrapper (class in tilelang.jit.adapter.wrapper) TLCUDASourceWrapper (class in tilelang.jit.adapter.wrapper) TLHIPSourceWrapper (class in tilelang.jit.adapter.wrapper) TLNVRTCSourceWrapper (class in tilelang.jit.adapter.wrapper) TLPyWrapper (class in tilelang.jit.adapter.wrapper) TLWrapper (class in tilelang.jit.adapter.wrapper) TMA_DESC_INIT_FUNC (in module tilelang.jit.adapter.wrapper) TMA_DESC_INIT_FUNC_PY (in module tilelang.jit.adapter.wrapper) tma_descriptor_args (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) tma_load() (in module tilelang.language.builtin) tma_store_arrive() (in module tilelang.language.builtin) tma_store_wait() (in module tilelang.language.builtin) to_dict() (tilelang.carver.roller.hint.Hint method) to_prime_factors() (tilelang.primitives.gemm.base.GemmWarpPolicy static method) to_pytorch_func() (in module tilelang.contrib.dlpack) top() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) topo_order() (in module tilelang.carver.roller.node) torch_assert_close() (in module tilelang.utils.tensor) torch_function (tilelang.jit.kernel.JITKernel attribute), [1] TorchDLPackKernelAdapter (class in tilelang.jit.adapter.dlpack) total_flops (tilelang.tools.Analyzer.AnalysisResult attribute) (tilelang.tools.Analyzer.Analyzer attribute) total_global_bytes (tilelang.tools.Analyzer.AnalysisResult attribute), [1] (tilelang.tools.Analyzer.Analyzer attribute) TqdmLoggingHandler (class in tilelang) trace() (in module tilelang.language.tir.op) traffic (tilelang.carver.roller.hint.TileDict attribute) trans_a (tilelang.carver.roller.hint.IntrinInfo attribute) trans_A (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] trans_b (tilelang.carver.roller.hint.IntrinInfo attribute) trans_B (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] transaction_size (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) TransformKind (class in tilelang.common.transform_kind) transpose_A (tilelang.primitives.gemm.base.GemmBaseParams attribute) transpose_B (tilelang.primitives.gemm.base.GemmBaseParams attribute) traverse_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) trunc (in module tilelang.language.tir.ir) trunc() (in module tilelang.language.tir.op) truncdiv (in module tilelang.language.tir.ir) truncdiv() (in module tilelang.language.tir.op) truncmod (in module tilelang.language.tir.ir) truncmod() (in module tilelang.language.tir.op) try_inline() (in module tilelang.carver.common_schedules) try_inline_contiguous_spatial() (in module tilelang.carver.common_schedules) tvm_access_ptr (in module tilelang.language.tir.ir) tvm_access_ptr() (in module tilelang.language.tir.op) tvm_bmma_sync (in module tilelang.language.tir.ir) tvm_bmma_sync() (in module tilelang.language.tir.op) tvm_check_return (in module tilelang.language.tir.ir) tvm_check_return() (in module tilelang.language.tir.op) tvm_fill_fragment (in module tilelang.language.tir.ir) tvm_fill_fragment() (in module tilelang.language.tir.op) TVM_LIBRARY_PATH (in module tilelang.env) tvm_load_matrix_sync (in module tilelang.language.tir.ir) tvm_load_matrix_sync() (in module tilelang.language.tir.op) tvm_mfma (in module tilelang.language.tir.ir) tvm_mfma() (in module tilelang.language.tir.op) tvm_mfma_store (in module tilelang.language.tir.ir) tvm_mfma_store() (in module tilelang.language.tir.op) tvm_mma_sync (in module tilelang.language.tir.ir) tvm_mma_sync() (in module tilelang.language.tir.op) TVM_PYTHON_PATH (in module tilelang.env) tvm_rdna_wmma (in module tilelang.language.tir.ir) tvm_rdna_wmma() (in module tilelang.language.tir.op) tvm_rdna_wmma_store (in module tilelang.language.tir.ir) tvm_rdna_wmma_store() (in module tilelang.language.tir.op) tvm_stack_alloca (in module tilelang.language.tir.ir) tvm_stack_alloca() (in module tilelang.language.tir.op) tvm_stack_make_array (in module tilelang.language.tir.ir) tvm_stack_make_array() (in module tilelang.language.tir.op) tvm_stack_make_shape (in module tilelang.language.tir.ir) tvm_stack_make_shape() (in module tilelang.language.tir.op) tvm_storage_sync (in module tilelang.language.tir.ir) tvm_storage_sync() (in module tilelang.language.tir.op) tvm_store_matrix_sync (in module tilelang.language.tir.ir) tvm_store_matrix_sync() (in module tilelang.language.tir.op) tvm_struct_get (in module tilelang.language.tir.ir) tvm_struct_get() (in module tilelang.language.tir.op) tvm_struct_set (in module tilelang.language.tir.ir) tvm_struct_set() (in module tilelang.language.tir.op) tvm_thread_allreduce (in module tilelang.language.tir.ir) tvm_thread_allreduce() (in module tilelang.language.tir.op) tvm_thread_invariant (in module tilelang.language.tir.ir) tvm_thread_invariant() (in module tilelang.language.tir.op) tvm_throw_last_error (in module tilelang.language.tir.ir) tvm_throw_last_error() (in module tilelang.language.tir.op) tvm_tuple (in module tilelang.language.tir.ir) tvm_tuple() (in module tilelang.language.tir.op) tvm_warp_activemask (in module tilelang.language.tir.ir) tvm_warp_activemask() (in module tilelang.language.tir.op) tvm_warp_shuffle (in module tilelang.language.tir.ir) tvm_warp_shuffle() (in module tilelang.language.tir.op) tvm_warp_shuffle_down (in module tilelang.language.tir.ir) tvm_warp_shuffle_down() (in module tilelang.language.tir.op) tvm_warp_shuffle_up (in module tilelang.language.tir.ir) tvm_warp_shuffle_up() (in module tilelang.language.tir.op) TVMBackendAllocWorkspace (in module tilelang.language.tir.ir) TVMBackendAllocWorkspace() (in module tilelang.language.tir.op) TVMBackendFreeWorkspace (in module tilelang.language.tir.ir) TVMBackendFreeWorkspace() (in module tilelang.language.tir.op) type_annotation() (in module tilelang.language.tir.op) U undef (in module tilelang.language.tir.ir) undef() (in module tilelang.language.tir.op) Uniform (tilelang.utils.tensor.TensorSupplyType attribute) unroll() (in module tilelang.language.tir.ir) update_host_func() (tilelang.jit.adapter.libgen.PyLibraryGenerator method) update_lib_code() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLNVRTCSourceWrapper method) update_tags() (tilelang.carver.roller.node.Node method) update_tuner_result() (tilelang.jit.kernel.JITKernel method) use_async (tilelang.carver.roller.hint.Hint attribute) use_async_copy (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) use_swizzle() (in module tilelang.language) use_tc (tilelang.carver.roller.hint.Hint attribute) V valid (tilelang.carver.roller.hint.TileDict attribute) var (tilelang.carver.analysis.IterInfo attribute) var_map (tilelang.carver.roller.shape_inference.common.Statement attribute) vectorcombine (in module tilelang.language.tir.ir) vectorcombine() (in module tilelang.language.tir.op) vectorhigh (in module tilelang.language.tir.ir) vectorhigh() (in module tilelang.language.tir.op) vectorize (tilelang.carver.roller.hint.Hint attribute) vectorized() (in module tilelang.language.tir.ir) VectorizeLoop() (in module tilelang.transform) vectorlow (in module tilelang.language.tir.ir) vectorlow() (in module tilelang.language.tir.op) verbose (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.kernel.JITKernel attribute) view() (in module tilelang.language.customize) volta_tensorcore_supported (in module tilelang.carver.arch.cuda) vscale (in module tilelang.language.tir.ir) vscale() (in module tilelang.language.tir.op) W W (tilelang.carver.template.conv.ConvTemplate attribute), [1] wait_wgmma() (in module tilelang.language.builtin) walk_indice() (in module tilelang.carver.roller.shape_inference.tir) warmup (tilelang.autotuner.param.ProfileArgs attribute), [1] warp (tilelang.carver.roller.hint.Hint attribute) warp_col_tiles (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) warp_cols (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) warp_row_tiles (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) warp_rows (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) warp_size (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) WARP_SIZE (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) WarpSpecialize() (in module tilelang.language.warpgroup) WarpSpecialized() (in module tilelang.transform) WarpSpecializedPipeline() (in module tilelang.transform) WarpSpecializeFrame (class in tilelang.language.warpgroup) weight_transform_kind (tilelang.carver.roller.hint.IntrinInfo attribute) with_arch() (tilelang.carver.template.base.BaseTemplate method) with_bias (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] with_default_adapter() (tilelang.profiler.Profiler method) wmma_k (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) wrap() (tilelang.jit.adapter.wrapper.BaseWrapper method) (tilelang.jit.adapter.wrapper.TLPyWrapper method) (tilelang.jit.adapter.wrapper.TLWrapper method) WRAPPED_KERNEL_PATH (in module tilelang.autotuner.param) (in module tilelang.cache.kernel_cache) wrapped_source (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) wrapper (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) ws (in module tilelang.language.warpgroup) Z Zero (tilelang.utils.tensor.TensorSupplyType attribute)