Index _ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z _ __add__() (tilelang.contrib.cutedsl.gemm_tcgen05.Tcgen05SmemDescriptor method) (tilelang.contrib.cutedsl.gemm_v2.GmmaDescriptor method) (tilelang.layout.cute.IntTuple method) __all__ (in module tilelang.language.dtypes) (in module tilelang.testing) __bool__() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) __call__() (tilelang.autotuner.tuner.AutoTuneImpl method) (tilelang.autotuner.tuner.AutoTuner method) (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM80 method) (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM90 method) (tilelang.contrib.cutedsl.reduce.BitAndOp static method) (tilelang.contrib.cutedsl.reduce.BitOrOp static method) (tilelang.contrib.cutedsl.reduce.BitXorOp static method) (tilelang.contrib.cutedsl.reduce.MaxOp static method) (tilelang.contrib.cutedsl.reduce.MinOp static method) (tilelang.contrib.cutedsl.reduce.SumOp static method) (tilelang.jit.adapter.base.BaseKernelAdapter method) (tilelang.jit.JITImpl method) (tilelang.jit.kernel.JITKernel method) (tilelang.language.eager.builder.JITFunc method) (tilelang.language.eager.builder.Macro method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) (tilelang.language.proxy.StridedTensorProxy method) (tilelang.language.proxy.TensorProxy method) (tilelang.layout.cute.Layout method) (tilelang.layout.layout.Layout method) (tilelang.profiler.Profiler method) __class_getitem__() (tilelang.language.proxy.BaseTensor class method) __del__() (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator method) __dtype_as_torch__() (in module tilelang.language.dtypes) __dtype_bytes__() (in module tilelang.language.dtypes) __dtype_call__() (in module tilelang.language.dtypes) __dtype_is_float4__() (in module tilelang.language.dtypes) __dtype_is_float4_e2m1_unpacked__() (in module tilelang.language.dtypes) __dtype_is_float4_e2m1fn__() (in module tilelang.language.dtypes) __dtype_new__() (in module tilelang.language.dtypes) __enter__() (tilelang.autodd.AsyncPythonRunner method) (tilelang.autodd.SubProcRunner method) (tilelang.autotuner.capture.AutotuneInputsCapture method) (tilelang.language.eager.builder.Frame method) (tilelang.language.frame.LetFrame method) (tilelang.language.kernel.KernelLaunchFrame method) (tilelang.profiler.bench.suppress_stdout_stderr method) __eq__() (tilelang.language.eager.builder.Macro method) __exit__() (tilelang.autodd.AsyncPythonRunner method) (tilelang.autodd.SubProcRunner method) (tilelang.autotuner.capture.AutotuneInputsCapture method) (tilelang.language.eager.builder.Frame method) (tilelang.language.frame.LetFrame method) (tilelang.language.kernel.KernelLaunchFrame method) (tilelang.profiler.bench.suppress_stdout_stderr method) __freeze__ (in module tilelang.autodd) __get__() (tilelang.env.EnvVar method) __getattr__() (tilelang.language.eager.builder.JITFunc method) __getitem__() (tilelang.language.proxy.BaseTensor method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) (tilelang.layout.cute.Layout method) __hash__() (tilelang.autotuner.param.CompileArgs method) (tilelang.autotuner.param.ProfileArgs method) (tilelang.carver.roller.hint.TileDict method) (tilelang.language.eager.builder.Macro method) __len__() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) __mul__() (tilelang.layout.cute.IntTuple method) __post_init__() (tilelang.autodd.ParTaskManager method) (tilelang.autotuner.tuner.AutoTuneImpl method) (tilelang.carver.template.base.BaseTemplate method) (tilelang.jit.JITImpl method) (tilelang.language.eager.builder.JITFunc method) (tilelang.profiler.Profiler method) (tilelang.tileop.gemm.gemm_base.GemmBase method) __radd__() (tilelang.layout.cute.IntTuple method) __repr__() (tilelang.carver.analysis.BlockInfo method) (tilelang.carver.analysis.IterInfo method) (tilelang.carver.arch.cuda.CUDA method) (tilelang.carver.arch.rdna.RDNA method) (tilelang.carver.roller.bestfit.Block method) (tilelang.carver.roller.hint.Hint method) (tilelang.carver.roller.hint.IntrinInfo method) (tilelang.carver.roller.hint.Stride method) (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.rasterization.NoRasterization method) (tilelang.carver.roller.rasterization.Rasterization2DColumn method) (tilelang.carver.roller.rasterization.Rasterization2DRow method) (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) (tilelang.jit.param.Kernel method) (tilelang.jit.param.Program method) (tilelang.layout.cute.ScaledBasis method) (tilelang.layout.fragment.Fragment method) (tilelang.layout.layout.Layout method) __rmul__() (tilelang.layout.cute.IntTuple method) __set__() (tilelang.env.EnvVar method) __setitem__() (tilelang.language.proxy.BaseTensor method) __slots__ (tilelang.autotuner.capture.AutotuneInputsCapture attribute) __str__() (tilelang.carver.analysis.BlockInfo method) (tilelang.carver.analysis.IterInfo method) (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) __version__ (in module tilelang) A A (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) A_base_offsets (tilelang.tileop.gemm.gemm_base.GemmBase property) a_dtype (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) a_fragment_forward_fn (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) a_shared_layout (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter attribute) a_transposed (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) abs (in module tilelang.language.tir.ir) abs() (in module tilelang.language.tir.op) abs2() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.math_intrinsics) access_ptr() (in module tilelang.language.builtin) AccessReplacer (class in tilelang.transform.decouple_type_cast) accum_dtype (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) acos (in module tilelang.language.tir.ir) acos() (in module tilelang.language.tir.op) acosh (in module tilelang.language.tir.ir) acosh() (in module tilelang.language.tir.op) activemask() (in module tilelang.language.builtin) adapter (tilelang.jit.kernel.JITKernel attribute), [1] (tilelang.profiler.Profiler attribute), [1] add2() (in module tilelang.language.math_intrinsics) add_next() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_prev() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_tag() (tilelang.carver.roller.node.Node method) address_of (in module tilelang.language.tir.ir) address_of() (in module tilelang.language.tir.op) AddWrapperForSingleBufStore() (in module tilelang.transform.add_bufstore_wrapper) AF_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) align (tilelang.carver.roller.bestfit.BestFit attribute) all() (in module tilelang.language.tir.op) all_labels (tilelang.autodd.PDD attribute) all_of() (in module tilelang.language.logical) all_sync() (in module tilelang.language.builtin) all_threads (tilelang.contrib.cutedsl.reduce.NamedBarrier attribute) alloc_barrier() (in module tilelang.language.allocate) alloc_cluster_barrier() (in module tilelang.language.allocate) alloc_descriptor() (in module tilelang.language.allocate) alloc_fragment() (in module tilelang.language.allocate) alloc_global() (in module tilelang.language.allocate) alloc_local() (in module tilelang.language.allocate) alloc_reducer() (in module tilelang.language.allocate) alloc_shared() (in module tilelang.language.allocate) alloc_tcgen05_instr_desc() (in module tilelang.language.allocate) alloc_tcgen05_instruction_desc() (in module tilelang.language.allocate) alloc_tcgen05_smem_desc() (in module tilelang.language.allocate) alloc_tmem() (in module tilelang.language.allocate) alloc_var() (in module tilelang.language.allocate) alloc_wgmma_desc() (in module tilelang.language.allocate) allow_f8f6f4_mixed_dtypes (tilelang.cuda.op.gemm.gemm_tcgen05.GemmTCGEN5 property) (tilelang.tileop.gemm.gemm_base.GemmBase property) allow_global_thread_synchronization() (in module tilelang.backend.pass_pipeline.pipeline_utils) allow_larger (tilelang.autodd.ParTaskManager attribute) allow_vectorize() (in module tilelang.backend.pass_pipeline.pipeline_utils) allow_warp_specialized() (in module tilelang.cuda.pipeline) allowed_backends_for_target() (in module tilelang.backend.execution_backend) AllReduce() (in module tilelang.contrib.cutedsl.reduce) ana (tilelang.carver.roller.node.PrimFuncNode attribute) analysis() (tilelang.tools.Analyzer.Analyzer class method) AnalysisResult (class in tilelang.tools.Analyzer) analyze() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) Analyzer (class in tilelang.tools.Analyzer) annotate_compile_flags() (in module tilelang.language.eager.builder) annotate_consumer_reg_alloc() (in module tilelang.language.builtin) annotate_l2_hit_ratio() (in module tilelang.language.annotations) annotate_layout() (in module tilelang.language.annotations) annotate_min_blocks_per_sm() (in module tilelang.language.annotations) annotate_pass_configs() (in module tilelang.language.eager.builder) annotate_producer_reg_dealloc() (in module tilelang.language.builtin) annotate_restrict_buffers() (in module tilelang.language.annotations) annotate_safe_value() (in module tilelang.language.annotations) AnnotateDeviceRegions() (in module tilelang.transform) AnnotateReadOnlyParams() (in module tilelang.transform) AnnotateWarpGroupRegAlloc() (in module tilelang.cuda.transform) annotations (tilelang.language.eager.builder.Macro attribute) (tilelang.language.eager.builder.SerialForWithStep attribute) any() (in module tilelang.language.tir.op) any_of() (in module tilelang.language.logical) any_sync() (in module tilelang.language.builtin) AnyDType (in module tilelang.language.dtypes) AnyFrame (in module tilelang.language.eager.builder) anylist_getitem (in module tilelang.language.tir.ir) anylist_getitem() (in module tilelang.language.tir.op) anylist_resetitem (in module tilelang.language.tir.ir) anylist_resetitem() (in module tilelang.language.tir.op) anylist_setitem_call_cpacked (in module tilelang.language.tir.ir) anylist_setitem_call_cpacked() (in module tilelang.language.tir.op) anylist_setitem_call_packed (in module tilelang.language.tir.ir) anylist_setitem_call_packed() (in module tilelang.language.tir.op) applied (tilelang.autodd.Task attribute) applied_rewrites (tilelang.autodd.RewriteApplier attribute) apply (tilelang.autodd.IntConstApply attribute) apply() (tilelang.autodd.ASTPDD method) (tilelang.autodd.PDD method) apply_rewrites() (in module tilelang.autodd) apply_simplify() (in module tilelang.transform.simplify) APtr (tilelang.tileop.gemm.Gemm property) arange_semi_sparse() (in module tilelang.utils.sparse) arch (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.carver.template.base.BaseTemplate property) ARCH_CONFIGS (in module tilelang.tools.Analyzer) ARegion (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) aRegion (tilelang.tileop.gemm_sp.GemmSP attribute) arg() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) arg_names (tilelang.language.eager.builder.JITFunc attribute) Args (class in tilelang.autodd) args (tilelang.carver.roller.node.PrimFuncNode attribute) array_reduce() (in module tilelang.utils.language) artifact (tilelang.jit.kernel.JITKernel attribute), [1] as_rmem_tensor() (in module tilelang.contrib.cutedsl.utils) AS_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) as_tensor_ssa() (in module tilelang.contrib.cutedsl.utils) as_torch() (tilelang.language.dtypes.dtype method) asin (in module tilelang.language.tir.ir) asin() (in module tilelang.language.tir.op) asinh (in module tilelang.language.tir.ir) asinh() (in module tilelang.language.tir.op) assert_allclose() (tilelang.profiler.Profiler method) assert_consistent() (tilelang.profiler.Profiler method) assert_expr() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) assign_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) assign_compile_flags() (tilelang.jit.adapter.libgen.LibraryGenerator method) assign_device_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_host_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_optimized_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_pass_configs() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.wrapper.TLWrapper method) assign_slice() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) assume (in module tilelang.language.tir.ir) assume() (in module tilelang.language.tir.op) ast_get_span() (in module tilelang.language.eager.ast) ast_has_span() (in module tilelang.language.eager.ast) ast_replace() (in module tilelang.autodd) ast_set_span() (in module tilelang.language.eager.ast) ASTMutator (class in tilelang.autodd) ASTPat (class in tilelang.autodd) ASTPatKind (in module tilelang.autodd) ASTPatRewrite (class in tilelang.autodd) ASTPDD (class in tilelang.autodd) ASTPrinter() (in module tilelang.analysis.ast_printer) ASTRewrite (class in tilelang.autodd) async_copy() (in module tilelang.language.copy_op) AsyncPythonRunner (class in tilelang.autodd) atan (in module tilelang.language.tir.ir) atan() (in module tilelang.language.tir.op) atan2 (in module tilelang.language.tir.ir) atan2() (in module tilelang.language.tir.op) atanh (in module tilelang.language.tir.ir) atanh() (in module tilelang.language.tir.op) atol (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) atomic_add() (in module tilelang.language.atomic) atomic_addx2() (in module tilelang.language.atomic) atomic_addx4() (in module tilelang.language.atomic) atomic_load() (in module tilelang.language.atomic) atomic_max() (in module tilelang.language.atomic) atomic_min() (in module tilelang.language.atomic) atomic_store() (in module tilelang.language.atomic) AtomicAdd (class in tilelang.ir) AtomicAdd() (in module tilelang.contrib.cutedsl.atomic) AtomicAddRet() (in module tilelang.contrib.cutedsl.atomic) AtomicAddx2() (in module tilelang.contrib.cutedsl.atomic) AtomicAddx4() (in module tilelang.contrib.cutedsl.atomic) AtomicLoad() (in module tilelang.contrib.cutedsl.atomic) AtomicMax() (in module tilelang.contrib.cutedsl.atomic) AtomicMaxRet() (in module tilelang.contrib.cutedsl.atomic) AtomicMin() (in module tilelang.contrib.cutedsl.atomic) AtomicMinRet() (in module tilelang.contrib.cutedsl.atomic) AtomicStore() (in module tilelang.contrib.cutedsl.atomic) attach_rewrites() (in module tilelang.autodd) AttachFullFuncArgs (class in tilelang.autodd) attrs (tilelang.language.eager.builder.PrimFunc attribute) aug_assign() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) aug_assign_slice() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) Auto (tilelang.utils.tensor.TensorSupplyType attribute) auto_detect_target() (in module tilelang.backend.target) auto_inline_consumer_chain() (in module tilelang.carver.matmul_analysis) auto_inline_consumers() (in module tilelang.carver.matmul_analysis) auto_inline_producers() (in module tilelang.carver.matmul_analysis) autotune() (in module tilelang.autotuner.tuner) AutoTuneImpl (class in tilelang.autotuner.tuner) AutotuneInputsCapture (class in tilelang.autotuner.capture) AutoTuner (class in tilelang.autotuner.tuner) AutotuneResult (class in tilelang.autotuner.param) AvailabilityCheck (in module tilelang.backend.execution_backend) available_tensor_instructions (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) ax (tilelang.carver.roller.hint.Stride property) B B (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) B_base_offsets (tilelang.tileop.gemm.gemm_base.GemmBase property) b_bits (tilelang.layout.cute.Swizzle attribute) b_dtype (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) b_fragment_forward_fn (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) b_shared_layout (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter attribute) b_transposed (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) backend (tilelang.autodd.Args attribute) (tilelang.autodd.ParTaskManager attribute) (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) ballot() (in module tilelang.language.builtin) ballot_sync() (in module tilelang.language.builtin) bandwidth (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) bar_sync() (in module tilelang.contrib.cutedsl.reduce) bar_sync_ptx() (in module tilelang.contrib.cutedsl.reduce) barrier_arrive() (in module tilelang.language.builtin) barrier_wait() (in module tilelang.language.builtin) BaseBuilder (class in tilelang.language.eager.ast) BaseKernelAdapter (class in tilelang.jit.adapter.base) BaseTemplate (class in tilelang.carver.template.base) BaseTensor (class in tilelang.language.proxy) BaseTensorProxy (class in tilelang.language.proxy) BaseWrapper (class in tilelang.jit.adapter.wrapper) basis (tilelang.layout.cute.IntTupleScaledBasis attribute) batch_size (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) BEST_CONFIG_PATH (in module tilelang.autotuner.param) BestFit (class in tilelang.carver.roller.bestfit) BF_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) bind() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) bind_immutable() (tilelang.language.eager.builder.Builder method) BindEnv (in module tilelang.transform.decouple_type_cast) BinOpFwdArg (class in tilelang.autodd) BitAndOp (class in tilelang.contrib.cutedsl.reduce) bitcast() (in module tilelang.contrib.cutedsl.utils) BitOrOp (class in tilelang.contrib.cutedsl.reduce) bits (tilelang.language.dtypes.dtype property) bits_product() (in module tilelang.utils.language) bitwise_and (in module tilelang.language.tir.ir) bitwise_and() (in module tilelang.language.tir.op) bitwise_not (in module tilelang.language.tir.ir) bitwise_not() (in module tilelang.language.tir.op) bitwise_or (in module tilelang.language.tir.ir) bitwise_or() (in module tilelang.language.tir.op) bitwise_xor (in module tilelang.language.tir.ir) bitwise_xor() (in module tilelang.language.tir.op) BitXorOp (class in tilelang.contrib.cutedsl.reduce) Block (class in tilelang.carver.roller.bestfit) block (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) block_analyzer (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) block_col_warps (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) block_counts (tilelang.tools.Analyzer.Analyzer attribute) block_info (tilelang.jit.adapter.torch.metal.MetalKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) block_infos (tilelang.carver.roller.node.BlockAnalyzer attribute) block_per_SM (tilelang.carver.roller.hint.TileDict attribute) block_rank_in_cluster() (in module tilelang.language.cluster) block_reduction_depth (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) block_row_warps (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) block_rv (tilelang.carver.analysis.BlockInfo attribute) BlockAnalyzer (class in tilelang.carver.roller.node) BlockIdx() (in module tilelang.contrib.cutedsl.threadblock_swizzle) BlockInfo (class in tilelang.carver.analysis) blocks (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.language.kernel.KernelLaunchFrame property) body (tilelang.language.eager.builder.PrimFunc attribute) body_rs() (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM80 method) (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM90 method) body_sr() (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM80 method) bool (class in tilelang.language.dtypes) BoolOp (in module tilelang.language.eager.ast) boolop() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) BoolOpFrame (class in tilelang.language.eager.builder) BOTTOM_MASK (in module tilelang.contrib.cutedsl.quantize) box_dim (tilelang.jit.adapter.utils.TMADescriptorParams attribute) BPtr (tilelang.tileop.gemm.Gemm property) BreakFrame (class in tilelang.language.eager.builder) BRegion (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) bRegion (tilelang.tileop.gemm_sp.GemmSP attribute) BS_shape (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) Buffer (in module tilelang.language.proxy) buffer (tilelang.language.eager.builder.Ref property) buffer_device_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_dtype_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_map (tilelang.language.eager.builder.PrimFunc attribute) buffer_mapping (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) buffer_region_to_tile_region() (in module tilelang.language.utils) BufferProxy (class in tilelang.language.proxy) buffers (tilelang.carver.roller.node.PrimFuncNode attribute) bufload (tilelang.language.eager.builder.Ref attribute) Builder (class in tilelang.language.eager.builder) bytes (tilelang.language.dtypes.dtype property) BYTES_PER_POINTER (in module tilelang.contrib.cutedsl.utils) BYTES_PER_TENSORMAP (in module tilelang.contrib.cutedsl.utils) C C (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) c2d_im2col() (in module tilelang.language.copy_op) C_base_offsets (tilelang.tileop.gemm.gemm_base.GemmBase property) C_coords (tilelang.tileop.gemm.gemm_base.GemmBase property) cache_dir (tilelang.autotuner.tuner.AutoTuner property) cache_input_tensors (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) cache_root_dir (tilelang.cache.kernel_cache.KernelCache attribute) cached() (in module tilelang.cache) (tilelang.cache.kernel_cache.KernelCache method) cached_tensors (tilelang.carver.roller.hint.Hint attribute) cached_tensors_map (tilelang.carver.roller.hint.TileDict attribute) CachedTextSource (class in tilelang.jit.adapter.base) CachedTextSourceLike (in module tilelang.jit.adapter.base) CacheState (class in tilelang.env) calculate() (tilelang.tools.Analyzer.Analyzer method) call_cpacked (in module tilelang.language.tir.ir) call_cpacked() (in module tilelang.language.tir.op) call_cpacked_lowered (in module tilelang.language.tir.ir) call_cpacked_lowered() (in module tilelang.language.tir.op) call_extern (in module tilelang.language.tir.ir) call_extern() (in module tilelang.language.tir.op) call_intrin (in module tilelang.language.tir.ir) call_intrin() (in module tilelang.language.tir.op) call_llvm_intrin (in module tilelang.language.tir.ir) call_llvm_intrin() (in module tilelang.language.tir.op) call_llvm_pure_intrin (in module tilelang.language.tir.ir) call_llvm_pure_intrin() (in module tilelang.language.tir.op) call_packed (in module tilelang.language.tir.ir) call_packed() (in module tilelang.language.tir.op) call_packed_lowered (in module tilelang.language.tir.ir) call_packed_lowered() (in module tilelang.language.tir.op) CALL_PREFIX (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) call_pure_extern (in module tilelang.language.tir.ir) call_pure_extern() (in module tilelang.language.tir.op) call_tir() (in module tilelang.language.tir.op) callback_rocm_bitcode_path() (in module tilelang.contrib.rocm) callback_rocm_link() (in module tilelang.contrib.rocm) CallFwdArg1 (class in tilelang.autodd) canon_target_host() (in module tilelang.engine.lower) canonicalize_execution_backend() (in module tilelang.backend.execution_backend) CaptureStack (class in tilelang.autotuner.capture) cast() (in module tilelang.language.tir.ir) cast_tensor() (in module tilelang.contrib.cutedsl.utils) CastEntry (in module tilelang.transform.decouple_type_cast) cdiv (in module tilelang.language.tir.ir) cdiv() (in module tilelang.math) CDNA (class in tilelang.carver.arch.cdna) ceil (in module tilelang.language.tir.ir) ceil() (in module tilelang.language.tir.op) ceildiv (in module tilelang.language.tir.ir) ceildiv() (in module tilelang.language.tir.op) check_continue_break() (tilelang.language.eager.builder.Builder method) check_cuda_availability() (in module tilelang.cuda.target) check_cutedsl_available() (in module tilelang.jit.adapter.cutedsl.checks) check_func_with_dynamic() (in module tilelang.carver.analysis) check_hip_availability() (in module tilelang.rocm.target) check_metal_availability() (in module tilelang.metal.target) check_nvrtc_available() (in module tilelang.jit.adapter.nvrtc) check_tile_shape_isvalid() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) checker (tilelang.autodd.ASTPatRewrite attribute) chunk (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) (tilelang.tileop.gemm.gemm_base.GemmBase property) ck_inc_path (in module tilelang.env) clamp() (in module tilelang.language.customize) class_attributes (tilelang.carver.template.conv.ConvTemplate property) (tilelang.carver.template.elementwise.ElementwiseTemplate property) (tilelang.carver.template.flashattention.FlashAttentionTemplate property) (tilelang.carver.template.gemv.GEMVTemplate property) (tilelang.carver.template.general_reduce.GeneralReductionTemplate property) (tilelang.carver.template.matmul.MatmulTemplate property) clc_get_first_ctaid_x() (in module tilelang.language.cluster) clc_get_first_ctaid_y() (in module tilelang.language.cluster) clc_get_first_ctaid_z() (in module tilelang.language.cluster) clc_is_canceled() (in module tilelang.language.cluster) clc_try_cancel() (in module tilelang.language.cluster) clc_try_cancel_multicast() (in module tilelang.language.cluster) clean_empty_pass() (in module tilelang.autodd) cleanup() (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter method) clear() (in module tilelang.language.fill_op) clear_accum (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) clear_cache() (tilelang.cache.kernel_cache.KernelCache method) clear_let_values() (in module tilelang.language.frame) clear_values() (tilelang.language.frame.FrameStack method) cli_main() (in module tilelang.autodd) cluster_arrive() (in module tilelang.language.cluster) cluster_arrive_relaxed() (in module tilelang.language.cluster) cluster_sync() (in module tilelang.language.cluster) cluster_wait() (in module tilelang.language.cluster) ClusterKernel() (in module tilelang.language.kernel) ClusterPlanning() (in module tilelang.transform) clz (in module tilelang.language.tir.ir) clz() (in module tilelang.language.tir.op) coalesce() (in module tilelang.layout.cute) coalesced_factor() (in module tilelang.carver.roller.policy.common) coalesced_tensor_shape() (in module tilelang.carver.roller.policy.common) collect_block_iter_vars_used_in_access_region() (in module tilelang.carver.analysis) collect_fragment_accesses() (in module tilelang.analysis.fragment_loop_checker) collect_vars_from_expr() (in module tilelang.carver.matmul_analysis) collect_vars_used_in_prim_expr() (in module tilelang.carver.analysis) comm_reducer() (in module tilelang.language.tir.op) compile() (in module tilelang.jit) (tilelang.autotuner.tuner.AutoTuneImpl method) (tilelang.jit.JITImpl method) COMPILE_ARGS (in module tilelang.jit.adapter.tvm_ffi) compile_args (tilelang.autotuner.tuner.AutoTuner attribute) compile_cuda() (in module tilelang.contrib.nvcc) (in module tilelang.contrib.nvrtc) compile_flags (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) compile_grouped_unit_tvm_ffi() (in module tilelang.autotuner.grouped_compile) compile_hip() (in module tilelang.contrib.hipcc) compile_lib() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator method) (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator method) compile_program() (tilelang.autotuner.param.CompileArgs method) CompileArgs (class in tilelang.autotuner.param) CompiledArtifact (class in tilelang.engine.param) CompileMethod (in module tilelang.language.eager.utils) CompileUnitResult (in module tilelang.autotuner.grouped_compile) complete_config() (tilelang.carver.roller.hint.Hint method) COMPOSABLE_KERNEL_INCLUDE_DIR (in module tilelang.env) (tilelang.env.Environment attribute) COMPOSABLE_KERNEL_NOT_FOUND_MESSAGE (in module tilelang.env) ComposedLayout (class in tilelang.layout.cute) composition() (in module tilelang.layout.cute) compress() (in module tilelang.utils.sparse) compute_capability (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) compute_elements_from_shape() (tilelang.carver.roller.hint.Stride method) compute_max_core (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) compute_node_stride_map() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) compute_strides_from_shape() (tilelang.carver.roller.hint.Stride method) compute_tcgen05_a_desc_params() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) compute_tcgen05_b_desc_params() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) compute_tcgen05_instr_desc() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) compute_tile_dict() (tilelang.carver.roller.policy.default.DefaultPolicy method) compute_warp_partition() (tilelang.ir.GemmSPWarpPolicy method) (tilelang.ir.GemmWarpPolicy method) (tilelang.tileop.base.GemmWarpPolicy method) compute_wgmma_a_desc_params() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) compute_wgmma_b_desc_params() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) compute_workload_per_item() (tilelang.carver.roller.policy.default.DefaultPolicy method) condense_rep_var() (tilelang.layout.fragment.Fragment method) config (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) ConfigIndexBitwidth() (in module tilelang.transform) configs (tilelang.autotuner.tuner.AutoTuneImpl attribute) (tilelang.autotuner.tuner.AutoTuner attribute) const() (in module tilelang.language.eager.builder) constexpr() (tilelang.language.eager.builder.Builder method) constexpr_var (tilelang.language.eager.builder.Builder attribute) constexprs (tilelang.language.eager.builder.TirTemplate attribute) construct_dependency_target() (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) construct_strides() (in module tilelang.language.eager.utils) ContinueFrame (class in tilelang.language.eager.builder) ContinueOrBreak (in module tilelang.language.eager.builder) convert_func() (in module tilelang.contrib.dlpack) ConvTemplate (class in tilelang.carver.template.conv) Copy (class in tilelang.ir) copy() (in module tilelang.language.copy_op) copy_cluster() (in module tilelang.language.copy_op) copysign (in module tilelang.language.tir.ir) copysign() (in module tilelang.language.tir.op) copysignf() (in module tilelang.contrib.cutedsl.math) cos (in module tilelang.language.tir.ir) cos() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) cosh (in module tilelang.language.tir.ir) cosh() (in module tilelang.language.tir.op) cp_async_barrier_noinc() (in module tilelang.language.builtin) cp_async_gs() (in module tilelang.contrib.cutedsl.cpasync) cp_async_gs_conditional() (in module tilelang.contrib.cutedsl.cpasync) cp_async_shared_global() (in module tilelang.contrib.cutedsl.cpasync) CPP_COOPERATIVE_KERNEL_LAUNCH_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_KERNEL_INIT_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_KERNEL_LAUNCH_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_LAUNCHER_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_PDL_KERNEL_LAUNCH_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_TMA_DESC_INIT_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_TMA_IM2COL_DESC_INIT_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_TMA_INIT_FUNC_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPP_TMA_LAUNCH_INIT_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CPtr (tilelang.tileop.gemm.Gemm property) CPU (class in tilelang.carver.arch.cpu) CPUPassPipelineBody() (in module tilelang.cpu.pipeline) create() (tilelang.language.eager.builder.TirTemplate class method) create_barriers (in module tilelang.language.tir.ir) create_barriers() (in module tilelang.language.tir.op) create_call_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) create_dispatch_func() (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper method) (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) create_dispatch_func_cpp_launcher() (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper method) create_executable() (in module tilelang.contrib.cc) create_shared() (in module tilelang.contrib.cc) (in module tilelang.contrib.msvc) create_staticlib() (in module tilelang.contrib.cc) create_tma_descriptor() (in module tilelang.language.builtin) CRegion (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) cRegion (tilelang.tileop.gemm_sp.GemmSP attribute) cross_compiler() (in module tilelang.contrib.cc) ctx_break() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_continue() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_else() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_for() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_if() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_then() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_while() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ctx_with() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) CUBIN_FAKE_TENSOR_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CUBIN_GEN_CODE_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CUBIN_KERNEL_LAUNCH_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CUBIN_TMA_ATOM_INIT_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) CUDA (class in tilelang.carver.arch.cuda) CUDA_HOME (in module tilelang.env) (tilelang.env.Environment attribute) CUDA_KERNELS_OUTPUT_DIR (tilelang.transform.pass_config.PassConfigKey attribute) cuda_pipeline (in module tilelang.cuda.pipeline) cudaDevAttrMaxPersistingL2CacheSize (tilelang.carver.arch.driver.cuda_driver.cudaDeviceAttrNames attribute) cudaDevAttrMaxRegistersPerBlock (tilelang.carver.arch.driver.cuda_driver.cudaDeviceAttrNames attribute) cudaDevAttrMaxSharedMemoryPerMultiprocessor (tilelang.carver.arch.driver.cuda_driver.cudaDeviceAttrNames attribute) cudaDevAttrMaxThreadsPerBlock (tilelang.carver.arch.driver.cuda_driver.cudaDeviceAttrNames attribute) cudaDeviceAttrNames (class in tilelang.carver.arch.driver.cuda_driver) CUDAPassPipelineBody() (in module tilelang.cuda.pipeline) CUDAPassPipelineBodyPrologue() (in module tilelang.cuda.pipeline) CUDASourceCodeKernel() (in module tilelang.language.kernel) culib (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator attribute), [1] cummax() (in module tilelang.language.scan_op) CumMax1D (class in tilelang.contrib.cutedsl.reduce) CumMax2D (class in tilelang.contrib.cutedsl.reduce) cummax_fragment() (in module tilelang.language.scan_op) CumMaxOp (class in tilelang.ir) cumsum() (in module tilelang.language.scan_op) CumSum1D (class in tilelang.contrib.cutedsl.reduce) CumSum2D (class in tilelang.contrib.cutedsl.reduce) cumsum_fragment() (in module tilelang.language.scan_op) CumSumOp (class in tilelang.ir) current() (tilelang.language.eager.builder.Builder class method) Current() (tilelang.language.frame.LetFrame class method) (tilelang.language.kernel.KernelLaunchFrame class method) current_file (tilelang.language.eager.builder.Builder attribute) current_line (tilelang.language.eager.builder.Builder attribute) current_macro_name (tilelang.language.eager.builder.Builder attribute) CuTeDSLKernelAdapter (class in tilelang.jit.adapter.cutedsl.adapter) CuTeDSLKernelCache (class in tilelang.jit.adapter.cutedsl.kernel_cache) CuTeDSLLibraryGenerator (class in tilelang.jit.adapter.cutedsl.libgen) cutlass_inc_path (in module tilelang.env) CUTLASS_INCLUDE_DIR (in module tilelang.env) (tilelang.env.Environment attribute) CUTLASS_NOT_FOUND_MESSAGE (in module tilelang.env) cython_wrapper (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) CythonKernelAdapter (class in tilelang.jit.adapter.cython.adapter) CythonKernelCache (class in tilelang.jit.adapter.cython.kernel_cache) D D (tilelang.carver.template.conv.ConvTemplate attribute), [1] data (tilelang.jit.param.Program attribute) deallocate_tmem() (in module tilelang.language.builtin) debug_root_path (tilelang.jit.JITImpl attribute), [1] dec_max_nreg() (in module tilelang.language.builtin) decode_f4_to_bf16_simple_hip (in module tilelang.quantize.mxfp) decode_f4_to_bf16_twiddling (in module tilelang.quantize.mxfp) decode_f4_to_bf16_twiddling_hip (in module tilelang.quantize.mxfp) decode_fp4_to_bf16_twiddling() (in module tilelang.contrib.cutedsl.quantize) decode_i1_to_f16 (in module tilelang.quantize.lop3) decode_i1_to_f16_scale (in module tilelang.quantize.lop3) decode_i1_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i1_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i1s_to_i8s (in module tilelang.quantize.lop3) decode_i2_to_f16 (in module tilelang.quantize.lop3) decode_i2_to_f16_scale (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_original_offset (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_quantized (in module tilelang.quantize.lop3) decode_i2_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i2s_to_i4s (in module tilelang.quantize.lop3) decode_i2s_to_i8s (in module tilelang.quantize.lop3) decode_i4_to_f16 (in module tilelang.quantize.lop3) decode_i4_to_f16_scale (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_original (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_original_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_quantized (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_quantized_offset (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_rescale (in module tilelang.quantize.lop3) decode_i4_to_f16_scale_zeros_rescale_offset (in module tilelang.quantize.lop3) decode_i4s_to_f16() (in module tilelang.contrib.cutedsl.quantize) decode_i4s_to_i8s (in module tilelang.quantize.lop3) decode_i4u_to_f16() (in module tilelang.contrib.cutedsl.quantize) decompose_col_major() (in module tilelang.layout.gemm_sp) DecoupleTypeCast() (in module tilelang.transform.decouple_type_cast) DecoupleTypeCastMutator (class in tilelang.transform.decouple_type_cast) deduplicate() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) default (tilelang.env.EnvVar attribute) default_align (tilelang.language.proxy.BaseTensorProxy attribute) default_compile_options() (in module tilelang.contrib.nvcc) default_offset_factor (tilelang.language.proxy.BaseTensorProxy attribute) default_scope (tilelang.language.proxy.BaseTensorProxy attribute) (tilelang.language.proxy.FragmentBufferProxy attribute) (tilelang.language.proxy.LocalBufferProxy attribute) (tilelang.language.proxy.SharedBufferProxy attribute) DefaultPolicy (class in tilelang.carver.roller.policy.default) dep_analysis (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) dep_name (tilelang.carver.roller.shape_inference.tir.Statement attribute) DependencyAnalysis (class in tilelang.carver.roller.shape_inference.tir) dependent_region (tilelang.carver.roller.shape_inference.common.Statement attribute) (tilelang.carver.roller.shape_inference.tir.Statement attribute) deprecated() (in module tilelang.utils.deprecated) deprecated_warning() (in module tilelang.utils.deprecated) deps (tilelang.carver.roller.shape_inference.common.InputShapeInference attribute) (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) derived (tilelang.autodd.ASTPatRewrite attribute) desc (tilelang.contrib.cutedsl.gemm_tcgen05.Tcgen05SmemDescriptor attribute) (tilelang.contrib.cutedsl.gemm_v2.GmmaDescriptor attribute) desc_i64 (tilelang.contrib.cutedsl.gemm_tcgen05.Tcgen05SmemDescriptor attribute) (tilelang.contrib.cutedsl.gemm_v2.GmmaDescriptor attribute) DescKind (in module tilelang.language.allocate) detect (tilelang.backend.target.TargetDetectorSpec attribute) detect_dominant_read() (in module tilelang.carver.analysis) detect_iter_traits() (in module tilelang.carver.matmul_analysis) determine_fp8_type() (in module tilelang.language.fp8) determine_target() (in module tilelang.backend.target) determine_torch_fp8_type() (in module tilelang.language.fp8) DEV (in module tilelang.env), [1] device (in module tilelang.profiler.bench) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) (tilelang.tools.Analyzer.Analyzer attribute) device_assert() (in module tilelang.cuda.debug) (in module tilelang.language.print_op) device_codegen() (in module tilelang.engine.lower) device_codegen_without_compile() (in module tilelang.engine.lower) device_func (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper property) DEVICE_KERNEL_PATH (in module tilelang.autotuner.param) device_kernel_path (tilelang.cache.kernel_cache.KernelCache attribute) (tilelang.jit.adapter.cutedsl.kernel_cache.CuTeDSLKernelCache attribute) device_kernel_source (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) device_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) dfs_smem_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) diagnostics_enabled() (in module tilelang.jit.diagnostics) dim (tilelang.contrib.cutedsl.reduce.CumMax2D attribute) (tilelang.contrib.cutedsl.reduce.CumSum2D attribute) dim3 (class in tilelang.contrib.cutedsl.threadblock_swizzle) disable() (tilelang.env.CacheState class method) disable_cache (in module tilelang.env) disable_cache() (tilelang.env.Environment method) disable_warp_group_reg_alloc() (in module tilelang.language.builtin) disk_compile() (in module tilelang.language.eager.utils) div() (in module tilelang.language.tir.op) divf() (in module tilelang.contrib.cutedsl.math) do_bench() (in module tilelang.profiler.bench) (tilelang.profiler.Profiler method) do_not_specialize (tilelang.autotuner.tuner.AutoTuneImpl attribute) dom (tilelang.carver.analysis.IterInfo property) dom() (tilelang.carver.analysis.BlockInfo method) dom_kind() (tilelang.carver.analysis.BlockInfo method) dp4a() (in module tilelang.language.customize) ds_read_tr16_b64() (in module tilelang.language.builtin) ds_read_tr8_b64() (in module tilelang.language.builtin) DSLMutator (class in tilelang.language.eager.ast) dst_id (tilelang.carver.roller.node.Edge attribute) dst_node (tilelang.carver.roller.node.Edge attribute) dtype (class in tilelang.language.dtypes) (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) (tilelang.jit.adapter.utils.TMADescriptorParams attribute) (tilelang.language.eager.builder.OutTensor attribute) dtype_abbrv (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) dtype_name (in module tilelang.language.dtypes) dump_to_file() (in module tilelang.contrib.hip_resource_info) dynamic() (in module tilelang.language.symbolics) dynamic_smem_buf (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) dynamic_symbolic_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) E E (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) E() (in module tilelang.layout.cute) e_dtype (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) e_factor (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) e_transposed (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) eager_jit (tilelang.language.eager.builder.Builder attribute) eager_jit_subs (tilelang.language.eager.builder.Builder attribute) EagerJITBuildError EagerJITStage (in module tilelang.language.eager.builder) Edge (class in tilelang.carver.roller.node) elem_bits (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) element_strides (tilelang.jit.adapter.utils.TMADescriptorParams attribute) ElementwiseTemplate (class in tilelang.carver.template.elementwise) elems_in_bytes (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) emit_config() (tilelang.carver.roller.policy.default.DefaultPolicy method) empty (tilelang.language.eager.ast.BaseBuilder attribute) empty() (in module tilelang.language.allocate) enable() (tilelang.env.CacheState class method) enable_cache (in module tilelang.env) enable_cache() (tilelang.env.Environment method) enable_device_compile (tilelang.backend.execution_backend.ExecutionBackendSpec attribute) enable_host_codegen (tilelang.backend.execution_backend.ExecutionBackendSpec attribute) end (tilelang.carver.roller.bestfit.Block attribute) end_profile_intrinsic (in module tilelang.language.tir.ir) end_profile_intrinsic() (in module tilelang.language.tir.op) enter_frame() (tilelang.language.eager.builder.Builder method) env (in module tilelang.env) Environment (class in tilelang.env) EnvVar (class in tilelang.env) EnvVarDefault (in module tilelang.env) equivalent_function() (tilelang.carver.template.base.BaseTemplate method) ERegion (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) eRegion (tilelang.tileop.gemm_sp.GemmSP attribute) erf (in module tilelang.language.tir.ir) erf() (in module tilelang.language.tir.op) err_msg (tilelang.autodd.Args attribute) (tilelang.autodd.ParTaskManager attribute) estimated_time (tilelang.tools.Analyzer.AnalysisResult attribute), [1] eval() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) eval_aug_assign() (in module tilelang.language.eager.ast) eval_op() (in module tilelang.language.eager.ast) executable (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) EXECUTABLE_PATH (in module tilelang.autotuner.param) execution_backend (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.cache.kernel_cache.KernelCache attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) execution_backend_spec (tilelang.jit.kernel.JITKernel attribute) ExecutionBackend (in module tilelang.jit) ExecutionBackendSpec (class in tilelang.backend.execution_backend) ExitedMacroFrame (class in tilelang.language.eager.builder) exp (in module tilelang.language.tir.ir) exp() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) exp10 (in module tilelang.language.tir.ir) exp10() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) exp2 (in module tilelang.language.tir.ir) exp2() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) expand() (tilelang.layout.layout.Layout method) expected_bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) expected_tflops (tilelang.tools.Analyzer.AnalysisResult attribute) export_library() (tilelang.jit.kernel.JITKernel method) export_ptx() (tilelang.jit.kernel.JITKernel method) export_sass() (tilelang.jit.kernel.JITKernel method) export_sources() (tilelang.jit.kernel.JITKernel method) expr_to_zeros() (in module tilelang.autodd) extent (tilelang.carver.matmul_analysis.IterTrait attribute) extent_wrapper() (tilelang.carver.roller.node.PrimFuncNode method) extra (tilelang.contrib.hip_resource_info.KernelResourceUsage attribute) extra_type_hints (tilelang.language.eager.ast.DSLMutator attribute) (tilelang.language.eager.ast.IRGenerator attribute) extrac_params() (in module tilelang.engine.lower) extract_if_condition() (in module tilelang.transform.decouple_type_cast) extract_python_func_declaration() (in module tilelang.jit.adapter.utils) extract_tensormap_ptr() (in module tilelang.contrib.cutedsl.cpasync) extract_thread_binding() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) F F (tilelang.carver.template.conv.ConvTemplate attribute), [1] fabsf() (in module tilelang.contrib.cutedsl.math) factorize() (in module tilelang.carver.roller.policy.common) fence_barrier_init() (in module tilelang.contrib.cutedsl.cpasync) fence_proxy_async() (in module tilelang.contrib.cutedsl.cpasync) (in module tilelang.language.builtin) fields (tilelang.layout.cute.IntTupleTuple attribute) filename (tilelang.language.eager.ast.DSLMutator attribute) filename_var (tilelang.language.eager.ast.SpanAttacher attribute) Fill (class in tilelang.ir) fill() (in module tilelang.language.fill_op) filter_and_record() (in module tilelang.contrib.hip_resource_info) finalize_reducer() (in module tilelang.language.reduce_op) FinalizeReducerOp (class in tilelang.ir) find_arg_idx_from_buffer_chain() (in module tilelang.carver.matmul_analysis) find_cuda_path() (in module tilelang.contrib.nvcc) find_first_similar_buffer() (in module tilelang.carver.matmul_analysis) find_first_similar_region() (in module tilelang.carver.matmul_analysis) find_frame_idx() (tilelang.language.eager.builder.Builder method) find_last_producer_from_buffer() (in module tilelang.carver.matmul_analysis) find_lld() (in module tilelang.contrib.rocm) find_path_from_source() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) find_rocm_path() (in module tilelang.contrib.rocm) find_topo_sort() (in module tilelang.carver.roller.node) find_topo_sort_priority() (in module tilelang.carver.roller.node) find_var_from_func() (in module tilelang.carver.analysis) finished (tilelang.autodd.Ruff attribute) fix_lint (tilelang.autodd.Ruff attribute) FlashAttentionTemplate (class in tilelang.carver.template.flashattention) flatten() (in module tilelang.layout.cute) flatten_to_tuple() (in module tilelang.layout.cute) FlattenBuffer() (in module tilelang.transform) floor (in module tilelang.language.tir.ir) floor() (in module tilelang.language.tir.op) floordiv (in module tilelang.language.tir.ir) floordiv() (in module tilelang.language.tir.op) floormod (in module tilelang.language.tir.ir) floormod() (in module tilelang.language.tir.op) fma2() (in module tilelang.language.math_intrinsics) fmod (in module tilelang.language.tir.ir) fmod() (in module tilelang.language.tir.op) fn (tilelang.autotuner.tuner.AutoTuner attribute) footprint() (tilelang.carver.roller.node.PrimFuncNode method) format_code (tilelang.autodd.Ruff attribute) forward (tilelang.autodd.BinOpFwdArg attribute) FP16_TOP_MAGIC_NUM (in module tilelang.contrib.cutedsl.quantize) fp8_remove_negative_zeros_() (in module tilelang.utils.tensor) Fragment (class in tilelang.layout.fragment) fragment_forward_A_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) fragment_forward_A_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) fragment_forward_B_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) fragment_forward_B_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) fragment_replicate (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) FragmentBufferProxy (class in tilelang.language.proxy) FragmentLoopChecker() (in module tilelang.analysis.fragment_loop_checker) Frame (class in tilelang.language.eager.builder) frames (tilelang.language.eager.builder.Builder attribute) FrameStack (class in tilelang.language.frame) (class in tilelang.language.kernel) free() (tilelang.carver.roller.bestfit.BestFit method) from_buffer() (tilelang.engine.param.KernelParam class method) from_code() (tilelang.autodd.ASTPat class method) (tilelang.autodd.ASTPatRewrite class method) from_database() (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter class method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter class method) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter class method) (tilelang.jit.adapter.nvrtc.NVRTCKernelAdapter class method) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter class method) (tilelang.jit.kernel.JITKernel class method) from_dict() (tilelang.carver.roller.hint.Hint class method) from_kernel() (tilelang.autotuner.tuner.AutoTuner class method) from_lazy_style() (tilelang.language.eager.builder.TirTemplate class method) from_output_nodes() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_prim_func() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_ptr() (tilelang.language.proxy.BaseTensor class method) (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) from_python() (in module tilelang.layout.cute) from_source() (tilelang.autodd.ASTPDD class method) (tilelang.autodd.LinePDD class method) (tilelang.autodd.Ruff class method) (tilelang.autodd.TaskManager class method) from_tilelang() (tilelang.layout.cute.ComposedLayout static method) (tilelang.layout.cute.Layout static method) from_tilelang_function() (tilelang.jit.kernel.JITKernel class method) from_var() (tilelang.engine.param.KernelParam class method) from_warp_partition() (tilelang.tileop.base.GemmWarpPolicy class method) FullCol (tilelang.tileop.base.GemmWarpPolicy attribute) FullRow (tilelang.tileop.base.GemmWarpPolicy attribute) func (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.profiler.Profiler property) func_compile_flags (tilelang.language.eager.builder.Builder attribute) func_name_var (tilelang.language.eager.ast.SpanAttacher attribute) func_pass_configs (tilelang.language.eager.builder.Builder attribute) func_source (tilelang.jit.JITImpl attribute), [1] function_names (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) FUNCTION_PATH (in module tilelang.autotuner.param) FuseMBarrierArriveExpectTx() (in module tilelang.cuda.transform) G Gemm (class in tilelang.tileop.gemm) gemm() (in module tilelang.language.gemm_op) gemm_infer_layout() (in module tilelang.tileop.gemm) GEMM_INST_METAL (in module tilelang.metal.op.gemm.gemm_metal) GEMM_INST_MFMA (in module tilelang.rocm.op.gemm.gemm_mfma) GEMM_INST_MMA (in module tilelang.cuda.op.gemm.gemm_mma) (in module tilelang.cuda.op.gemm.gemm_mma_sm70) GEMM_INST_SCALAR (in module tilelang.cpu.op.gemm.gemm_scalar) GEMM_INST_TCGEN05 (in module tilelang.cuda.op.gemm.gemm_tcgen05) GEMM_INST_WGMMA (in module tilelang.cuda.op.gemm.gemm_wgmma) GEMM_INST_WMMA (in module tilelang.rocm.op.gemm.gemm_wmma) gemm_lower() (in module tilelang.tileop.gemm) gemm_node (tilelang.tileop.gemm.gemm_base.GemmBase attribute) gemm_rr() (in module tilelang.contrib.cutedsl.gemm_v1) gemm_rs() (in module tilelang.contrib.cutedsl.gemm_v1) Gemm_SM80 (class in tilelang.contrib.cutedsl.gemm_v1) Gemm_SM90 (class in tilelang.contrib.cutedsl.gemm_v1) gemm_sp() (in module tilelang.language.experimental.gemm_sp_op) gemm_sp_infer_layout() (tilelang.tileop.gemm_sp.GemmSP method) GEMM_SP_INST_MMA_SP (in module tilelang.cuda.op.gemm_sp.gemm_sp_mma) GEMM_SP_INST_WGMMA_SP (in module tilelang.cuda.op.gemm_sp.gemm_sp_wgmma) (in module tilelang.tileop.gemm_sp.gemm_sp_wgmma) gemm_sp_lower() (tilelang.tileop.gemm_sp.GemmSP method) gemm_sp_node (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase attribute) gemm_sr() (in module tilelang.contrib.cutedsl.gemm_v1) gemm_ss() (in module tilelang.contrib.cutedsl.gemm_v1) GemmBase (class in tilelang.tileop.gemm.gemm_base) GemmImplEntry (class in tilelang.tileop.gemm.registry) GemmMetal (class in tilelang.metal.op.gemm.gemm_metal) GemmMFMA (class in tilelang.rocm.op.gemm.gemm_mfma) GemmMMA (class in tilelang.cuda.op.gemm.gemm_mma) GemmMMASm70 (class in tilelang.cuda.op.gemm.gemm_mma_sm70) GemmMMASm75 (class in tilelang.cuda.op.gemm.gemm_mma_sm75) GemmScalar (class in tilelang.cpu.op.gemm.gemm_scalar) GemmSP (class in tilelang.tileop.gemm_sp) GemmSPBase (class in tilelang.tileop.gemm_sp.gemm_sp_base) GemmSPImplEntry (class in tilelang.tileop.gemm_sp.registry) GemmSPMMA (class in tilelang.cuda.op.gemm_sp.gemm_sp_mma) GemmSPTargetPredicate (in module tilelang.tileop.gemm_sp.registry) GemmSPWarpPolicy (class in tilelang.ir) GemmSPWGMMA (class in tilelang.cuda.op.gemm_sp.gemm_sp_wgmma) (class in tilelang.tileop.gemm_sp.gemm_sp_wgmma) GemmTargetPredicate (in module tilelang.tileop.gemm.registry) GemmTCGEN5 (class in tilelang.cuda.op.gemm.gemm_tcgen05) GemmWarpPolicy (class in tilelang.ir) (class in tilelang.tileop.base) GemmWGMMA (class in tilelang.cuda.op.gemm.gemm_wgmma) GemmWMMA (class in tilelang.rocm.op.gemm.gemm_wmma) GEMVTemplate (class in tilelang.carver.template.gemv) gen (tilelang.language.eager.ast.IRGenerator attribute) gen_quant4() (in module tilelang.quantize.utils) general_compress() (in module tilelang.quantize.utils) GeneralReductionTemplate (class in tilelang.carver.template.general_reduce) GeneralRemove (class in tilelang.autodd) generate_cache_key() (tilelang.autotuner.tuner.AutoTuner method) generate_l2_persistent_map() (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) generate_pdl_sync_code() (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) generate_tma_descriptor_args() (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper method) (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) generated_module_source (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) generator() (tilelang.autodd.PDD method) generic_visit() (tilelang.autodd.ASTMutator method) (tilelang.language.eager.ast.QuoteVisitor method) get() (tilelang.env.EnvVar method) (tilelang.language.eager.builder.Builder method) get_all_factors() (in module tilelang.carver.roller.policy.common) get_analyzer_by_tir() (in module tilelang.carver.roller.shape_inference.tir) get_annotated_mod() (in module tilelang.jit.adapter.utils) get_ast() (in module tilelang.language.eager.utils) get_autotune_inputs() (in module tilelang.autotuner.capture) get_available_cpu_count() (in module tilelang.autotuner.tuner) get_avaliable_tensorintrin_shapes() (tilelang.carver.arch.arch_base.TileDevice method) (tilelang.carver.arch.cuda.CUDA method) (tilelang.carver.arch.rdna.RDNA method) get_base_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_block() (in module tilelang.carver.common_schedules) get_block_binding() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_extent() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_extents() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_info() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_name() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_boolop_name() (in module tilelang.language.eager.ast) get_buffer_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_buffer_elems() (in module tilelang.utils.language) get_buffer_region_from_load() (in module tilelang.language.utils) get_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_cc() (in module tilelang.contrib.cc) get_coalesced_veclen() (in module tilelang.carver.analysis) get_code() (tilelang.carver.roller.rasterization.NoRasterization method) (tilelang.carver.roller.rasterization.Rasterization method) (tilelang.carver.roller.rasterization.Rasterization2DColumn method) (tilelang.carver.roller.rasterization.Rasterization2DRow method) get_compile_timeout_seconds() (tilelang.env.Environment method) get_compiled_object() (in module tilelang.language.eager.utils) get_consumer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_cplus_compiler() (in module tilelang.contrib.cc) get_cpu_init_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) get_cuda_device_properties() (in module tilelang.carver.arch.driver.cuda_driver) get_cuda_dll_search_dirs() (in module tilelang.env) get_cuda_library_dirs() (in module tilelang.contrib.nvcc) get_cuda_version() (in module tilelang.contrib.nvcc) get_current_device() (in module tilelang.utils.device) get_current_device_functor() (tilelang.jit.adapter.base.BaseKernelAdapter static method) get_current_stream_functor() (tilelang.jit.adapter.base.BaseKernelAdapter static method) get_declaration() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_default_execution_backend() (tilelang.env.Environment method) get_default_target() (tilelang.env.Environment method) get_default_verbose() (tilelang.env.Environment method) get_dequantize_block() (in module tilelang.carver.matmul_analysis) get_device_attribute() (in module tilelang.carver.arch.driver.cuda_driver) get_device_call() (in module tilelang.engine.lower) get_device_function() (tilelang.carver.roller.rasterization.Rasterization2DColumn method) get_device_name() (in module tilelang.carver.arch.driver.cuda_driver) get_device_source() (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter method) get_dtype() (tilelang.carver.roller.node.Node method) get_dynamic_symbolic_set() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) get_e_factor() (in module tilelang.utils.sparse) get_e_replicate_factor() (in module tilelang.utils.sparse) get_env_path() (in module tilelang.contrib.msvc) get_exportable_executable() (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter method) get_extent() (in module tilelang.language.utils) get_fileline_stack() (tilelang.language.eager.builder.Builder method) get_forward_index() (tilelang.layout.layout.Layout method) get_forward_vars() (tilelang.layout.layout.Layout method) get_func_nonlocals() (in module tilelang.language.eager.utils) get_generated_module_source() (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter method) get_global_symbol_section_map() (in module tilelang.contrib.cc) get_hardware_aware_configs() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) get_host_call() (in module tilelang.engine.lower) get_host_source() (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter method) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter method) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter method) (tilelang.jit.kernel.JITKernel method) get_in_out_dtypes() (in module tilelang.carver.matmul_analysis) get_index_map() (in module tilelang.carver.matmul_analysis) get_init_func() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_input_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) (tilelang.carver.roller.node.PrimFuncNode method) get_input_exprs() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) get_input_shape() (tilelang.layout.layout.Layout method) get_ir() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) (tilelang.carver.roller.node.PlaceHolderNode method) get_kernel_launch_code() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_kernel_source() (tilelang.jit.adapter.base.BaseKernelAdapter method) (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter method) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter method) (tilelang.jit.adapter.torch.metal.MetalKernelAdapter method) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter method) (tilelang.jit.JITImpl method) (tilelang.jit.kernel.JITKernel method) (tilelang.jit.param.Kernel method) get_ladder_stage3_map() (in module tilelang.carver.matmul_analysis) get_lane_idx() (in module tilelang.language.builtin) get_launcher_cpp_code() (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper method) get_layout_visual_formats() (in module tilelang.backend.pass_pipeline.pipeline_utils) get_ldmatrix_index_map() (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) get_ldmatrix_offset() (in module tilelang.cuda.intrinsics.layout.utils) get_ldmatrix_offset_b() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) get_let_value() (in module tilelang.language.frame) get_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_logical_id_32bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) get_logical_id_8bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) get_lop3_intrin_group() (in module tilelang.quantize.lop3) get_max_dynamic_shared_size_bytes() (in module tilelang.carver.arch.driver.cuda_driver) get_max_shared_memory_per_block() (in module tilelang.carver.analysis) get_max_threads_per_block() (in module tilelang.carver.analysis) get_mma_micro_size() (in module tilelang.cuda.intrinsics.layout.utils) (in module tilelang.rocm.intrinsics.utils) get_msvc_environment_error() (in module tilelang.contrib.msvc) get_msvc_subprocess_env() (in module tilelang.contrib.msvc) get_mxfp_intrin_group() (in module tilelang.quantize.mxfp) get_name() (tilelang.autodd.ASTPatRewrite method) (tilelang.autodd.ASTRewrite method) (tilelang.autodd.AttachFullFuncArgs method) (tilelang.autodd.BinOpFwdArg method) (tilelang.autodd.CallFwdArg1 method) (tilelang.autodd.GeneralRemove method) (tilelang.autodd.IntConstApply method) get_next_task() (tilelang.autodd.ParTaskManager method) get_node_reduce_step_candidates() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) get_num_sms() (in module tilelang.carver.arch.driver.cuda_driver) get_num_threads() (tilelang.language.kernel.KernelLaunchFrame method) get_nvcc_compiler() (in module tilelang.contrib.nvcc) get_nvcc_subprocess_env() (in module tilelang.contrib.nvcc) get_nvrtc_version() (in module tilelang.contrib.nvrtc) get_operator_name() (in module tilelang.language.eager.ast) get_opt_shape() (tilelang.carver.roller.node.PrimFuncNode method) get_or_create_node() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) get_output_blocks() (in module tilelang.carver.common_schedules) get_output_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_output_shape() (tilelang.layout.layout.Layout method) get_parent_locals() (tilelang.language.eager.ast.BaseBuilder method) get_pass_context() (in module tilelang.transform) get_pass_diff_mode() (tilelang.env.Environment method) get_persisting_l2_cache_max_size() (in module tilelang.carver.arch.driver.cuda_driver) get_pipeline() (in module tilelang.backend.pass_pipeline.pipeline) get_prim_func_name() (in module tilelang.utils.language) get_producer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_profiler() (tilelang.jit.kernel.JITKernel method) get_propagate_map() (in module tilelang.carver.matmul_analysis) get_ptx_from_source() (in module tilelang.contrib.nvcc) get_rasterization_code() (in module tilelang.carver.utils) get_reduce_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_reduce_inputs_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_reduction_blocks() (in module tilelang.carver.analysis) get_region_base_offsets() (tilelang.tileop.gemm.gemm_base.GemmBase method) get_registers_per_block() (in module tilelang.carver.arch.driver.cuda_driver) get_rocm_arch() (in module tilelang.contrib.rocm) get_roller_hints_from_func() (in module tilelang.carver.utils) get_roller_hints_from_output_nodes() (in module tilelang.carver.utils) get_root_block() (in module tilelang.carver.analysis) get_rstep() (tilelang.carver.roller.hint.TileDict method) get_sass_from_source() (in module tilelang.contrib.nvcc) get_shape() (tilelang.carver.roller.node.Node method) get_shared_memory_per_block() (in module tilelang.carver.arch.driver.cuda_driver) get_source_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_space_dim() (tilelang.carver.roller.node.PrimFuncNode method) get_spatial_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_stack_str() (in module tilelang.cuda.debug) get_store_index_map() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) get_stream_type() (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_swizzle_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) get_tag() (tilelang.carver.roller.node.Node method) get_target_arch() (in module tilelang.contrib.nvcc) get_target_by_dump_machine() (in module tilelang.contrib.cc) get_target_compute_version() (in module tilelang.contrib.nvcc) get_tcgen5_blockscaled_instr_desc() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) get_tcgen5_instr_desc() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) get_tcgen5_mma_meta() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) get_tensor_supply() (in module tilelang.utils.tensor) get_tensorized_func_and_tags() (in module tilelang.carver.matmul_analysis) get_thread_binding() (in module tilelang.language.kernel) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.language.kernel.KernelLaunchFrame method) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) get_thread_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_extent() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_extents() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_size() (tilelang.layout.fragment.Fragment method) get_tile() (tilelang.carver.roller.hint.TileDict method) get_tir() (tilelang.jit.JITImpl method) (tilelang.language.eager.builder.JITFunc method) (tilelang.language.eager.builder.TirTemplate method) get_tma_atom() (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM90 static method) get_tmp() (tilelang.language.eager.ast.DSLMutator method) get_tuner_result() (tilelang.jit.kernel.JITKernel method) get_tunner() (tilelang.autotuner.tuner.AutoTuneImpl method) get_tvm_dtype() (in module tilelang.language.dtypes) get_type_hints() (in module tilelang.language.eager.builder) get_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) get_warp_group_idx() (in module tilelang.language.builtin) get_warp_idx() (in module tilelang.language.builtin) get_warp_idx_sync() (in module tilelang.language.builtin) get_windows_compiler() (in module tilelang.contrib.msvc) get_windows_runtime_dll_dirs() (in module tilelang.env) get_wmma_a_fragment_forward_func() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_a_layout_funcs() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_b_fragment_forward_func() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_b_layout_funcs() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_c_layout_funcs() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_fragment_replicate_count() (in module tilelang.rocm.intrinsics.wmma_layout) get_wmma_store_index_map_func() (in module tilelang.rocm.intrinsics.wmma_layout) global_address (tilelang.jit.adapter.utils.TMADescriptorParams attribute) global_buffers (tilelang.tools.Analyzer.Analyzer attribute) global_dim (tilelang.jit.adapter.utils.TMADescriptorParams attribute) global_stride (tilelang.jit.adapter.utils.TMADescriptorParams attribute) globals (tilelang.language.eager.ast.DSLMutator attribute) GmmaDescriptor (class in tilelang.contrib.cutedsl.gemm_v2) grid() (in module tilelang.language.tir.ir) grid_info (tilelang.jit.adapter.torch.metal.MetalKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) grid_size (tilelang.carver.roller.hint.TileDict attribute) GridDim() (in module tilelang.contrib.cutedsl.threadblock_swizzle) GROUP_CONFIG (in module tilelang.utils.sparse) H H (tilelang.carver.template.conv.ConvTemplate attribute), [1] HALF_WARP_SIZE (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) handle_add_byte_offset() (in module tilelang.contrib.cutedsl.utils) handle_name (tilelang.jit.adapter.utils.TMADescriptorParams attribute) has_arch() (tilelang.carver.template.base.BaseTemplate method) has_device_kernel_launch() (in module tilelang.engine.lower) has_internal_prim_func() (in module tilelang.language.eager.ast) has_let_value() (in module tilelang.language.frame) has_mma_support() (in module tilelang.carver.arch.cuda) has_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) have_bf16() (in module tilelang.contrib.nvcc) have_cudagraph() (in module tilelang.contrib.nvcc) have_fp16() (in module tilelang.contrib.nvcc) have_fp8() (in module tilelang.contrib.nvcc) have_int8() (in module tilelang.contrib.nvcc) have_matrixcore() (in module tilelang.contrib.rocm) have_mbarrier() (in module tilelang.contrib.nvcc) have_pdl() (in module tilelang.contrib.nvcc) have_tensorcore() (in module tilelang.contrib.nvcc) have_tma() (in module tilelang.contrib.nvcc) head_dim (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) Hint (class in tilelang.carver.roller.hint) hipcc_remark_flag() (in module tilelang.contrib.hip_resource_info) hoist_enabled (tilelang.transform.hoist_broadcast_values.HoistBroadcastValuesMutator attribute) HoistBroadcastValues() (in module tilelang.transform.hoist_broadcast_values) HoistBroadcastValuesMutator (class in tilelang.transform.hoist_broadcast_values) HoistGlobalBufferAllocations() (in module tilelang.transform) HoistNonRestrictParams() (in module tilelang.transform) host_codegen() (in module tilelang.engine.lower) host_func (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper property) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator attribute), [1] (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper property) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper property) HOST_KERNEL_PATH (in module tilelang.autotuner.param) host_kernel_path (tilelang.cache.kernel_cache.KernelCache attribute) (tilelang.jit.adapter.cutedsl.kernel_cache.CuTeDSLKernelCache attribute) host_kernel_source (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) host_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) host_source (tilelang.jit.kernel.JITKernel property) hypot (in module tilelang.language.tir.ir) hypot() (in module tilelang.language.tir.op) I ieee_add() (in module tilelang.language.math_intrinsics) ieee_fadd() (in module tilelang.contrib.cutedsl.ieee_math) ieee_fdiv() (in module tilelang.contrib.cutedsl.ieee_math) (in module tilelang.language.math_intrinsics) ieee_fmaf() (in module tilelang.contrib.cutedsl.ieee_math) (in module tilelang.language.math_intrinsics) ieee_fmul() (in module tilelang.contrib.cutedsl.ieee_math) ieee_frcp() (in module tilelang.contrib.cutedsl.ieee_math) (in module tilelang.language.math_intrinsics) ieee_frsqrt() (in module tilelang.language.math_intrinsics) ieee_fsqrt() (in module tilelang.contrib.cutedsl.ieee_math) (in module tilelang.language.math_intrinsics) ieee_fsub() (in module tilelang.contrib.cutedsl.ieee_math) ieee_mul() (in module tilelang.language.math_intrinsics) ieee_sub() (in module tilelang.language.math_intrinsics) if_then_else (in module tilelang.language.tir.ir) if_then_else() (in module tilelang.language.tir.op) ifexp() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) IfStmtBinding() (in module tilelang.transform) im2col() (in module tilelang.language.copy_op) Im2ColOp (class in tilelang.ir) IMMLUT (in module tilelang.contrib.cutedsl.quantize) impl_class (tilelang.tileop.gemm.registry.GemmImplEntry attribute) (tilelang.tileop.gemm_sp.registry.GemmSPImplEntry attribute) import_from_file() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator static method) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator static method) import_source() (in module tilelang.language) in_dtype (tilelang.carver.roller.hint.IntrinInfo attribute) (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] inc_max_nreg() (in module tilelang.language.builtin) increase_descriptor_offset() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) index (tilelang.layout.layout.Layout property) index_to_coordinates() (in module tilelang.language.utils) indexdiv() (in module tilelang.language.tir.op) indexmod() (in module tilelang.language.tir.op) infer() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) infer_layout() (tilelang.cpu.op.gemm.gemm_scalar.GemmScalar method) (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_mma_sm70.GemmMMASm70 method) (tilelang.cuda.op.gemm.gemm_tcgen05.GemmTCGEN5 method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.metal.op.gemm.gemm_metal.GemmMetal method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.Gemm method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.tileop.gemm_sp.GemmSP method) infer_node_smem_usage() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) infer_shared_layout() (tilelang.cuda.op.gemm.gemm_tcgen05.GemmTCGEN5 method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) infer_tensorcore_axis() (tilelang.carver.roller.node.PrimFuncNode method) infinity (in module tilelang.language.tir.ir) infinity() (in module tilelang.language.tir.op) INIT_FUNC (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) init_tcgen05_a_desc() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) init_tcgen05_b_desc() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) init_wgmma_a_desc() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) init_wgmma_b_desc() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) initialize_function() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) initialize_jit_mode() (tilelang.jit.JITImpl method) initialize_tcgen05_descriptor() (in module tilelang.contrib.cutedsl.gemm_tcgen05) (in module tilelang.language.builtin) initialize_wgmma_descriptor() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) InjectAssumes() (in module tilelang.transform) InjectFenceProxy() (in module tilelang.cuda.transform) InjectSoftwarePipeline() (in module tilelang.transform) InjectTcgen05Fence() (in module tilelang.cuda.transform) inline_transpose_block() (in module tilelang.carver.matmul_analysis) input_buffers (tilelang.carver.roller.node.PrimFuncNode attribute) input_queue (tilelang.autodd.AsyncPythonRunner attribute) input_transform_kind (tilelang.carver.roller.hint.IntrinInfo attribute) inputs (tilelang.carver.roller.node.Node property) InputShapeInference (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) inside_list (tilelang.autodd.GeneralRemove attribute) inst_name (tilelang.tileop.gemm.registry.GemmImplEntry attribute) (tilelang.tileop.gemm_sp.registry.GemmSPImplEntry attribute) install_pass_diff_hook() (in module tilelang.utils.pass_diff_hook) int_ (in module tilelang.language.dtypes) IntConstApply (class in tilelang.autodd) Integer (tilelang.utils.tensor.TensorSupplyType attribute) inter_transform_a (tilelang.carver.roller.hint.IntrinInfo property) inter_transform_b (tilelang.carver.roller.hint.IntrinInfo property) interleave (tilelang.jit.adapter.utils.TMADescriptorParams attribute) interleave_weight() (in module tilelang.quantize.utils) InterWarpTransform (tilelang.common.transform_kind.TransformKind attribute) IntraWarpTransform (tilelang.common.transform_kind.TransformKind attribute) intrin_emitter_cls (tilelang.cuda.op.gemm.gemm_mma.GemmMMA attribute) (tilelang.cuda.op.gemm.gemm_mma_sm75.GemmMMASm75 attribute) intrin_info (tilelang.carver.roller.hint.Hint attribute) IntrinInfo (class in tilelang.carver.roller.hint) IntTuple (class in tilelang.layout.cute) IntTupleConst (class in tilelang.layout.cute) IntTupleLike (in module tilelang.layout.cute) IntTuplePrimExpr (class in tilelang.layout.cute) IntTupleScaledBasis (class in tilelang.layout.cute) IntTupleTuple (class in tilelang.layout.cute) inverse() (tilelang.layout.layout.Layout method) ir_builder (tilelang.language.eager.builder.Builder attribute) ir_gen (tilelang.language.eager.builder.JITFunc attribute) (tilelang.language.eager.builder.Macro attribute) (tilelang.language.eager.builder.PrimFunc attribute) (tilelang.language.eager.builder.TirTemplate attribute) ir_module (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) ir_pass() (tilelang.tools.Analyzer.Analyzer method) IRGenerator (class in tilelang.language.eager.ast) is_ada_arch() (in module tilelang.carver.arch.cuda) is_ampere_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) is_autotune_cache_disabled() (tilelang.env.Environment method) is_available (tilelang.backend.execution_backend.ExecutionBackendSpec attribute) is_blockscaled (tilelang.tileop.gemm.gemm_base.GemmBase property) is_boolean() (tilelang.engine.param.KernelParam method) is_broadcast_epilogue() (in module tilelang.carver.analysis) is_cache_enabled (in module tilelang.env) is_cache_enabled() (tilelang.env.Environment method) is_cache_globally_disabled() (tilelang.env.Environment method) is_causal (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) is_cdna_arch() (in module tilelang.carver.arch.cdna) (tilelang.carver.template.base.BaseTemplate method) is_cpu_arch() (in module tilelang.carver.arch.cpu) is_cpu_device_backend() (in module tilelang.engine.lower) is_cpu_target() (in module tilelang.jit.adapter.utils) IS_CUDA (in module tilelang.profiler.bench) (in module tilelang.utils.device) is_cuda_arch() (in module tilelang.carver.arch.cuda) is_cuda_target() (in module tilelang.jit.adapter.utils) is_cutedsl_target() (in module tilelang.jit.adapter.utils) is_darwin() (in module tilelang.contrib.cc) is_device_call() (in module tilelang.engine.lower) is_device_call_c_device() (in module tilelang.engine.lower) is_dynamic (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) is_elementwise() (tilelang.carver.analysis.BlockInfo method) is_enabled() (tilelang.env.CacheState class method) is_equal() (tilelang.layout.fragment.Fragment method) (tilelang.layout.layout.Layout method) is_f8f6f4_family() (in module tilelang.language.dtypes) is_float4() (in module tilelang.language.dtypes) (tilelang.engine.param.KernelParam method) (tilelang.language.dtypes.dtype method) is_float4_e2m1_unpacked() (in module tilelang.language.dtypes) (tilelang.language.dtypes.dtype method) is_float4_e2m1fn() (in module tilelang.language.dtypes) (tilelang.language.dtypes.dtype method) is_float8() (tilelang.engine.param.KernelParam method) is_float8_dtype() (in module tilelang.utils.tensor) is_fragment() (in module tilelang.utils.language) is_free (tilelang.carver.roller.bestfit.Block attribute) is_full_col() (tilelang.tileop.base.GemmWarpPolicy method) is_full_region() (in module tilelang.utils.language) is_full_row() (tilelang.tileop.base.GemmWarpPolicy method) is_gemm() (tilelang.carver.analysis.BlockInfo method) is_gemm_rr() (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) is_gemm_rs() (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) is_gemm_sr() (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) is_gemm_ss() (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.metal.op.gemm.gemm_metal.GemmMetal method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) is_gemm_ts() (tilelang.tileop.gemm.gemm_base.GemmBase method) is_gemv() (tilelang.carver.analysis.BlockInfo method) is_global() (in module tilelang.utils.language) is_global_or_shared_buffer() (in module tilelang.transform.decouple_type_cast) is_hip_target() (in module tilelang.jit.adapter.utils) is_hopper() (in module tilelang.contrib.nvcc) is_hopper_arch() (in module tilelang.carver.arch.cuda) is_identity_block() (in module tilelang.carver.matmul_analysis) is_identity_or_transpose_block() (in module tilelang.carver.matmul_analysis) is_img2col (tilelang.jit.adapter.utils.TMADescriptorParams attribute) is_injective() (tilelang.carver.analysis.BlockInfo method) is_input_8bit() (tilelang.carver.roller.hint.IntrinInfo method) is_inter_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_intra_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_jit_diagnostics_enabled() (tilelang.env.Environment method) is_k_major (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) is_lazy_mode() (tilelang.jit.JITImpl method) is_lazy_style (tilelang.language.eager.builder.TirTemplate attribute) is_ld_matrix_transform() (tilelang.common.transform_kind.TransformKind method) is_light_import() (tilelang.env.Environment method) is_local() (in module tilelang.utils.language) is_local_buffer() (in module tilelang.transform.decouple_type_cast) is_local_var() (in module tilelang.utils.language) is_m_first (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) is_metal_arch() (in module tilelang.carver.arch.metal) is_metal_simdgroup() (in module tilelang.utils.language) is_metal_target() (in module tilelang.jit.adapter.utils) IS_MPS (in module tilelang.utils.device), [1] is_non_transform() (tilelang.common.transform_kind.TransformKind method) is_none() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) is_nvrtc_available (in module tilelang.jit.adapter.nvrtc), [1] is_output() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) is_pipelined_for() (in module tilelang.analysis.nested_loop_checker) is_placeholder() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PlaceHolderNode method) is_print_on_compilation_enabled() (tilelang.env.Environment method) is_rdna_arch() (in module tilelang.carver.arch.rdna) (tilelang.carver.template.base.BaseTemplate method) is_reduction() (tilelang.carver.analysis.BlockInfo method) is_running_autodd() (tilelang.env.Environment method) is_scalar() (tilelang.engine.param.KernelParam method) is_shared() (in module tilelang.utils.language) is_shared_dynamic() (in module tilelang.utils.language) is_square() (tilelang.tileop.base.GemmWarpPolicy method) is_swizzle_128b() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) is_swizzle_32b() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) is_swizzle_64b() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) is_swizzled (tilelang.layout.cute.Swizzle property) is_symbolic_expr() (in module tilelang.jit.adapter.cython.adapter) is_tcgen05 (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) is_tensor_memory() (in module tilelang.utils.language) is_tensorcore_supported_precision() (in module tilelang.carver.arch.cuda) is_tile_op() (in module tilelang.analysis.nested_loop_checker) is_tma_descriptor_arg() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) is_transpose_block() (in module tilelang.carver.matmul_analysis) is_unsigned() (tilelang.engine.param.KernelParam method) is_valid() (tilelang.carver.roller.hint.Stride method) is_var() (in module tilelang.language.eager.builder) is_volta_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) isfinite (in module tilelang.language.tir.ir) isfinite() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) isinf (in module tilelang.language.tir.ir) isinf() (in module tilelang.language.tir.op) isnan (in module tilelang.language.tir.ir) isnan() (in module tilelang.language.tir.op) isnullptr (in module tilelang.language.tir.ir) isnullptr() (in module tilelang.language.tir.op) IterInfo (class in tilelang.carver.analysis) IterKind (class in tilelang.carver.matmul_analysis) iters (tilelang.carver.analysis.BlockInfo attribute) IterTrait (class in tilelang.carver.matmul_analysis) J jit() (in module tilelang.jit) jit_compile (tilelang.autotuner.tuner.AutoTuner attribute) jit_elaborate (tilelang.autotuner.tuner.AutoTuner attribute) jit_impl (tilelang.autotuner.tuner.AutoTuneImpl attribute) jit_input_tensors (tilelang.autotuner.tuner.AutoTuner attribute) jit_phase() (in module tilelang.jit.diagnostics) JITFunc (class in tilelang.language.eager.builder) JITImpl (class in tilelang.jit) JITKernel (class in tilelang.jit.kernel) JITNoBuilderError JobBackend (in module tilelang.autodd) jobs (tilelang.autodd.Args attribute) K K (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) k_atom_size (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) K_DIM (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) k_pack (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP property) Kernel (class in tilelang.jit.param) kernel (tilelang.autotuner.param.AutotuneResult attribute), [1] Kernel() (in module tilelang.language.kernel) KERNEL_CLUSTER_LAUNCH_FUNC_CODE (in module tilelang.jit.adapter.wrapper) KERNEL_CUBIN_PATH (in module tilelang.autotuner.param) kernel_global_source (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.torch.metal.MetalKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) KERNEL_LAUNCH_FUNC_CODE (in module tilelang.jit.adapter.wrapper) KERNEL_LAUNCH_FUNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) KERNEL_LIB_PATH (in module tilelang.autotuner.param) kernel_lib_path (tilelang.cache.kernel_cache.KernelCache attribute) (tilelang.jit.adapter.cutedsl.kernel_cache.CuTeDSLKernelCache attribute) (tilelang.jit.adapter.kernel_cache.TVMFFIKernelCache attribute) (tilelang.jit.adapter.nvrtc.kernel_cache.NVRTCKernelCache attribute) kernel_name (tilelang.jit.adapter.torch.metal.MetalKernelAdapter attribute) KERNEL_PY_PATH (in module tilelang.autotuner.param) kernel_py_path (tilelang.jit.adapter.nvrtc.kernel_cache.NVRTCKernelCache attribute) kernel_source (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.kernel.JITKernel property) KernelCache (class in tilelang.cache.kernel_cache) KernelLaunchFrame (class in tilelang.language.kernel) KernelParam (class in tilelang.engine.param) KernelResourceUsage (class in tilelang.contrib.hip_resource_info) kernels (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) key (tilelang.env.EnvVar attribute) kind (tilelang.carver.analysis.IterInfo attribute) (tilelang.carver.matmul_analysis.IterTrait attribute) kIter_I (tilelang.carver.matmul_analysis.IterKind attribute) kIter_J (tilelang.carver.matmul_analysis.IterKind attribute) kIter_K (tilelang.carver.matmul_analysis.IterKind attribute) kIter_S (tilelang.carver.matmul_analysis.IterKind attribute) kIter_T (tilelang.carver.matmul_analysis.IterKind attribute) kPack (tilelang.tileop.gemm_sp.GemmSP attribute) L l2_cache_size_bytes (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) l2_persistent_map (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) L2_PERSISTENT_MAP_CREATE_HANDLE (in module tilelang.jit.adapter.wrapper) L2_PERSISTENT_MAP_CREATE_HANDLE_PY (in module tilelang.jit.adapter.nvrtc.wrapper) L2_PERSISTENT_MAP_INIT_FUNC (in module tilelang.jit.adapter.wrapper) L2_PERSISTENT_MAP_INIT_FUNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) L2_PERSISTENT_MAP_RESET_HANDLE (in module tilelang.jit.adapter.wrapper) L2_PERSISTENT_MAP_RESET_HANDLE_PY (in module tilelang.jit.adapter.nvrtc.wrapper) l2_promotion (tilelang.jit.adapter.utils.TMADescriptorParams attribute) label (tilelang.autodd.LabeledRewrite attribute) LabeledRewrite (class in tilelang.autodd) latency (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) (tilelang.testing.perf_regression.PerfResult attribute) LATENCY_PATH (in module tilelang.autotuner.param) launcher_cpp_code (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) launcher_cpp_path (tilelang.jit.adapter.cutedsl.kernel_cache.CuTeDSLKernelCache attribute) launcher_lib_name (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) launcher_lib_path (tilelang.jit.adapter.cutedsl.kernel_cache.CuTeDSLKernelCache attribute) Layout (class in tilelang.layout.cute) (class in tilelang.layout.layout) layout (tilelang.layout.cute.ComposedLayout attribute) layout_propagate_chain() (in module tilelang.carver.matmul_analysis) LayoutInference() (in module tilelang.transform) LayoutReducer() (in module tilelang.transform) LayoutVisual() (in module tilelang.analysis.layout_visual) (in module tilelang.backend.pass_pipeline.pipeline_utils) ldexp (in module tilelang.language.tir.ir) ldexp() (in module tilelang.language.tir.op) ldg128() (in module tilelang.language.builtin) ldg256() (in module tilelang.language.builtin) ldg32() (in module tilelang.language.builtin) ldg64() (in module tilelang.language.builtin) ldmatrix_32x16_to_shared_16x32_layout_a() (in module tilelang.cuda.intrinsics.layout.mma_layout) ldmatrix_32x16_to_shared_16x32_layout_b() (in module tilelang.cuda.intrinsics.layout.mma_layout) ldmatrix_32x16_to_shared_32x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) ldmatrix_32x4_to_shared_16x8_layout_a() (in module tilelang.cuda.intrinsics.layout.mma_layout) ldmatrix_32x4_to_shared_16x8_layout_b() (in module tilelang.cuda.intrinsics.layout.mma_layout) ldmatrix_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) ldmatrix_a() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCorePreshuffleIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) ldmatrix_b() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCorePreshuffleIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) ldmatrix_e() (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter method) ldmatrix_trans_32x16_to_shared_16x32_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) ldmatrix_trans_32x32_to_shared_shared_16x64_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) ldmatrix_trans_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) LDMatrixTransform (tilelang.common.transform_kind.TransformKind attribute) leading_byte_offset (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) legalize_pairwise_extents() (in module tilelang.utils.language) LegalizeNegativeIndex() (in module tilelang.transform) LegalizeSafeMemoryAccess() (in module tilelang.transform) LegalizeVectorizedLoop() (in module tilelang.transform) LetFrame (class in tilelang.language.frame) LetInline() (in module tilelang.transform.simplify) lib (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) lib_code (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) lib_generator (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) libcode (tilelang.autotuner.param.AutotuneResult attribute), [1] libpath (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) LibraryGenerator (class in tilelang.jit.adapter.libgen) lift (in module tilelang.cuda.intrinsics.macro.mma_macro_generator) (in module tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator) (in module tilelang.cuda.intrinsics.macro.mma_sp_macro_generator) (in module tilelang.cuda.intrinsics.macro.tcgen05_macro_generator) (in module tilelang.cuda.intrinsics.macro.wgmma_macro_generator) (in module tilelang.rocm.intrinsics.mfma_macro_generator) (in module tilelang.rocm.intrinsics.wmma_macro_generator) likely (in module tilelang.language.tir.ir) likely() (in module tilelang.language.tir.op) limit (tilelang.carver.roller.bestfit.BestFit attribute) linear_index() (in module tilelang.language.utils) LinePDD (class in tilelang.autodd) lines (tilelang.autodd.LinePDD attribute) list (tilelang.carver.roller.bestfit.BestFit attribute) list_target_detectors() (in module tilelang.backend.target) load() (tilelang.language.eager.builder.Ref method) load_entries (tilelang.transform.decouple_type_cast.AccessReplacer attribute) load_from_disk() (tilelang.autotuner.param.AutotuneResult class method) load_from_file() (in module tilelang.contrib.hip_resource_info) load_lib() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator method) (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator method) loads (tilelang.transform.decouple_type_cast.MemoryAccessCollector attribute) local_size_a (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) local_size_b (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) local_size_out (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) LocalBufferProxy (class in tilelang.language.proxy) lock (tilelang.autodd.AsyncPythonRunner attribute) log (in module tilelang.language.tir.ir) log() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) log10 (in module tilelang.language.tir.ir) log10() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) log1p (in module tilelang.language.tir.ir) log1p() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) log2 (in module tilelang.language.tir.ir) log2() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) logger (in module tilelang) (in module tilelang.autotuner.tuner) (in module tilelang.carver.matmul_analysis) (in module tilelang.carver.roller.policy.tensorcore) (in module tilelang.carver.utils) (in module tilelang.env) (in module tilelang.jit) (in module tilelang.jit.adapter.cutedsl.adapter) (in module tilelang.jit.adapter.cython.adapter) (in module tilelang.jit.adapter.libgen) (in module tilelang.jit.adapter.nvrtc.adapter) (in module tilelang.jit.adapter.nvrtc.libgen) (in module tilelang.jit.adapter.wrapper) (in module tilelang.jit.diagnostics) (in module tilelang.jit.kernel) (in module tilelang.language.eager.builder) (in module tilelang.tools.Analyzer) lookup_param (in module tilelang.language.tir.ir) lookup_param() (in module tilelang.language.tir.op) loop_break() (in module tilelang.language.builtin) (in module tilelang.language.customize) loop_rv (tilelang.carver.analysis.IterInfo attribute) loop_stack (tilelang.tools.Analyzer.Analyzer attribute) loop_var (tilelang.transform.decouple_type_cast.AccessReplacer attribute) (tilelang.transform.decouple_type_cast.MemoryAccessCollector attribute) LoopUnswitching() (in module tilelang.transform) lower() (in module tilelang.engine.lower) (tilelang.backend.pass_pipeline.pipeline.PassPipeline method) (tilelang.cpu.op.gemm.gemm_scalar.GemmScalar method) (tilelang.cuda.op.gemm.gemm_mma.GemmMMA method) (tilelang.cuda.op.gemm.gemm_mma_sm70.GemmMMASm70 method) (tilelang.cuda.op.gemm.gemm_tcgen05.GemmTCGEN5 method) (tilelang.cuda.op.gemm.gemm_wgmma.GemmWGMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_mma.GemmSPMMA method) (tilelang.cuda.op.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.metal.op.gemm.gemm_metal.GemmMetal method) (tilelang.rocm.op.gemm.gemm_mfma.GemmMFMA method) (tilelang.rocm.op.gemm.gemm_wmma.GemmWMMA method) (tilelang.tileop.gemm.Gemm method) (tilelang.tileop.gemm.gemm_base.GemmBase method) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase method) (tilelang.tileop.gemm_sp.gemm_sp_wgmma.GemmSPWGMMA method) (tilelang.tileop.gemm_sp.GemmSP method) lower_corner (tilelang.jit.adapter.utils.TMADescriptorParams attribute) lower_to_host_device_ir() (in module tilelang.engine.lower) LowerAccessPtr() (in module tilelang.transform) LowerBlackwell2SM() (in module tilelang.cuda.transform) LowerDeviceKernelLaunch() (in module tilelang.transform) LowerFunc (in module tilelang.backend.pass_pipeline.pipeline) LowerHopperIntrin() (in module tilelang.cuda.transform) LowerIntrin() (in module tilelang.transform) LowerL2Persistent() (in module tilelang.cuda.transform) LowerLDGSTG() (in module tilelang.cuda.transform) LowerOpaqueBlock() (in module tilelang.transform) LowerPTXAsyncCopy() (in module tilelang.cuda.transform) LowerSharedBarrier() (in module tilelang.cuda.transform) LowerSharedTmem() (in module tilelang.cuda.transform) LowerThreadAllreduce() (in module tilelang.transform) LowerTileOp() (in module tilelang.transform) M M (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) m_base (tilelang.layout.cute.Swizzle attribute) M_DIM (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) m_warp (tilelang.ir.GemmSPWarpPolicy attribute) (tilelang.ir.GemmWarpPolicy attribute) Macro (class in tilelang.language.eager.builder) macro() (in module tilelang.language.eager.builder) (in module tilelang.language.tir.entry) (tilelang.language.eager.builder.Builder method) macro_arg() (tilelang.language.eager.builder.Builder method) macro_arg_annot (tilelang.language.eager.builder.Builder attribute) macro_fileline_stack (tilelang.language.eager.builder.Builder attribute) MacroFrame (class in tilelang.language.eager.builder) main() (in module tilelang.autodd) (in module tilelang.testing) make_aligned_tensor() (in module tilelang.contrib.cutedsl.gemm_v1) make_blockscaled_gemm_layout() (in module tilelang.language.gemm_op) make_column_major_layout() (in module tilelang.layout.cute) make_cutlass_metadata_layout() (in module tilelang.layout.gemm_sp) make_cutlass_metadata_layout_sm8x() (in module tilelang.layout.gemm_sp) make_cutlass_metadata_layout_sm90() (in module tilelang.layout.gemm_sp) make_filled_tensor() (in module tilelang.contrib.cutedsl.utils) make_full_bank_swizzled_layout() (in module tilelang.layout.swizzle) make_fully_replicated_layout_fragment() (in module tilelang.layout.swizzle) make_gemm_fragment_8x8() (in module tilelang.layout.swizzle) make_gemm_fragment_8x8_transposed() (in module tilelang.layout.swizzle) make_half_bank_swizzled_layout() (in module tilelang.layout.swizzle) make_identity_layout() (in module tilelang.layout.cute) make_iter_fusion_index_map() (in module tilelang.carver.matmul_analysis) make_layout() (in module tilelang.layout.cute) make_linear_layout() (in module tilelang.layout.swizzle) make_mfma_load_layout() (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) make_mfma_store_layout() (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) make_mfma_swizzle_layout() (in module tilelang.rocm.intrinsics.mfma_layout) make_mma_load_layout() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter method) make_mma_store_layout() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter method) make_mma_swizzle_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) make_quarter_bank_swizzled_layout() (in module tilelang.layout.swizzle) make_reverse() (tilelang.carver.roller.shape_inference.tir.Statement method) make_row_major_layout() (in module tilelang.layout.cute) make_smem_layout_AB() (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM90 static method) make_swizzled_layout() (in module tilelang.layout.swizzle) make_tcgen05mma_swizzled_layout() (in module tilelang.layout.swizzle) make_tensor() (in module tilelang.language.proxy) make_tensor_at_offset() (in module tilelang.contrib.cutedsl.utils) make_tensor_from_addr() (in module tilelang.language.proxy) make_tma_atom() (tilelang.contrib.cutedsl.gemm_v1.Gemm_SM90 static method) make_volta_swizzled_layout() (in module tilelang.layout.swizzle) make_wgmma_swizzled_layout() (in module tilelang.layout.swizzle) make_wmma_load_layout() (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) make_wmma_store_layout() (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) MakePackedAPI() (in module tilelang.transform) malloc() (tilelang.carver.roller.bestfit.BestFit method) manual_assert_close() (tilelang.profiler.Profiler method) manual_check_prog (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) map_forward_index() (tilelang.layout.layout.Layout method) map_forward_thread() (tilelang.layout.fragment.Fragment method) mapping (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) MarkCudaSyncCalls() (in module tilelang.cuda.transform) MarkHostMetalContext() (in module tilelang.metal.transform.mark_host_metal_context) masked (tilelang.autodd.Task attribute) match() (tilelang.autodd.ASTPat method) (tilelang.autodd.ASTPatRewrite method) (tilelang.autodd.ASTRewrite method) (tilelang.autodd.AttachFullFuncArgs method) (tilelang.autodd.BinOpFwdArg method) (tilelang.autodd.CallFwdArg1 method) (tilelang.autodd.GeneralRemove method) (tilelang.autodd.IntConstApply method) match_all_sync() (in module tilelang.language.builtin) match_any_sync() (in module tilelang.language.builtin) match_declare_kernel() (in module tilelang.jit.adapter.utils) match_declare_kernel_cpu() (in module tilelang.jit.adapter.utils) match_declare_kernel_cutedsl() (in module tilelang.jit.adapter.utils) match_global_kernel() (in module tilelang.jit.adapter.utils) match_pat (tilelang.autodd.ASTPatRewrite attribute) match_placeholders() (tilelang.autodd.ASTPat method) (tilelang.autodd.ASTPatRewrite method) matcher (tilelang.autodd.IntConstApply attribute) (tilelang.language.eager.builder.TirTemplate attribute) matches() (tilelang.backend.execution_backend.ExecutionBackendSpec method) MaterializeKernelLaunch() (in module tilelang.transform) MatmulTemplate (class in tilelang.carver.template.matmul) MatrixCoreIntrinEmitter (class in tilelang.rocm.intrinsics.mfma_macro_generator) MatrixCorePreshuffleIntrinEmitter (class in tilelang.rocm.intrinsics.mfma_macro_generator) max (in module tilelang.language.tir.op) max() (in module tilelang.contrib.cutedsl.reduce) max2() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.math_intrinsics) max_mismatched_ratio (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) max_smem_usage (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) max_value (in module tilelang.language.tir.ir) max_value() (in module tilelang.language.tir.op) MaxOp (class in tilelang.contrib.cutedsl.reduce) maybe_desc_name() (in module tilelang.jit.adapter.utils) mbar (tilelang.tileop.gemm.gemm_base.GemmBase property) mbarptr (tilelang.tileop.gemm.gemm_base.GemmBase property) mbarrier_arrive() (in module tilelang.language.builtin) mbarrier_arrive_expect_tx() (in module tilelang.language.builtin) mbarrier_cp_async_arrive() (in module tilelang.contrib.cutedsl.cpasync) mbarrier_expect_tx() (in module tilelang.language.builtin) mbarrier_wait() (in module tilelang.contrib.cutedsl.cpasync) mbarrier_wait_parity() (in module tilelang.language.builtin) mcpu (tilelang.carver.arch.rdna.RDNA attribute) MEDIAN_NUM_SIGNED (in module tilelang.contrib.cutedsl.quantize) MEDIAN_NUM_UNSIGNED (in module tilelang.contrib.cutedsl.quantize) MemoryAccessCollector (class in tilelang.transform.decouple_type_cast) merge() (tilelang.carver.roller.bestfit.Block method) MergeIfStmt() (in module tilelang.transform) MergeSharedMemoryAllocations() (in module tilelang.transform) metadata_16bit_load_32x2_to_shared_16x2_layout_16bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_16bit_load_32x2_to_shared_16x2_layout_32bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_16bit_load_32x2_to_shared_16x4_layout_8bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_32bit_load_32x1_to_shared_16x2_layout_8bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_8bit_load_32x4_to_shared_16x4_layout_16bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_8bit_load_32x4_to_shared_16x4_layout_32bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) metadata_8bit_load_32x4_to_shared_16x8_layout_8bit() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) METAL (class in tilelang.carver.arch.metal) metal_pipeline (in module tilelang.metal.pipeline) MetalFragmentToSimdgroup (in module tilelang.metal.transform.metal_fragment_to_simdgroup) MetalKernelAdapter (class in tilelang.jit.adapter.torch.metal) MetalPassPipelineBody() (in module tilelang.metal.pipeline) mfma() (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) mfma_store_index_map() (in module tilelang.rocm.intrinsics.utils) mfma_store_index_map_32x32() (in module tilelang.rocm.intrinsics.utils) micro_size_k (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) micro_size_x (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) micro_size_y (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) min (in module tilelang.language.tir.op) min() (in module tilelang.contrib.cutedsl.reduce) min2() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.math_intrinsics) min_value (in module tilelang.language.tir.ir) min_value() (in module tilelang.language.tir.op) MinOp (class in tilelang.contrib.cutedsl.reduce) mma() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter method) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) mma_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_32x8_to_shared_16x16_layout_fp16() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) mma_32x8_to_shared_16x16_layout_fp32() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) mma_atom() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) mma_fill (in module tilelang.language.tir.ir) mma_fill() (in module tilelang.language.tir.op) mma_load_a_32x16_to_shared_16x32_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_load_a_32x4_to_shared_16x4_layout() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) mma_load_a_32x4_to_shared_16x8_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_load_a_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_load_b_32x16_to_shared_16x32_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_load_b_32x4_to_shared_16x4_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) mma_load_b_32x4_to_shared_16x8_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_load_b_32x4_to_shared_4x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) mma_load_b_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_num_inst_m (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter property) mma_num_inst_n (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter property) mma_sp() (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) mma_sp_load_a_32x16_to_shared_16x64_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_sp_load_a_32x4_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_sp_load_a_32x8_to_shared_16x32_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_sp_load_b_32x16_to_shared_16x32_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_sp_load_b_32x32_to_shared_16x64_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_sp_load_b_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) mma_store (in module tilelang.language.tir.ir) mma_store() (in module tilelang.language.tir.op) mma_store_32x2_to_shared_8x8_layout_fp64() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_store_32x8_to_shared_16x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) mma_store_index_map() (in module tilelang.cuda.intrinsics.layout.utils) mma_store_index_map_fp64() (in module tilelang.cuda.intrinsics.layout.utils) mod (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) mode (tilelang.jit.JITImpl attribute), [1] (tilelang.language.eager.builder.JITFunc attribute) (tilelang.layout.cute.ScaledBasis property) ModeLike (in module tilelang.layout.cute) module tilelang tilelang.analysis tilelang.analysis.ast_printer tilelang.analysis.fragment_loop_checker tilelang.analysis.layout_visual tilelang.analysis.nested_loop_checker tilelang.autodd tilelang.autotuner tilelang.autotuner.capture tilelang.autotuner.grouped_compile tilelang.autotuner.param tilelang.autotuner.tuner tilelang.backend tilelang.backend.common tilelang.backend.execution_backend tilelang.backend.pass_pipeline tilelang.backend.pass_pipeline.pipeline tilelang.backend.pass_pipeline.pipeline_utils tilelang.backend.target tilelang.cache tilelang.cache.kernel_cache tilelang.carver tilelang.carver.analysis tilelang.carver.arch tilelang.carver.arch.arch_base tilelang.carver.arch.cdna tilelang.carver.arch.cpu tilelang.carver.arch.cuda tilelang.carver.arch.driver tilelang.carver.arch.driver.cuda_driver tilelang.carver.arch.metal tilelang.carver.arch.rdna tilelang.carver.common_schedules tilelang.carver.matmul_analysis tilelang.carver.roller tilelang.carver.roller.bestfit tilelang.carver.roller.hint tilelang.carver.roller.node tilelang.carver.roller.policy tilelang.carver.roller.policy.common tilelang.carver.roller.policy.default tilelang.carver.roller.policy.tensorcore tilelang.carver.roller.rasterization tilelang.carver.roller.shape_inference tilelang.carver.roller.shape_inference.common tilelang.carver.roller.shape_inference.tir tilelang.carver.template tilelang.carver.template.base tilelang.carver.template.conv tilelang.carver.template.elementwise tilelang.carver.template.flashattention tilelang.carver.template.gemv tilelang.carver.template.general_reduce tilelang.carver.template.matmul tilelang.carver.utils tilelang.common tilelang.common.transform_kind tilelang.contrib tilelang.contrib.cc tilelang.contrib.cutedsl tilelang.contrib.cutedsl.atomic tilelang.contrib.cutedsl.cpasync tilelang.contrib.cutedsl.gemm_tcgen05 tilelang.contrib.cutedsl.gemm_v1 tilelang.contrib.cutedsl.gemm_v2 tilelang.contrib.cutedsl.grid_sync tilelang.contrib.cutedsl.ieee_math tilelang.contrib.cutedsl.ldsm tilelang.contrib.cutedsl.math tilelang.contrib.cutedsl.ptx_mma tilelang.contrib.cutedsl.quantize tilelang.contrib.cutedsl.reduce tilelang.contrib.cutedsl.threadblock_swizzle tilelang.contrib.cutedsl.utils tilelang.contrib.cutedsl.warp tilelang.contrib.dlpack tilelang.contrib.hip_resource_info tilelang.contrib.hipcc tilelang.contrib.msvc tilelang.contrib.nvcc tilelang.contrib.nvrtc tilelang.contrib.rocm tilelang.cpu tilelang.cpu.execution_backend tilelang.cpu.op tilelang.cpu.op.gemm tilelang.cpu.op.gemm.gemm_scalar tilelang.cpu.pipeline tilelang.cuda tilelang.cuda.debug tilelang.cuda.execution_backend tilelang.cuda.intrinsics tilelang.cuda.intrinsics.layout tilelang.cuda.intrinsics.layout.mma_layout tilelang.cuda.intrinsics.layout.mma_sm70_layout tilelang.cuda.intrinsics.layout.mma_sp_layout tilelang.cuda.intrinsics.layout.utils tilelang.cuda.intrinsics.macro tilelang.cuda.intrinsics.macro.mma_macro_generator tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator tilelang.cuda.intrinsics.macro.mma_sm75_macro_generator tilelang.cuda.intrinsics.macro.mma_sp_macro_generator tilelang.cuda.intrinsics.macro.tcgen05_macro_generator tilelang.cuda.intrinsics.macro.wgmma_macro_generator tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator tilelang.cuda.op tilelang.cuda.op.gemm tilelang.cuda.op.gemm.gemm_mma tilelang.cuda.op.gemm.gemm_mma_sm70 tilelang.cuda.op.gemm.gemm_mma_sm75 tilelang.cuda.op.gemm.gemm_tcgen05 tilelang.cuda.op.gemm.gemm_wgmma tilelang.cuda.op.gemm_sp tilelang.cuda.op.gemm_sp.gemm_sp_mma tilelang.cuda.op.gemm_sp.gemm_sp_wgmma tilelang.cuda.pipeline tilelang.cuda.target tilelang.cuda.transform tilelang.dtypes tilelang.engine tilelang.engine.callback tilelang.engine.lower tilelang.engine.param tilelang.engine.semantic_check tilelang.env tilelang.intrinsics tilelang.ir tilelang.jit tilelang.jit.adapter tilelang.jit.adapter.base tilelang.jit.adapter.cutedsl tilelang.jit.adapter.cutedsl.adapter tilelang.jit.adapter.cutedsl.checks tilelang.jit.adapter.cutedsl.kernel_cache tilelang.jit.adapter.cutedsl.libgen tilelang.jit.adapter.cutedsl.wrapper tilelang.jit.adapter.cython tilelang.jit.adapter.cython.adapter tilelang.jit.adapter.cython.kernel_cache tilelang.jit.adapter.kernel_cache tilelang.jit.adapter.libgen tilelang.jit.adapter.nvrtc tilelang.jit.adapter.nvrtc.adapter tilelang.jit.adapter.nvrtc.kernel_cache tilelang.jit.adapter.nvrtc.libgen tilelang.jit.adapter.nvrtc.wrapper tilelang.jit.adapter.torch tilelang.jit.adapter.torch.kernel_cache tilelang.jit.adapter.torch.metal tilelang.jit.adapter.tvm_ffi tilelang.jit.adapter.utils tilelang.jit.adapter.wrapper tilelang.jit.diagnostics tilelang.jit.env tilelang.jit.exceptions tilelang.jit.kernel tilelang.jit.param tilelang.language tilelang.language.allocate tilelang.language.annotations tilelang.language.atomic tilelang.language.builtin tilelang.language.cluster tilelang.language.copy_op tilelang.language.customize tilelang.language.dtypes tilelang.language.eager tilelang.language.eager.ast tilelang.language.eager.builder tilelang.language.eager.utils tilelang.language.experimental tilelang.language.experimental.gemm_sp_op tilelang.language.fastmath tilelang.language.fill_op tilelang.language.fp8 tilelang.language.frame tilelang.language.gemm_op tilelang.language.kernel tilelang.language.logical tilelang.language.loop tilelang.language.math_intrinsics tilelang.language.overrides tilelang.language.overrides.buffer tilelang.language.pdl tilelang.language.print_op tilelang.language.proxy tilelang.language.random tilelang.language.reduce_op tilelang.language.scan_op tilelang.language.symbolics tilelang.language.tir tilelang.language.tir.entry tilelang.language.tir.ir tilelang.language.tir.op tilelang.language.utils tilelang.language.warpgroup tilelang.layout tilelang.layout.cute tilelang.layout.fragment tilelang.layout.gemm_sp tilelang.layout.layout tilelang.layout.swizzle tilelang.math tilelang.metal tilelang.metal.execution_backend tilelang.metal.intrinsics tilelang.metal.intrinsics.metal_macro_generator tilelang.metal.op tilelang.metal.op.gemm tilelang.metal.op.gemm.gemm_metal tilelang.metal.pipeline tilelang.metal.target tilelang.metal.transform tilelang.metal.transform.mark_host_metal_context tilelang.metal.transform.metal_fragment_to_simdgroup tilelang.profiler tilelang.profiler.bench tilelang.quantize tilelang.quantize.lop3 tilelang.quantize.mxfp tilelang.quantize.quantization tilelang.quantize.utils tilelang.rocm tilelang.rocm.execution_backend tilelang.rocm.intrinsics tilelang.rocm.intrinsics.mfma_layout tilelang.rocm.intrinsics.mfma_macro_generator tilelang.rocm.intrinsics.utils tilelang.rocm.intrinsics.wmma_layout tilelang.rocm.intrinsics.wmma_macro_generator tilelang.rocm.op tilelang.rocm.op.gemm tilelang.rocm.op.gemm.gemm_mfma tilelang.rocm.op.gemm.gemm_wmma tilelang.rocm.pipeline tilelang.rocm.target tilelang.testing tilelang.testing.perf_regression tilelang.tileop tilelang.tileop.base tilelang.tileop.gemm tilelang.tileop.gemm.gemm_base tilelang.tileop.gemm.registry tilelang.tileop.gemm_sp tilelang.tileop.gemm_sp.gemm_sp_base tilelang.tileop.gemm_sp.gemm_sp_wgmma tilelang.tileop.gemm_sp.registry tilelang.tools tilelang.tools.Analyzer tilelang.tools.plot_layout tilelang.transform tilelang.transform.add_bufstore_wrapper tilelang.transform.decouple_type_cast tilelang.transform.hoist_broadcast_values tilelang.transform.pass_config tilelang.transform.simplify tilelang.utils tilelang.utils.deprecated tilelang.utils.device tilelang.utils.language tilelang.utils.pass_diff tilelang.utils.pass_diff_hook tilelang.utils.sparse tilelang.utils.tensor tilelang.webgpu tilelang.webgpu.pipeline module_has_tma() (in module tilelang.cuda.pipeline) MPSIntrinEmitter (class in tilelang.metal.intrinsics.metal_macro_generator) mul2() (in module tilelang.language.math_intrinsics) mutate() (in module tilelang.language.eager.ast) N N (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) n_dim (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) N_DIM (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) n_max_threads (tilelang.contrib.hip_resource_info.KernelResourceUsage attribute) (tilelang.jit.kernel.JITKernel property) n_regs (tilelang.contrib.hip_resource_info.KernelResourceUsage attribute) (tilelang.jit.kernel.JITKernel property) n_spills (tilelang.contrib.hip_resource_info.KernelResourceUsage attribute) (tilelang.jit.kernel.JITKernel property) n_warp (tilelang.ir.GemmSPWarpPolicy attribute) (tilelang.ir.GemmWarpPolicy attribute) name (tilelang.autodd.ASTPatRewrite attribute) (tilelang.autodd.GeneralRemove attribute) (tilelang.autodd.IntConstApply attribute) (tilelang.backend.execution_backend.ExecutionBackendSpec attribute) (tilelang.backend.pass_pipeline.pipeline.PassPipeline attribute) (tilelang.backend.target.TargetDetectorSpec attribute) (tilelang.backend.target.TargetNormalizerSpec attribute) (tilelang.carver.analysis.BlockInfo attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.roller.node.Node attribute) (tilelang.carver.roller.shape_inference.tir.TensorDepNode attribute) (tilelang.language.eager.builder.Macro attribute) (tilelang.language.eager.builder.TirTemplate attribute) (tilelang.testing.perf_regression.PerfResult attribute) (tilelang.tileop.gemm.registry.GemmImplEntry attribute) (tilelang.tileop.gemm_sp.registry.GemmSPImplEntry attribute) name2dep (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis attribute) name_inside_frame (tilelang.language.eager.builder.Builder attribute) named_barrier_arrive() (in module tilelang.language.builtin) NamedBarrier (class in tilelang.contrib.cutedsl.reduce) names (tilelang.language.eager.ast.QuoteVisitor attribute) nearbyint (in module tilelang.language.tir.ir) nearbyint() (in module tilelang.language.tir.op) NestedLoopChecker() (in module tilelang.analysis.nested_loop_checker) next_power_of_2() (in module tilelang.math) nextafter (in module tilelang.language.tir.ir) nextafter() (in module tilelang.language.tir.op) no_set_max_nreg() (in module tilelang.language.builtin) Node (class in tilelang.carver.roller.node) nodes (tilelang.carver.roller.policy.default.DefaultPolicy attribute) NONE (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode attribute) nonlocals (tilelang.language.eager.ast.DSLMutator attribute) NonTransform (tilelang.common.transform_kind.TransformKind attribute) NoRasterization (class in tilelang.carver.roller.rasterization) Normal (tilelang.utils.tensor.TensorSupplyType attribute) normalize (tilelang.backend.target.TargetNormalizerSpec attribute) normalize_cutedsl_target() (in module tilelang.cuda.target) normalize_flat_binds() (in module tilelang.transform.decouple_type_cast) normalize_pass_configs() (in module tilelang.transform.pass_config) normalize_prim_func() (in module tilelang.carver.analysis) normalize_rocm_arch() (in module tilelang.rocm.target) normalize_rocm_target() (in module tilelang.rocm.target) normalize_to_matmul() (in module tilelang.carver.matmul_analysis) num_elems_per_byte (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) num_heads (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) num_outputs() (tilelang.carver.roller.node.Node method) num_threads (tilelang.language.kernel.KernelLaunchFrame property) num_wave (tilelang.carver.roller.hint.TileDict attribute) num_workers (tilelang.autodd.ParTaskManager attribute) NVRTCKernelAdapter (class in tilelang.jit.adapter.nvrtc) (class in tilelang.jit.adapter.nvrtc.adapter) NVRTCKernelCache (class in tilelang.jit.adapter.nvrtc.kernel_cache) NVRTCLibraryGenerator (class in tilelang.jit.adapter.nvrtc.libgen) O offset (tilelang.layout.cute.ComposedLayout attribute) offset_A (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) offset_B (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) One (tilelang.utils.tensor.TensorSupplyType attribute) oob_fill (tilelang.jit.adapter.utils.TMADescriptorParams attribute) Operator (in module tilelang.language.eager.ast) opt_shapes (tilelang.carver.roller.hint.Hint attribute) orig_func (tilelang.language.eager.builder.JITFunc attribute) (tilelang.language.eager.builder.Macro attribute) (tilelang.language.eager.builder.PrimFunc attribute) out_dtype (tilelang.carver.roller.hint.IntrinInfo attribute) (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] out_idx (tilelang.autotuner.param.CompileArgs attribute) (tilelang.autotuner.param.ProfileArgs attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel property) (tilelang.jit.param.Kernel attribute) (tilelang.language.eager.builder.Builder attribute) OUT_IDX_PATH (in module tilelang.autotuner.param) out_tensor_cnt (tilelang.language.eager.builder.Builder attribute) output (tilelang.autodd.Args attribute) (tilelang.carver.roller.shape_inference.common.Statement attribute) output_blocks (tilelang.carver.roller.node.PrimFuncNode attribute) output_buffers (tilelang.carver.roller.node.PrimFuncNode attribute) output_file (tilelang.autodd.ParTaskManager attribute) output_nodes (tilelang.carver.template.base.BaseTemplate property) output_queue (tilelang.autodd.AsyncPythonRunner attribute) output_strides (tilelang.carver.roller.hint.Hint attribute) output_strides_map (tilelang.carver.roller.hint.TileDict attribute) output_tile (tilelang.carver.roller.hint.TileDict attribute) OutputNode (class in tilelang.carver.roller.node) outputs (tilelang.carver.roller.node.Node property) OutTensor (class in tilelang.language.eager.builder) override() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) P P (tilelang.carver.template.conv.ConvTemplate attribute), [1] pack_half2() (in module tilelang.contrib.cutedsl.utils) panel_width (tilelang.carver.roller.rasterization.Rasterization property) panel_width_ (tilelang.carver.roller.rasterization.Rasterization attribute) (tilelang.carver.roller.rasterization.Rasterization2DColumn attribute) (tilelang.carver.roller.rasterization.Rasterization2DRow attribute) par_compile() (in module tilelang.jit) (tilelang.jit.JITImpl method) Parallel() (in module tilelang.language.loop) parallel() (in module tilelang.language.tir.ir) ParallelOp (class in tilelang.ir) param_dtypes (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) param_shapes (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) params (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.kernel.JITKernel property) (tilelang.language.eager.builder.PrimFunc attribute) (tilelang.profiler.Profiler attribute), [1] params_as_dict() (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) PARAMS_PATH (in module tilelang.autotuner.param) params_path (tilelang.cache.kernel_cache.KernelCache attribute) parse_args() (tilelang.language.eager.builder.JITFunc method) parse_cache_key() (tilelang.jit.JITImpl method) parse_compute_version() (in module tilelang.contrib.nvcc) (in module tilelang.contrib.rocm) parse_expr() (in module tilelang.autodd) parse_function_call_args() (in module tilelang.jit.adapter.utils) parse_source_information() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) parse_stmts() (in module tilelang.autodd) parse_tma_descriptor_args() (in module tilelang.jit.adapter.utils) ParTaskManager (class in tilelang.autodd) pass_configs (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) pass_context (tilelang.carver.roller.hint.Hint attribute) pass_diff() (in module tilelang.utils.pass_diff) PassConfigKey (class in tilelang.transform.pass_config) passes (tilelang.language.eager.ast.QuoteVisitor attribute) PassPipeline (class in tilelang.backend.pass_pipeline.pipeline) path (tilelang.jit.adapter.base.CachedTextSource attribute) PDD (class in tilelang.autodd) pdl_sync() (in module tilelang.language.pdl) pdl_sync_map (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) PDL_SYNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) pdl_trigger() (in module tilelang.language.pdl) pending_defs (tilelang.transform.hoist_broadcast_values.HoistBroadcastValuesMutator attribute) PerfResult (class in tilelang.testing.perf_regression) Persistent() (in module tilelang.language.loop) PersistThreadblock() (in module tilelang.cuda.transform) pipeline_stage (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) Pipelined() (in module tilelang.language.loop) PipelinePlanning() (in module tilelang.transform) PlaceHolderNode (class in tilelang.carver.roller.node) placeholders (tilelang.autodd.ASTPat attribute) plan_rasterization() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) PlanAndUpdateBufferAllocationLocation() (in module tilelang.transform) platform (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) plot_fragment_tv() (in module tilelang.tools.plot_layout) plot_layout() (in module tilelang.tools.plot_layout) policy (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) policy_type (tilelang.ir.GemmSPWarpPolicy attribute) (tilelang.ir.GemmWarpPolicy attribute) pop() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) pop_recorded() (in module tilelang.contrib.hip_resource_info) popcount (in module tilelang.language.tir.ir) popcount() (in module tilelang.language.tir.op) post_proc() (tilelang.autodd.ParTaskManager method) pow (in module tilelang.language.tir.ir) pow() (in module tilelang.language.tir.op) pow_of_int() (in module tilelang.language.tir.op) power() (in module tilelang.language.tir.op) pre_order_traverse() (in module tilelang.carver.roller.node) PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY (in module tilelang.jit.adapter.wrapper) PREDEF_ATTRIBUTE_SET_DYNAMIC_MEMORY_HIP (in module tilelang.jit.adapter.wrapper) PREDEF_HOST_FUNC (in module tilelang.jit.adapter.wrapper) PREDEF_HOST_FUNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) PREDEF_INIT_FUNC (in module tilelang.jit.adapter.wrapper) predicate (tilelang.tileop.gemm.registry.GemmImplEntry attribute) (tilelang.tileop.gemm_sp.registry.GemmSPImplEntry attribute) prefetch_tma_descriptor() (in module tilelang.contrib.cutedsl.cpasync) PreLowerSemanticCheck() (in module tilelang.engine.semantic_check) prepend_dll_search_path() (in module tilelang.env) prepend_pythonpath() (in module tilelang.env) prim_expr_equal() (in module tilelang.utils.language) prim_func (tilelang.carver.roller.node.PrimFuncNode attribute) (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter property) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter property) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper property) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper property) (tilelang.jit.kernel.JITKernel attribute) (tilelang.language.eager.builder.TirTemplate attribute) prim_func() (in module tilelang.language.eager.builder) (in module tilelang.language.tir.entry) (tilelang.language.eager.builder.Builder method) prim_func_arg() (tilelang.language.eager.builder.Builder method) PrimFunc (class in tilelang.language.eager.builder) PrimFuncNode (class in tilelang.carver.roller.node) print() (in module tilelang.language.print_op) print_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) print_fragment_buffer_with_condition() (in module tilelang.language.print_op) print_fragment_format() (in module tilelang.analysis.layout_visual) print_global_buffer_with_condition() (in module tilelang.language.print_op) print_local_buffer_with_condition() (in module tilelang.language.print_op) print_msg() (in module tilelang.language.print_op) print_shared_buffer_with_condition() (in module tilelang.language.print_op) print_source_code() (tilelang.jit.kernel.JITKernel method) print_var() (in module tilelang.language.print_op) print_var_with_condition() (in module tilelang.language.print_op) probas (tilelang.autodd.PDD attribute) process (tilelang.autodd.AsyncPythonRunner attribute) process_func() (in module tilelang.testing.perf_regression) ProducerConsumerWarpSpecialized() (in module tilelang.cuda.transform) product() (in module tilelang.layout.cute) profile_args (tilelang.autotuner.tuner.AutoTuner attribute) ProfileArgs (class in tilelang.autotuner.param) Profiler (class in tilelang.profiler) Program (class in tilelang.jit.param) propagate() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs_on_reduction() (tilelang.carver.roller.node.PrimFuncNode method) propagate_outputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_reduction_inputs() (tilelang.carver.roller.node.PrimFuncNode method) ptr() (in module tilelang.language.proxy) ptr_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) ptx_arrive_barrier (in module tilelang.language.tir.ir) ptx_arrive_barrier() (in module tilelang.language.tir.op) ptx_arrive_barrier_expect_tx (in module tilelang.language.tir.ir) ptx_arrive_barrier_expect_tx() (in module tilelang.language.tir.op) ptx_arrive_cluster_barrier() (in module tilelang.language.builtin) ptx_commit_group (in module tilelang.language.tir.ir) ptx_commit_group() (in module tilelang.language.tir.op) ptx_cp_async (in module tilelang.language.tir.ir) ptx_cp_async() (in module tilelang.language.tir.op) ptx_cp_async_barrier (in module tilelang.language.tir.ir) ptx_cp_async_barrier() (in module tilelang.language.tir.op) ptx_cp_async_bulk (in module tilelang.language.tir.ir) ptx_cp_async_bulk() (in module tilelang.language.tir.op) ptx_fence_barrier_init (in module tilelang.language.tir.ir) ptx_fence_barrier_init() (in module tilelang.language.tir.op) ptx_init_barrier_thread_count (in module tilelang.language.tir.ir) ptx_init_barrier_thread_count() (in module tilelang.language.tir.op) ptx_ldmatrix (in module tilelang.language.tir.ir) ptx_ldmatrix() (in module tilelang.language.tir.op) ptx_ldmatrix_x1() (in module tilelang.contrib.cutedsl.ldsm) ptx_ldmatrix_x1_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_ldmatrix_x2() (in module tilelang.contrib.cutedsl.ldsm) ptx_ldmatrix_x2_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_ldmatrix_x4() (in module tilelang.contrib.cutedsl.ldsm) ptx_ldmatrix_x4_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_mma (in module tilelang.language.tir.ir) ptx_mma() (in module tilelang.contrib.cutedsl.ptx_mma) (in module tilelang.language.tir.op) ptx_mma_m16n8k16_bf16_bf16_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k16_f16_f16_f16 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k16_f16_f16_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_e4m3_e4m3_f16 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_e4m3_e4m3_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_e5m2_e5m2_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_s4_s4_s32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_s8_s8_s32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_u4_u4_s32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k32_u8_u8_s32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k4_tf32_tf32_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m16n8k8_tf32_tf32_f32 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_m8n8k4_f64_f64_f64 (in module tilelang.contrib.cutedsl.ptx_mma) ptx_mma_sm70() (in module tilelang.language.builtin) ptx_mma_sp (in module tilelang.language.tir.ir) ptx_mma_sp() (in module tilelang.contrib.cutedsl.ptx_mma) (in module tilelang.language.tir.op) ptx_stmatrix_x1() (in module tilelang.contrib.cutedsl.ldsm) ptx_stmatrix_x1_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_stmatrix_x2() (in module tilelang.contrib.cutedsl.ldsm) ptx_stmatrix_x2_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_stmatrix_x4() (in module tilelang.contrib.cutedsl.ldsm) ptx_stmatrix_x4_trans() (in module tilelang.contrib.cutedsl.ldsm) ptx_tcgen05_mma_blockscaled_ss (in module tilelang.language.tir.ir) ptx_tcgen05_mma_blockscaled_ss() (in module tilelang.language.tir.op) ptx_tcgen05_mma_ss (in module tilelang.language.tir.ir) ptx_tcgen05_mma_ss() (in module tilelang.language.tir.op) ptx_tcgen05_mma_ts (in module tilelang.language.tir.ir) ptx_tcgen05_mma_ts() (in module tilelang.language.tir.op) ptx_wait_barrier (in module tilelang.language.tir.ir) ptx_wait_barrier() (in module tilelang.language.tir.op) ptx_wait_group (in module tilelang.language.tir.ir) ptx_wait_group() (in module tilelang.language.tir.op) ptx_wgmma_rs (in module tilelang.language.tir.ir) ptx_wgmma_rs() (in module tilelang.language.tir.op) ptx_wgmma_sp_rs (in module tilelang.language.tir.ir) ptx_wgmma_sp_rs() (in module tilelang.language.tir.op) ptx_wgmma_sp_ss (in module tilelang.language.tir.ir) ptx_wgmma_sp_ss() (in module tilelang.language.tir.op) ptx_wgmma_ss (in module tilelang.language.tir.ir) ptx_wgmma_ss() (in module tilelang.language.tir.op) push() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) PyIntTuple (in module tilelang.layout.cute) pymodule (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator attribute), [1] pypath (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator attribute) PYTHON_HOST_FUNC_TEMPLATE (in module tilelang.jit.adapter.cutedsl.wrapper) pythonic_expr() (in module tilelang.jit.adapter.utils) Q q_multiply_shift (in module tilelang.language.tir.ir) q_multiply_shift() (in module tilelang.language.tir.op) q_multiply_shift_per_axis (in module tilelang.language.tir.ir) q_multiply_shift_per_axis() (in module tilelang.language.tir.op) quote() (in module tilelang.language.eager.ast) quote1() (in module tilelang.language.eager.ast) quote_expr() (in module tilelang.language.eager.ast) QuoteVisitor (class in tilelang.language.eager.ast) R randint_semi_sparse() (in module tilelang.utils.sparse) Randn (tilelang.utils.tensor.TensorSupplyType attribute) randn_semi_sparse() (in module tilelang.utils.sparse) range_map (tilelang.carver.roller.shape_inference.common.Statement attribute) rank() (in module tilelang.layout.cute) Rasterization (class in tilelang.carver.roller.rasterization) rasterization (tilelang.carver.roller.policy.default.DefaultPolicy attribute) Rasterization2DColumn (class in tilelang.carver.roller.rasterization) rasterization2DColumn() (in module tilelang.contrib.cutedsl.threadblock_swizzle) Rasterization2DRow (class in tilelang.carver.roller.rasterization) rasterization2DRow() (in module tilelang.contrib.cutedsl.threadblock_swizzle) rasterization_plan (tilelang.carver.roller.hint.Hint attribute) raxis (tilelang.carver.roller.node.PrimFuncNode attribute) raxis_order (tilelang.carver.roller.hint.Hint property) RDNA (class in tilelang.carver.arch.rdna) rdna_gen (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) rdna_generation (tilelang.carver.arch.rdna.RDNA attribute) recast() (tilelang.layout.cute.ComposedLayout method) (tilelang.layout.cute.Swizzle method) recommend_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) recommend_hints() (tilelang.carver.template.base.BaseTemplate method) reduce() (in module tilelang.language.reduce_op) reduce_absmax() (in module tilelang.language.reduce_op) reduce_abssum() (in module tilelang.language.reduce_op) reduce_axes (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) reduce_bitand() (in module tilelang.language.reduce_op) reduce_bitor() (in module tilelang.language.reduce_op) reduce_bitxor() (in module tilelang.language.reduce_op) reduce_k (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) reduce_max() (in module tilelang.language.reduce_op) reduce_min() (in module tilelang.language.reduce_op) reduce_sum() (in module tilelang.language.reduce_op) reduce_thread (tilelang.carver.roller.hint.Hint attribute) ReduceKind (in module tilelang.language.reduce_op) ReduceOp (class in tilelang.ir) ReducerOp (in module tilelang.language.allocate) ReduceType (class in tilelang.ir) reduction_block (tilelang.carver.roller.node.PrimFuncNode attribute) Ref (class in tilelang.language.eager.builder) ref_input_tensors (tilelang.autotuner.tuner.AutoTuner attribute) ref_latency (tilelang.autotuner.param.AutotuneResult attribute), [1] (tilelang.jit.kernel.JITKernel attribute) ref_latency_cache (tilelang.autotuner.tuner.AutoTuner attribute) ref_prog (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] (tilelang.autotuner.tuner.AutoTuneImpl attribute) reg_cap (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) region() (in module tilelang.language.utils) region_exist_in_list() (in module tilelang.carver.roller.shape_inference.tir) RegionOp (class in tilelang.ir) register_c_postproc() (in module tilelang.engine.callback) register_c_postproc_callback() (in module tilelang.engine.callback) register_cuda_postproc() (in module tilelang.engine.callback) register_cuda_postproc_callback() (in module tilelang.engine.callback) register_execution_backend() (in module tilelang.backend.execution_backend) register_gemm_impl() (in module tilelang.tileop.gemm.registry) register_gemm_sp_impl() (in module tilelang.tileop.gemm_sp.registry) register_hip_postproc() (in module tilelang.engine.callback) register_hip_postproc_callback() (in module tilelang.engine.callback) register_lazy_execution_backends() (in module tilelang.backend.execution_backend) register_let_value() (in module tilelang.language.frame) register_metal_postproc() (in module tilelang.engine.callback) register_metal_postproc_callback() (in module tilelang.engine.callback) register_pipeline() (in module tilelang.backend.pass_pipeline.pipeline) register_target_detector() (in module tilelang.backend.target) register_target_normalizer() (in module tilelang.backend.target) regression() (in module tilelang.testing.perf_regression) reinterpret (in module tilelang.language.tir.ir) reinterpret() (in module tilelang.language.tir.op) remove_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) rep (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) repeat() (tilelang.layout.fragment.Fragment method) (tilelang.layout.layout.Layout method) replace() (tilelang.autodd.ASTPat method) replace_with (tilelang.autodd.GeneralRemove attribute) replicate() (tilelang.layout.fragment.Fragment method) requires_cdna() (in module tilelang.testing) requires_cuda_compute_version() (in module tilelang.testing) requires_cuda_compute_version_eq() (in module tilelang.testing) requires_cuda_compute_version_ge() (in module tilelang.testing) requires_cuda_compute_version_gt() (in module tilelang.testing) requires_cuda_compute_version_le() (in module tilelang.testing) requires_cuda_compute_version_lt() (in module tilelang.testing) requires_cuda_or_cdna() (in module tilelang.testing) requires_gfx950() (in module tilelang.testing) reset() (tilelang.autodd.ParTaskManager method) reset_recorder() (in module tilelang.contrib.hip_resource_info) reshape() (in module tilelang.language.customize) (tilelang.layout.layout.Layout method) resolve_execution_backend() (in module tilelang.backend.execution_backend) resolve_execution_backend_spec() (in module tilelang.backend.execution_backend) resolve_gemm_impl() (in module tilelang.tileop.gemm.registry) resolve_gemm_sp_impl() (in module tilelang.tileop.gemm_sp.registry) resolve_pipeline() (in module tilelang.backend.pass_pipeline.pipeline) resource_usage (tilelang.jit.kernel.JITKernel property) resource_usage_path (tilelang.cache.kernel_cache.KernelCache attribute) result_idx (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.profiler.Profiler attribute), [1] ret (in module tilelang.language.tir.ir) ret() (in module tilelang.language.tir.op) (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) ret_type (tilelang.language.eager.builder.PrimFunc attribute) retrieve_buffer_and_offset() (in module tilelang.utils.language) retrieve_dtype() (in module tilelang.utils.language) retrieve_func_from_module() (in module tilelang.carver.utils) (in module tilelang.utils.language) retrieve_offset() (in module tilelang.utils.language) retrieve_ptr() (in module tilelang.utils.language) retrieve_shape() (in module tilelang.utils.language) retrieve_stride() (in module tilelang.utils.language) retrive_ptr_from_buffer_region() (in module tilelang.utils.language) reverse (tilelang.contrib.cutedsl.reduce.CumMax1D attribute) (tilelang.contrib.cutedsl.reduce.CumMax2D attribute) (tilelang.contrib.cutedsl.reduce.CumSum1D attribute) (tilelang.contrib.cutedsl.reduce.CumSum2D attribute) reverse_bound_inference (tilelang.carver.roller.shape_inference.tir.Statement attribute) rewrite (tilelang.autodd.LabeledRewrite attribute) rewrite() (tilelang.autodd.ASTPatRewrite method) (tilelang.autodd.ASTRewrite method) (tilelang.autodd.AttachFullFuncArgs method) (tilelang.autodd.BinOpFwdArg method) (tilelang.autodd.CallFwdArg1 method) (tilelang.autodd.GeneralRemove method) (tilelang.autodd.IntConstApply method) rewrite_counter (tilelang.autodd.RewriteAttacher attribute) rewrite_names (tilelang.autodd.RewriteAttacher attribute) rewrite_pat (tilelang.autodd.ASTPatRewrite attribute) RewriteApplier (class in tilelang.autodd) RewriteAttacher (class in tilelang.autodd) rewrites (tilelang.autodd.RewriteAttacher attribute) right_inverse() (in module tilelang.layout.cute) rng_init() (in module tilelang.language.random) rng_rand() (in module tilelang.language.random) rng_rand_float() (in module tilelang.language.random) ROCM_HOME (in module tilelang.env) (tilelang.env.Environment attribute) rocm_link() (in module tilelang.contrib.rocm) ROCM_MTRIPLE (in module tilelang.rocm.target) rocm_pipeline (in module tilelang.rocm.pipeline) rocm_warp_size_for_arch() (in module tilelang.rocm.target) ROCMPassPipelineBody() (in module tilelang.rocm.pipeline) round (in module tilelang.language.tir.ir) round() (in module tilelang.language.tir.op) rsqrt (in module tilelang.language.tir.ir) rsqrt() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) rstep (tilelang.carver.roller.hint.Hint attribute) rstep_map (tilelang.carver.roller.hint.TileDict attribute) rt_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) rtol (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) Ruff (class in tilelang.autodd) ruff_fix_code() (in module tilelang.autodd) run() (tilelang.autodd.AsyncPythonRunner method) (tilelang.autodd.SubProcRunner method) (tilelang.autotuner.tuner.AutoTuner method) (tilelang.contrib.cutedsl.reduce.CumMax1D method) (tilelang.contrib.cutedsl.reduce.CumMax2D method) (tilelang.contrib.cutedsl.reduce.CumSum1D method) (tilelang.contrib.cutedsl.reduce.CumSum2D method) run_async() (tilelang.autodd.ParTaskManager method) run_once() (tilelang.jit.kernel.JITKernel method) (tilelang.profiler.Profiler method) run_with() (tilelang.autodd.ParTaskManager method) run_with_timeout() (in module tilelang.autotuner.tuner) runtime_dll_dirs (in module tilelang) rval() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) S S (tilelang.carver.template.conv.ConvTemplate attribute), [1] s_shift (tilelang.layout.cute.Swizzle attribute) save_to_disk() (tilelang.autotuner.param.AutotuneResult method) ScaledBasis (class in tilelang.layout.cute) sch (tilelang.carver.roller.node.BlockAnalyzer attribute) (tilelang.carver.roller.node.PrimFuncNode attribute) schedule_stages (tilelang.carver.roller.hint.Hint attribute) (tilelang.carver.roller.node.PrimFuncNode attribute) scheduled_ir_module (tilelang.jit.adapter.wrapper.TLWrapper attribute) score_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) scratch_bytes (tilelang.contrib.hip_resource_info.KernelResourceUsage attribute) seq_kv_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) seq_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) Serial() (in module tilelang.language.loop) serial() (in module tilelang.language.loop) (in module tilelang.language.tir.ir) SerialForWithStep (class in tilelang.language.eager.builder) set_autotune_inputs() (in module tilelang.autotuner.capture) set_compile_args() (tilelang.autotuner.tuner.AutoTuner method) set_dtype() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PrimFuncNode method) set_fileline() (tilelang.language.eager.builder.Builder method) set_function() (tilelang.carver.template.base.BaseTemplate method) set_inputs() (tilelang.carver.roller.node.Node method) set_kernel_parameters() (tilelang.autotuner.tuner.AutoTuner method) set_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_log_level() (in module tilelang) set_max_nreg() (in module tilelang.language.builtin) set_mode() (tilelang.language.eager.builder.JITFunc method) set_output_nodes() (tilelang.carver.template.base.BaseTemplate method) set_outputs() (tilelang.carver.roller.node.Node method) set_profile_args() (tilelang.autotuner.tuner.AutoTuner method) set_random_seed() (in module tilelang.testing) set_shape() (tilelang.carver.roller.node.Node method) set_src_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_tag() (tilelang.carver.roller.node.Node method) set_value() (tilelang.language.frame.FrameStack method) sf_k_start (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) SFARegion (tilelang.tileop.gemm.gemm_base.GemmBase property) SFBRegion (tilelang.tileop.gemm.gemm_base.GemmBase property) shape (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) (tilelang.language.eager.builder.OutTensor attribute) (tilelang.layout.cute.Layout property) shared_16x16_to_ldmatrix_64x4_layout() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_32x16_layout_A_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x16_layout_A_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x16_layout_B_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x16_layout_B_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_A_colmajor_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_A_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_B_colmajor_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_B_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_C_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_32x8_layout_C_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) shared_16x16_to_local_64x4_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_C() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_k_n (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_m_n (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_n_k (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_n_m (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x16_to_mma_32x8_layout() (in module tilelang.cuda.intrinsics.layout.utils) shared_16x16_to_mma_32x8_layout_rs_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_32x8_layout_rs_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_32x8_layout_sr_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_32x8_layout_sr_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_32x8_smoothlayout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_a_32x8_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_a_32x8_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_b_32x8_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_b_32x8_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x16_to_mma_sp_layout_sr_a() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x16_to_mma_sp_layout_sr_b() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x32_to_local_64x8_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x32_to_local_64x8_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x32_to_mma_32x16_layout() (in module tilelang.cuda.intrinsics.layout.utils) shared_16x32_to_mma_32x16_layout_rs_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_32x16_layout_rs_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_32x16_layout_sr_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_32x16_layout_sr_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_32x16_smoothlayout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_a_32x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_b_32x16_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x32_to_mma_sp_layout_sr_a() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x32_to_mma_sp_layout_sr_b() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x4_to_local_64x1_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x4_to_mma_a_32x4_layout() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) shared_16x4_to_mma_b_32x4_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) shared_16x64_to_local_64x16_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x64_to_local_64x16_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) shared_16x64_to_mma_sp_layout_sr_a() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x64_to_mma_sp_layout_sr_b() (in module tilelang.cuda.intrinsics.layout.mma_sp_layout) shared_16x8_to_mma_32x4_layout_rs_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_32x4_layout_rs_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_32x4_layout_sr_a (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_32x4_layout_sr_b (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_a_32x4_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_a_32x4_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_b_32x4_layout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_16x8_to_mma_b_32x4_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_32x16_to_mma_32x16_layout() (in module tilelang.cuda.intrinsics.layout.utils) shared_32x16_to_mma_32x16_smoothlayout() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_32x16_to_mma_a_32x16_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_32x16_to_mma_b_32x16_layout_trans() (in module tilelang.cuda.intrinsics.layout.mma_layout) shared_32x32_to_local_64x16_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) shared_32x32_to_local_64x16_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) shared_32x32_to_local_64x16_layout_C() (in module tilelang.rocm.intrinsics.mfma_layout) shared_4x16_to_local_64x1_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) shared_4x16_to_mma_b_32x4_layout() (in module tilelang.cuda.intrinsics.layout.mma_sm70_layout) shared_scope (tilelang.carver.roller.hint.Hint attribute) SharedBufferProxy (class in tilelang.language.proxy) shfl_down() (in module tilelang.language.builtin) shfl_sync() (in module tilelang.language.builtin) shfl_up() (in module tilelang.language.builtin) shfl_xor() (in module tilelang.language.builtin) shift_left (in module tilelang.language.tir.ir) shift_left() (in module tilelang.language.tir.op) shift_right (in module tilelang.language.tir.ir) shift_right() (in module tilelang.language.tir.op) should_cleanup_temp_files() (tilelang.env.Environment method) should_disable_shared_memory_reuse() (in module tilelang.backend.pass_pipeline.pipeline_utils) should_enable_aggressive_merge() (in module tilelang.backend.pass_pipeline.pipeline_utils) should_enable_ast_print() (in module tilelang.engine.semantic_check) should_enable_layout_visual() (in module tilelang.backend.pass_pipeline.pipeline_utils) should_enable_prelower_semantic_check() (in module tilelang.engine.semantic_check) should_enable_race_check() (in module tilelang.backend.pass_pipeline.pipeline_utils) should_force_let_inline() (in module tilelang.backend.pass_pipeline.pipeline_utils) show_ptx() (tilelang.jit.kernel.JITKernel method) show_sass() (tilelang.jit.kernel.JITKernel method) show_source() (tilelang.jit.kernel.JITKernel method) shuffle_elect() (in module tilelang.contrib.cutedsl.utils) (in module tilelang.language.builtin) side_effect() (in module tilelang.utils.language) sigmoid (in module tilelang.language.tir.ir) sigmoid() (in module tilelang.language.tir.op) signature (tilelang.jit.JITImpl attribute), [1] (tilelang.language.eager.builder.JITFunc attribute) simd_load() (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) simd_store() (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) simdgroup_copy() (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter method) Simplify() (in module tilelang.transform.simplify) simplify_prim_func() (in module tilelang.transform.simplify) sin (in module tilelang.language.tir.ir) sin() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) sinh (in module tilelang.language.tir.ir) sinh() (in module tilelang.language.tir.op) size() (in module tilelang.layout.cute) (tilelang.autotuner.capture.CaptureStack method) (tilelang.carver.roller.bestfit.Block method) (tilelang.language.kernel.FrameStack method) skip_check (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) skip_kernel_ctx() (tilelang.language.eager.builder.Builder method) SKIP_LOADING_TILELANG_SO (tilelang.env.Environment attribute) sm_partition (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) sm_version (tilelang.carver.arch.cuda.CUDA attribute) smem_box_channel (tilelang.jit.adapter.utils.TMADescriptorParams attribute) smem_box_pixel (tilelang.jit.adapter.utils.TMADescriptorParams attribute) smem_cap (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) smem_cost (tilelang.carver.roller.hint.TileDict attribute) smooth_a (tilelang.carver.roller.hint.IntrinInfo property) smooth_b (tilelang.carver.roller.hint.IntrinInfo property) source (tilelang.autodd.Args attribute) (tilelang.autodd.Ruff attribute) (tilelang.autodd.Task attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) (tilelang.language.eager.ast.IRGenerator attribute) (tilelang.language.eager.builder.Macro property) source_code (tilelang.jit.param.Kernel attribute) span (tilelang.language.eager.ast.QuoteVisitor attribute) (tilelang.language.eager.builder.PrimFunc attribute) SpanAttacher (class in tilelang.language.eager.ast) SPARSE_FACTOR (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) SPARSE_SELECTOR (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) SparseTensorCoreIntrinEmitter (class in tilelang.cuda.intrinsics.macro.mma_sp_macro_generator) split_k_factor (tilelang.carver.roller.hint.Hint attribute) SplitHostDevice() (in module tilelang.transform) sqrt (in module tilelang.language.tir.ir) sqrt() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) Square (tilelang.tileop.base.GemmWarpPolicy attribute) src_id (tilelang.carver.roller.node.Edge attribute) src_node (tilelang.carver.roller.node.Edge attribute) srcpath (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) stack (tilelang.autotuner.capture.CaptureStack attribute) staging_root_dir (tilelang.cache.kernel_cache.KernelCache attribute) start (tilelang.carver.roller.bestfit.Block attribute) (tilelang.language.eager.builder.SerialForWithStep attribute) start_proc() (tilelang.autodd.AsyncPythonRunner method) start_profile_intrinsic (in module tilelang.language.tir.ir) start_profile_intrinsic() (in module tilelang.language.tir.op) start_workers() (tilelang.autodd.ParTaskManager method) Statement (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) static_contiguous_list (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) static_shape_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) static_strides_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) step (tilelang.carver.roller.hint.Hint property) (tilelang.language.eager.builder.SerialForWithStep attribute) stg128() (in module tilelang.language.builtin) stg256() (in module tilelang.language.builtin) stg32() (in module tilelang.language.builtin) stg64() (in module tilelang.language.builtin) stmatrix() (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter method) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) stop (tilelang.language.eager.builder.SerialForWithStep attribute) stop_proc() (tilelang.autodd.AsyncPythonRunner method) stop_workers() (tilelang.autodd.ParTaskManager method) StorageRewrite() (in module tilelang.transform) store() (tilelang.language.eager.builder.Ref method) store_entries (tilelang.transform.decouple_type_cast.AccessReplacer attribute) store_index_map_fn (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) stores (tilelang.transform.decouple_type_cast.MemoryAccessCollector attribute) Stride (class in tilelang.carver.roller.hint) stride (tilelang.carver.roller.hint.Stride property) (tilelang.layout.cute.Layout property) stride_A (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) stride_B (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) stride_byte_offset (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) StridedTensorProxy (class in tilelang.language.proxy) strides (tilelang.language.eager.builder.OutTensor property) structure (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) sub2() (in module tilelang.language.math_intrinsics) submit_result() (tilelang.autodd.ParTaskManager method) SubProcRunner (class in tilelang.autodd) substitute_primfunc() (in module tilelang.language.eager.builder) sum (in module tilelang.language.tir.op) SumOp (class in tilelang.contrib.cutedsl.reduce) supply_prog (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] (tilelang.autotuner.tuner.AutoTuneImpl attribute) supply_type (tilelang.autotuner.param.ProfileArgs attribute), [1], [2] (tilelang.autotuner.tuner.AutoTuneImpl attribute) (tilelang.profiler.Profiler attribute), [1] supports_target (tilelang.backend.execution_backend.ExecutionBackendSpec attribute) suppress_stdout_stderr (class in tilelang.profiler.bench) Swizzle (class in tilelang.layout.cute) swizzle (tilelang.jit.adapter.utils.TMADescriptorParams attribute) (tilelang.layout.cute.ComposedLayout attribute) SWIZZLE_128B (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode attribute) SWIZZLE_32B (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode attribute) SWIZZLE_64B (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode attribute) swizzle_atom_elems (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) swizzle_atom_size() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) swizzle_byte_size() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.SwizzleMode method) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.SwizzleMode method) swizzle_mode (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TCGEN05DescriptorParams attribute) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.WGMMADescriptorParams attribute) SwizzleMode (class in tilelang.cuda.intrinsics.macro.tcgen05_macro_generator) (class in tilelang.cuda.intrinsics.macro.wgmma_macro_generator) symbolic() (in module tilelang.language.symbolics) sync_global() (in module tilelang.language.builtin) sync_grid() (in module tilelang.contrib.cutedsl.grid_sync) (in module tilelang.language.builtin) sync_thread_partial() (in module tilelang.contrib.cutedsl.utils) sync_threads() (in module tilelang.language.builtin) sync_warp() (in module tilelang.language.builtin) syncthreads_and() (in module tilelang.language.builtin) syncthreads_count() (in module tilelang.language.builtin) syncthreads_or() (in module tilelang.language.builtin) T tags (tilelang.carver.roller.policy.default.DefaultPolicy attribute) tan (in module tilelang.language.tir.ir) tan() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) tanh (in module tilelang.language.tir.ir) tanh() (in module tilelang.contrib.cutedsl.math) (in module tilelang.language.tir.op) target (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cpu.CPU attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.metal.METAL attribute) (tilelang.carver.arch.rdna.RDNA attribute) (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) target_get_mcpu() (in module tilelang.rocm.target) target_get_rdna_generation() (in module tilelang.rocm.target) target_get_warp_size() (in module tilelang.rocm.target) target_has_async_copy() (in module tilelang.cuda.target) target_has_bulk_copy() (in module tilelang.cuda.target) target_has_ldmatrix() (in module tilelang.cuda.target) target_has_stmatrix() (in module tilelang.cuda.target) target_host (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) target_is_ampere() (in module tilelang.cuda.target) target_is_cdna() (in module tilelang.rocm.target) target_is_cuda() (in module tilelang.cuda.target) target_is_gfx950() (in module tilelang.rocm.target) target_is_hip() (in module tilelang.rocm.target) target_is_hopper() (in module tilelang.cuda.target) target_is_metal() (in module tilelang.metal.target) target_is_rdna() (in module tilelang.rocm.target) target_is_sm120() (in module tilelang.cuda.target) target_is_turing() (in module tilelang.cuda.target) target_is_volta() (in module tilelang.cuda.target) target_labels (tilelang.autodd.RewriteApplier attribute) target_mapping (tilelang.carver.roller.shape_inference.tir.InputShapeInference attribute) target_type (tilelang.autodd.GeneralRemove attribute) TargetConfig (in module tilelang.backend.target) TargetDetector (in module tilelang.backend.target) TargetDetectorSpec (class in tilelang.backend.target) TargetInput (in module tilelang.backend.target) TargetLike (in module tilelang.backend.target) (in module tilelang.cache) (in module tilelang.jit) (in module tilelang.jit.kernel) TargetNormalizer (in module tilelang.backend.target) TargetNormalizerSpec (class in tilelang.backend.target) TargetPredicate (in module tilelang.backend.execution_backend) Task (class in tilelang.autodd) task_generator() (tilelang.autodd.ASTPDD method) (tilelang.autodd.LinePDD method) (tilelang.autodd.Ruff method) (tilelang.autodd.TaskManager method) task_update() (tilelang.autodd.ASTPDD method) (tilelang.autodd.LinePDD method) (tilelang.autodd.Ruff method) (tilelang.autodd.TaskManager method) TaskManager (class in tilelang.autodd) tc_axis (tilelang.carver.roller.hint.TensorCoreExtraConfig attribute) tcgen05_after_thread_sync() (in module tilelang.language.builtin) tcgen05_atom_arrive() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05_before_thread_sync() (in module tilelang.language.builtin) tcgen05_blockscaled_atom() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05_cp_warpx4() (in module tilelang.language.builtin) tcgen05_gemm() (in module tilelang.language.gemm_op) tcgen05_gemm_blockscaled() (in module tilelang.language.gemm_op) tcgen05_gemm_sp() (in module tilelang.language.experimental.gemm_sp_op) tcgen05_ld_32dp128bNx() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tcgen05_ld_32dp256bNx() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tcgen05_ld_32dp32bNx() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tcgen05_ld_32dp64bNx() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tcgen05_meta_unpacked (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter property) tcgen05_mma_arrive() (in module tilelang.contrib.cutedsl.gemm_tcgen05) (in module tilelang.language.builtin) tcgen05_num_inst_m (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter property) tcgen05_num_inst_n (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter property) tcgen05_num_k_atoms (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter property) tcgen05_prefix (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter attribute) tcgen05_sf_warp_transpose() (in module tilelang.language.builtin) tcgen05_ss_atom() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05_ts_atom() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) TCGEN05DescriptorParams (class in tilelang.cuda.intrinsics.macro.tcgen05_macro_generator) tcgen05mma() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05mma_blockscaled() (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05mma_ss() (in module tilelang.contrib.cutedsl.gemm_tcgen05) (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05mma_ts() (in module tilelang.contrib.cutedsl.gemm_tcgen05) (tilelang.cuda.intrinsics.macro.tcgen05_macro_generator.TensorCoreIntrinEmitter method) tcgen05mma_ws_ss() (in module tilelang.contrib.cutedsl.gemm_tcgen05) Tcgen05SmemDescriptor (class in tilelang.contrib.cutedsl.gemm_tcgen05) tensor_args (tilelang.language.eager.builder.JITFunc attribute) tensor_args_defaults (tilelang.language.eager.builder.JITFunc attribute) tensor_rank (tilelang.jit.adapter.utils.TMADescriptorParams attribute) tensor_strides_map (tilelang.carver.roller.hint.TileDict attribute) tensorcore_legalization() (tilelang.carver.roller.hint.Hint method) TensorCoreExtraConfig (class in tilelang.carver.roller.hint) TensorCoreIntrinEmitter (class in tilelang.cuda.intrinsics.macro.mma_macro_generator) (class in tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator) (class in tilelang.cuda.intrinsics.macro.tcgen05_macro_generator) (class in tilelang.cuda.intrinsics.macro.wgmma_macro_generator) TensorCoreIntrinEmitterSM75 (class in tilelang.cuda.intrinsics.macro.mma_sm75_macro_generator) TensorCoreIntrinEmitterWithLadderTransform (class in tilelang.cuda.intrinsics.macro.mma_macro_generator) TensorCorePolicy (class in tilelang.carver.roller.policy.tensorcore) TensorDepNode (class in tilelang.carver.roller.shape_inference.tir) TensorProxy (class in tilelang.language.proxy) tensors (tilelang.autotuner.capture.AutotuneInputsCapture attribute) TensorSupplyType (class in tilelang.utils.tensor) test_rewrite() (in module tilelang.autodd) text (tilelang.autodd.ParTaskManager attribute) (tilelang.jit.adapter.base.CachedTextSource attribute) text_len (tilelang.autodd.ParTaskManager property) tflops (tilelang.tools.Analyzer.AnalysisResult attribute) THIRD_PARTY_ROOT (in module tilelang.env) thread (tilelang.carver.roller.hint.Hint attribute) (tilelang.layout.fragment.Fragment property) thread_binding() (in module tilelang.language.tir.ir) thread_id_shared_access_32x16_to_16x16_layout_A_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x16_to_16x16_layout_A_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x16_to_16x16_layout_B_colmajor_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x16_to_16x16_layout_B_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_A_colmajor_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_A_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_B_colmajor_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_B_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_C_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_32x8_to_16x16_layout_C_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) thread_id_shared_access_64x16_to_16x64_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_16x64_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_32x32_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_32x32_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_32x32_layout_C_m_n() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x16_to_32x32_layout_C_n_m() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x1_to_16x4_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x1_to_4x16_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_m_n() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_n_m() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_A() (in module tilelang.rocm.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_B() (in module tilelang.rocm.intrinsics.mfma_layout) thread_idx() (in module tilelang.contrib.cutedsl) thread_local_storage (in module tilelang.language.eager.builder) thread_var (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) ThreadIdx() (in module tilelang.contrib.cutedsl.threadblock_swizzle) threads (tilelang.contrib.cutedsl.reduce.CumMax1D attribute) (tilelang.contrib.cutedsl.reduce.CumMax2D attribute) (tilelang.contrib.cutedsl.reduce.CumSum1D attribute) (tilelang.contrib.cutedsl.reduce.CumSum2D attribute) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.language.kernel.KernelLaunchFrame property) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) ThreadSync() (in module tilelang.transform) TILE_H (tilelang.contrib.cutedsl.reduce.CumSum2D attribute) tile_map (tilelang.carver.roller.hint.TileDict attribute) TileDevice (class in tilelang.carver.arch.arch_base) TileDict (class in tilelang.carver.roller.hint) tilelang module tilelang.analysis module tilelang.analysis.ast_printer module tilelang.analysis.fragment_loop_checker module tilelang.analysis.layout_visual module tilelang.analysis.nested_loop_checker module tilelang.autodd module tilelang.autotuner module tilelang.autotuner.capture module tilelang.autotuner.grouped_compile module tilelang.autotuner.param module tilelang.autotuner.tuner module tilelang.backend module tilelang.backend.common module tilelang.backend.execution_backend module tilelang.backend.pass_pipeline module tilelang.backend.pass_pipeline.pipeline module tilelang.backend.pass_pipeline.pipeline_utils module tilelang.backend.target module tilelang.cache module tilelang.cache.kernel_cache module tilelang.carver module tilelang.carver.analysis module tilelang.carver.arch module tilelang.carver.arch.arch_base module tilelang.carver.arch.cdna module tilelang.carver.arch.cpu module tilelang.carver.arch.cuda module tilelang.carver.arch.driver module tilelang.carver.arch.driver.cuda_driver module tilelang.carver.arch.metal module tilelang.carver.arch.rdna module tilelang.carver.common_schedules module tilelang.carver.matmul_analysis module tilelang.carver.roller module tilelang.carver.roller.bestfit module tilelang.carver.roller.hint module tilelang.carver.roller.node module tilelang.carver.roller.policy module tilelang.carver.roller.policy.common module tilelang.carver.roller.policy.default module tilelang.carver.roller.policy.tensorcore module tilelang.carver.roller.rasterization module tilelang.carver.roller.shape_inference module tilelang.carver.roller.shape_inference.common module tilelang.carver.roller.shape_inference.tir module tilelang.carver.template module tilelang.carver.template.base module tilelang.carver.template.conv module tilelang.carver.template.elementwise module tilelang.carver.template.flashattention module tilelang.carver.template.gemv module tilelang.carver.template.general_reduce module tilelang.carver.template.matmul module tilelang.carver.utils module tilelang.common module tilelang.common.transform_kind module tilelang.contrib module tilelang.contrib.cc module tilelang.contrib.cutedsl module tilelang.contrib.cutedsl.atomic module tilelang.contrib.cutedsl.cpasync module tilelang.contrib.cutedsl.gemm_tcgen05 module tilelang.contrib.cutedsl.gemm_v1 module tilelang.contrib.cutedsl.gemm_v2 module tilelang.contrib.cutedsl.grid_sync module tilelang.contrib.cutedsl.ieee_math module tilelang.contrib.cutedsl.ldsm module tilelang.contrib.cutedsl.math module tilelang.contrib.cutedsl.ptx_mma module tilelang.contrib.cutedsl.quantize module tilelang.contrib.cutedsl.reduce module tilelang.contrib.cutedsl.threadblock_swizzle module tilelang.contrib.cutedsl.utils module tilelang.contrib.cutedsl.warp module tilelang.contrib.dlpack module tilelang.contrib.hip_resource_info module tilelang.contrib.hipcc module tilelang.contrib.msvc module tilelang.contrib.nvcc module tilelang.contrib.nvrtc module tilelang.contrib.rocm module tilelang.cpu module tilelang.cpu.execution_backend module tilelang.cpu.op module tilelang.cpu.op.gemm module tilelang.cpu.op.gemm.gemm_scalar module tilelang.cpu.pipeline module tilelang.cuda module tilelang.cuda.debug module tilelang.cuda.execution_backend module tilelang.cuda.intrinsics module tilelang.cuda.intrinsics.layout module tilelang.cuda.intrinsics.layout.mma_layout module tilelang.cuda.intrinsics.layout.mma_sm70_layout module tilelang.cuda.intrinsics.layout.mma_sp_layout module tilelang.cuda.intrinsics.layout.utils module tilelang.cuda.intrinsics.macro module tilelang.cuda.intrinsics.macro.mma_macro_generator module tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator module tilelang.cuda.intrinsics.macro.mma_sm75_macro_generator module tilelang.cuda.intrinsics.macro.mma_sp_macro_generator module tilelang.cuda.intrinsics.macro.tcgen05_macro_generator module tilelang.cuda.intrinsics.macro.wgmma_macro_generator module tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator module tilelang.cuda.op module tilelang.cuda.op.gemm module tilelang.cuda.op.gemm.gemm_mma module tilelang.cuda.op.gemm.gemm_mma_sm70 module tilelang.cuda.op.gemm.gemm_mma_sm75 module tilelang.cuda.op.gemm.gemm_tcgen05 module tilelang.cuda.op.gemm.gemm_wgmma module tilelang.cuda.op.gemm_sp module tilelang.cuda.op.gemm_sp.gemm_sp_mma module tilelang.cuda.op.gemm_sp.gemm_sp_wgmma module tilelang.cuda.pipeline module tilelang.cuda.target module tilelang.cuda.transform module tilelang.dtypes module tilelang.engine module tilelang.engine.callback module tilelang.engine.lower module tilelang.engine.param module tilelang.engine.semantic_check module tilelang.env module tilelang.intrinsics module tilelang.ir module tilelang.jit module tilelang.jit.adapter module tilelang.jit.adapter.base module tilelang.jit.adapter.cutedsl module tilelang.jit.adapter.cutedsl.adapter module tilelang.jit.adapter.cutedsl.checks module tilelang.jit.adapter.cutedsl.kernel_cache module tilelang.jit.adapter.cutedsl.libgen module tilelang.jit.adapter.cutedsl.wrapper module tilelang.jit.adapter.cython module tilelang.jit.adapter.cython.adapter module tilelang.jit.adapter.cython.kernel_cache module tilelang.jit.adapter.kernel_cache module tilelang.jit.adapter.libgen module tilelang.jit.adapter.nvrtc module tilelang.jit.adapter.nvrtc.adapter module tilelang.jit.adapter.nvrtc.kernel_cache module tilelang.jit.adapter.nvrtc.libgen module tilelang.jit.adapter.nvrtc.wrapper module tilelang.jit.adapter.torch module tilelang.jit.adapter.torch.kernel_cache module tilelang.jit.adapter.torch.metal module tilelang.jit.adapter.tvm_ffi module tilelang.jit.adapter.utils module tilelang.jit.adapter.wrapper module tilelang.jit.diagnostics module tilelang.jit.env module tilelang.jit.exceptions module tilelang.jit.kernel module tilelang.jit.param module tilelang.language module tilelang.language.allocate module tilelang.language.annotations module tilelang.language.atomic module tilelang.language.builtin module tilelang.language.cluster module tilelang.language.copy_op module tilelang.language.customize module tilelang.language.dtypes module tilelang.language.eager module tilelang.language.eager.ast module tilelang.language.eager.builder module tilelang.language.eager.utils module tilelang.language.experimental module tilelang.language.experimental.gemm_sp_op module tilelang.language.fastmath module tilelang.language.fill_op module tilelang.language.fp8 module tilelang.language.frame module tilelang.language.gemm_op module tilelang.language.kernel module tilelang.language.logical module tilelang.language.loop module tilelang.language.math_intrinsics module tilelang.language.overrides module tilelang.language.overrides.buffer module tilelang.language.pdl module tilelang.language.print_op module tilelang.language.proxy module tilelang.language.random module tilelang.language.reduce_op module tilelang.language.scan_op module tilelang.language.symbolics module tilelang.language.tir module tilelang.language.tir.entry module tilelang.language.tir.ir module tilelang.language.tir.op module tilelang.language.utils module tilelang.language.warpgroup module tilelang.layout module tilelang.layout.cute module tilelang.layout.fragment module tilelang.layout.gemm_sp module tilelang.layout.layout module tilelang.layout.swizzle module tilelang.math module tilelang.metal module tilelang.metal.execution_backend module tilelang.metal.intrinsics module tilelang.metal.intrinsics.metal_macro_generator module tilelang.metal.op module tilelang.metal.op.gemm module tilelang.metal.op.gemm.gemm_metal module tilelang.metal.pipeline module tilelang.metal.target module tilelang.metal.transform module tilelang.metal.transform.mark_host_metal_context module tilelang.metal.transform.metal_fragment_to_simdgroup module tilelang.profiler module tilelang.profiler.bench module tilelang.quantize module tilelang.quantize.lop3 module tilelang.quantize.mxfp module tilelang.quantize.quantization module tilelang.quantize.utils module tilelang.rocm module tilelang.rocm.execution_backend module tilelang.rocm.intrinsics module tilelang.rocm.intrinsics.mfma_layout module tilelang.rocm.intrinsics.mfma_macro_generator module tilelang.rocm.intrinsics.utils module tilelang.rocm.intrinsics.wmma_layout module tilelang.rocm.intrinsics.wmma_macro_generator module tilelang.rocm.op module tilelang.rocm.op.gemm module tilelang.rocm.op.gemm.gemm_mfma module tilelang.rocm.op.gemm.gemm_wmma module tilelang.rocm.pipeline module tilelang.rocm.target module tilelang.testing module tilelang.testing.perf_regression module tilelang.tileop module tilelang.tileop.base module tilelang.tileop.gemm module tilelang.tileop.gemm.gemm_base module tilelang.tileop.gemm.registry module tilelang.tileop.gemm_sp module tilelang.tileop.gemm_sp.gemm_sp_base module tilelang.tileop.gemm_sp.gemm_sp_wgmma module tilelang.tileop.gemm_sp.registry module tilelang.tools module tilelang.tools.Analyzer module tilelang.tools.plot_layout module tilelang.transform module tilelang.transform.add_bufstore_wrapper module tilelang.transform.decouple_type_cast module tilelang.transform.hoist_broadcast_values module tilelang.transform.pass_config module tilelang.transform.simplify module tilelang.utils module tilelang.utils.deprecated module tilelang.utils.device module tilelang.utils.language module tilelang.utils.pass_diff module tilelang.utils.pass_diff_hook module tilelang.utils.sparse module tilelang.utils.tensor module tilelang.webgpu module tilelang.webgpu.pipeline module TILELANG_AUTO_TUNING_CPU_COUNTS (tilelang.env.Environment attribute) TILELANG_AUTO_TUNING_CPU_UTILITIES (tilelang.env.Environment attribute) TILELANG_AUTO_TUNING_DISABLE_CACHE (tilelang.env.Environment attribute) TILELANG_AUTO_TUNING_MAX_CPU_COUNT (tilelang.env.Environment attribute) TILELANG_CACHE_DIR (tilelang.env.Environment attribute) tilelang_callback_cuda_compile() (in module tilelang.engine.lower) tilelang_callback_cuda_validate() (in module tilelang.engine.lower) tilelang_callback_hip_compile() (in module tilelang.contrib.hipcc) (in module tilelang.engine.lower) TILELANG_CLEANUP_TEMP_FILES (tilelang.env.Environment attribute) TILELANG_COMPILE_TIMEOUT_SECONDS (tilelang.env.Environment attribute) TILELANG_DEFAULT_EXECUTION_BACKEND (tilelang.env.Environment attribute) TILELANG_DEFAULT_TARGET (tilelang.env.Environment attribute) TILELANG_DEFAULT_VERBOSE (tilelang.env.Environment attribute) TILELANG_DISABLE_CACHE (tilelang.env.Environment attribute) tilelang_dtype() (tilelang.engine.param.KernelParam method) TILELANG_GEN_SRC_DIR (in module tilelang.jit.env) TILELANG_HIP_SAVE_TEMP_FILES (in module tilelang.env) (tilelang.env.Environment attribute) TILELANG_JIT_DIAGNOSTICS (tilelang.env.Environment attribute) TILELANG_JIT_DIR (in module tilelang.jit.env) TILELANG_JIT_WORKSPACE_DIR (in module tilelang.jit.env) TILELANG_PACKAGE_PATH (tilelang.env.Environment attribute) TILELANG_PASS_DIFF (tilelang.env.Environment attribute) TILELANG_PASS_DIFF_OUTPUT (tilelang.env.Environment attribute) TILELANG_PRINT_ON_COMPILATION (tilelang.env.Environment attribute) TILELANG_TEMPLATE_PATH (in module tilelang.env) (tilelang.env.Environment attribute) TILELANG_TMP_DIR (tilelang.env.Environment attribute) timeout (tilelang.autodd.Args attribute) (tilelang.autodd.ParTaskManager attribute) (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) TimeoutException TIR_ADD_LOWER_PASS (tilelang.transform.pass_config.PassConfigKey attribute) TIR_CONTROL_FRAME (in module tilelang.language.eager.builder) TIR_DISABLE_CSE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_DISABLE_STORAGE_REWRITE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_DISABLE_VECTORIZE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_ENABLE_DEBUG (tilelang.transform.pass_config.PassConfigKey attribute) TIR_ENABLE_EQUIV_TERMS_IN_CSE (tilelang.transform.pass_config.PassConfigKey attribute) TIR_MERGE_STATIC_SMEM (tilelang.transform.pass_config.PassConfigKey attribute) TIR_NOALIAS (tilelang.transform.pass_config.PassConfigKey attribute) TIR_SIMPLIFY (tilelang.transform.pass_config.PassConfigKey attribute) TIR_USE_ASYNC_COPY (tilelang.transform.pass_config.PassConfigKey attribute) TIR_VAR_SCOPE_FRAME (in module tilelang.language.eager.builder) TirTemplate (class in tilelang.language.eager.builder) TL_AST_PRINT_ENABLE (tilelang.transform.pass_config.PassConfigKey attribute) TL_CONFIG_INDEX_BITWIDTH (tilelang.transform.pass_config.PassConfigKey attribute) TL_DEBUG_MERGE_SHARED_MEMORY_ALLOCATIONS (tilelang.transform.pass_config.PassConfigKey attribute) TL_DEVICE_COMPILE_FLAGS (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_DATA_RACE_CHECK (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_LOOP_UNSWITCHING (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_OUT_OF_BOUND_WARNING (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_PRELOWER_SEMANTIC_CHECK (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_SAFE_MEMORY_ACCESS (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_SHARED_MEMORY_REUSE (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_SHUFFLE_ELECT (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_THREAD_STORAGE_SYNC (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_TMA_LOWER (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_VECTORIZE_256 (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_WARP_SPECIALIZED (tilelang.transform.pass_config.PassConfigKey attribute) TL_DISABLE_WGMMA (tilelang.transform.pass_config.PassConfigKey attribute) TL_DUMP_IR_DIR (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_ASYNC_COPY (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_DUMP_IR (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_FAST_MATH (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_LOWER_LDGSTG (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_LOWER_LDGSTG_PREDICATED (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_PTXAS_VERBOSE_OUTPUT (tilelang.transform.pass_config.PassConfigKey attribute) TL_ENABLE_VECTORIZE_PLANNER_VERBOSE (tilelang.transform.pass_config.PassConfigKey attribute) TL_FORCE_LET_INLINE (tilelang.transform.pass_config.PassConfigKey attribute) TL_IF_STMT_BINDING_INLINE_REPLAYABLE_BINDS (tilelang.transform.pass_config.PassConfigKey attribute) TL_LAYOUT_VISUALIZATION_ENABLE (tilelang.transform.pass_config.PassConfigKey attribute) TL_LAYOUT_VISUALIZATION_FORMATS (tilelang.transform.pass_config.PassConfigKey attribute) TL_LIBS (in module tilelang.env), [1] TL_LOOP_UNSWITCHING_ALLOW_NON_TRIVIAL_ELSE (tilelang.transform.pass_config.PassConfigKey attribute) TL_PTXAS_REGISTER_USAGE_LEVEL (tilelang.transform.pass_config.PassConfigKey attribute) TL_ROOT (in module tilelang.env) TL_SIMPLIFY (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_APPLY_CONSTRAINTS_TO_BOOLEAN_BRANCHES (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_CONVERT_BOOLEAN_TO_AND_OF_ORS (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_ENABLE_LET_INLINE (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_PROPAGATE_KNOWNS_TO_PROVE_CONDITIONAL (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_PROPAGATE_KNOWNS_TO_SIMPLIFY_EXPRESSIONS (tilelang.transform.pass_config.PassConfigKey attribute) TL_SIMPLIFY_TRANSITIVELY_PROVE_INEQUALITIES (tilelang.transform.pass_config.PassConfigKey attribute) TL_STORAGE_REWRITE_DETECT_INPLACE (tilelang.transform.pass_config.PassConfigKey attribute) TL_TEMPLATE_NOT_FOUND_MESSAGE (in module tilelang.env) tl_template_path (in module tilelang.env) TLCPUSourceWrapper (class in tilelang.jit.adapter.wrapper) TLCUDASourceWrapper (class in tilelang.jit.adapter.wrapper) TLCuTeDSLSourceWrapper (class in tilelang.jit.adapter.cutedsl.wrapper) TLHIPSourceWrapper (class in tilelang.jit.adapter.wrapper) TLMetalSourceWrapper (class in tilelang.jit.adapter.wrapper) TLNVRTCSourceWrapper (class in tilelang.jit.adapter.nvrtc.wrapper) TLPyWrapper (class in tilelang.jit.adapter.wrapper) TLWrapper (class in tilelang.jit.adapter.wrapper) tma_copy() (in module tilelang.language.copy_op) tma_cpp_init_code (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) TMA_DESC_INIT_FUNC (in module tilelang.jit.adapter.wrapper) TMA_DESC_INIT_FUNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) tma_descriptor_args (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) tma_gather4() (in module tilelang.language.copy_op) tma_gather4_bytes() (in module tilelang.language.copy_op) TMA_IM2COL_DESC_INIT_FUNC (in module tilelang.jit.adapter.wrapper) TMA_IM2COL_DESC_INIT_FUNC_PY (in module tilelang.jit.adapter.nvrtc.wrapper) tma_lib_name (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator attribute) tma_load() (in module tilelang.contrib.cutedsl.cpasync) (in module tilelang.language.builtin) tma_load_2sm() (in module tilelang.language.builtin) tma_reduce() (in module tilelang.contrib.cutedsl.cpasync) tma_scatter4() (in module tilelang.language.copy_op) tma_store() (in module tilelang.contrib.cutedsl.cpasync) tma_store_arrive() (in module tilelang.contrib.cutedsl.cpasync) (in module tilelang.language.builtin) tma_store_wait() (in module tilelang.contrib.cutedsl.cpasync) (in module tilelang.language.builtin) TMADescriptorParams (class in tilelang.jit.adapter.utils) tmem_allocate() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tmem_deallocate() (in module tilelang.contrib.cutedsl.gemm_tcgen05) tmp_counter (tilelang.language.eager.ast.DSLMutator attribute) to_buffer_region() (in module tilelang.utils.language) to_dict() (tilelang.carver.roller.hint.Hint method) to_prime_factors() (tilelang.tileop.base.GemmWarpPolicy static method) to_python() (in module tilelang.layout.cute) to_tile_region() (in module tilelang.utils.language) top() (tilelang.autotuner.capture.CaptureStack method) (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) topo_order() (in module tilelang.carver.roller.node) torch_assert_close() (in module tilelang.utils.tensor) torch_compress() (in module tilelang.utils.sparse) torch_dtype() (tilelang.engine.param.KernelParam method) torch_function (tilelang.jit.kernel.JITKernel attribute), [1] TorchKernelCache (class in tilelang.jit.adapter.torch.kernel_cache) total_flops (tilelang.tools.Analyzer.AnalysisResult attribute) (tilelang.tools.Analyzer.Analyzer attribute) total_global_bytes (tilelang.tools.Analyzer.AnalysisResult attribute), [1] (tilelang.tools.Analyzer.Analyzer attribute) trace() (in module tilelang.language.tir.op) traffic (tilelang.carver.roller.hint.TileDict attribute) trans_a (tilelang.carver.roller.hint.IntrinInfo attribute) trans_A (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) trans_b (tilelang.carver.roller.hint.IntrinInfo attribute) trans_B (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) trans_E (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) transaction_size (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) TransformKind (class in tilelang.common.transform_kind) transpose() (in module tilelang.language.copy_op) traverse_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) tree (tilelang.autodd.ASTPat attribute) trunc (in module tilelang.language.tir.ir) trunc() (in module tilelang.language.tir.op) truncdiv (in module tilelang.language.tir.ir) truncdiv() (in module tilelang.language.tir.op) truncmod (in module tilelang.language.tir.ir) truncmod() (in module tilelang.language.tir.op) try_inline() (in module tilelang.carver.common_schedules) try_inline_contiguous_spatial() (in module tilelang.carver.common_schedules) tuning (tilelang.carver.arch.rdna.RDNA attribute) tvm_access_ptr (in module tilelang.language.tir.ir) tvm_access_ptr() (in module tilelang.language.tir.op) tvm_bmma_sync (in module tilelang.language.tir.ir) tvm_bmma_sync() (in module tilelang.language.tir.op) tvm_check_return (in module tilelang.language.tir.ir) tvm_check_return() (in module tilelang.language.tir.op) tvm_fill_fragment (in module tilelang.language.tir.ir) tvm_fill_fragment() (in module tilelang.language.tir.op) TVM_IMPORT_PYTHON_PATH (tilelang.env.Environment attribute) TVM_LIBRARY_NOT_FOUND_MESSAGE (in module tilelang.env) TVM_LIBRARY_PATH (tilelang.env.Environment attribute) tvm_load_matrix_sync (in module tilelang.language.tir.ir) tvm_load_matrix_sync() (in module tilelang.language.tir.op) tvm_mfma (in module tilelang.language.tir.ir) tvm_mfma() (in module tilelang.language.tir.op) tvm_mfma_store (in module tilelang.language.tir.ir) tvm_mfma_store() (in module tilelang.language.tir.op) tvm_mma_sync (in module tilelang.language.tir.ir) tvm_mma_sync() (in module tilelang.language.tir.op) tvm_path (in module tilelang.env) TVM_PYTHON_PATH (tilelang.env.Environment attribute) tvm_rdna_wmma (in module tilelang.language.tir.ir) tvm_rdna_wmma() (in module tilelang.language.tir.op) tvm_rdna_wmma_store (in module tilelang.language.tir.ir) tvm_rdna_wmma_store() (in module tilelang.language.tir.op) tvm_stack_alloca (in module tilelang.language.tir.ir) tvm_stack_alloca() (in module tilelang.language.tir.op) tvm_stack_make_array (in module tilelang.language.tir.ir) tvm_stack_make_array() (in module tilelang.language.tir.op) tvm_stack_make_shape (in module tilelang.language.tir.ir) tvm_stack_make_shape() (in module tilelang.language.tir.op) tvm_storage_sync (in module tilelang.language.tir.ir) tvm_storage_sync() (in module tilelang.language.tir.op) tvm_store_matrix_sync (in module tilelang.language.tir.ir) tvm_store_matrix_sync() (in module tilelang.language.tir.op) tvm_struct_get (in module tilelang.language.tir.ir) tvm_struct_get() (in module tilelang.language.tir.op) tvm_struct_set (in module tilelang.language.tir.ir) tvm_struct_set() (in module tilelang.language.tir.op) tvm_thread_allreduce (in module tilelang.language.tir.ir) tvm_thread_allreduce() (in module tilelang.language.tir.op) tvm_thread_invariant (in module tilelang.language.tir.ir) tvm_thread_invariant() (in module tilelang.language.tir.op) tvm_throw_last_error (in module tilelang.language.tir.ir) tvm_throw_last_error() (in module tilelang.language.tir.op) tvm_tuple (in module tilelang.language.tir.ir) tvm_tuple() (in module tilelang.language.tir.op) tvm_warp_activemask (in module tilelang.language.tir.ir) tvm_warp_activemask() (in module tilelang.language.tir.op) tvm_warp_shuffle (in module tilelang.language.tir.ir) tvm_warp_shuffle() (in module tilelang.language.tir.op) tvm_warp_shuffle_down (in module tilelang.language.tir.ir) tvm_warp_shuffle_down() (in module tilelang.language.tir.op) tvm_warp_shuffle_up (in module tilelang.language.tir.ir) tvm_warp_shuffle_up() (in module tilelang.language.tir.op) TVMBackendAllocWorkspace (in module tilelang.language.tir.ir) TVMBackendAllocWorkspace() (in module tilelang.language.tir.op) TVMBackendFreeWorkspace (in module tilelang.language.tir.ir) TVMBackendFreeWorkspace() (in module tilelang.language.tir.op) TVMFFIKernelAdapter (class in tilelang.jit.adapter.tvm_ffi) TVMFFIKernelCache (class in tilelang.jit.adapter.kernel_cache) type_annotation() (in module tilelang.language.tir.op) type_map (in module tilelang.contrib.cutedsl.utils) U uid_counter (tilelang.autodd.RewriteAttacher attribute) undef (in module tilelang.language.tir.ir) undef() (in module tilelang.language.tir.op) Uniform (tilelang.utils.tensor.TensorSupplyType attribute) uninstall_pass_diff_hook() (in module tilelang.utils.pass_diff_hook) Unroll() (in module tilelang.language.loop) unroll() (in module tilelang.language.loop) (in module tilelang.language.tir.ir) UnrollForWithStep (class in tilelang.language.eager.builder) UnrollLoop() (in module tilelang.transform) unwrap_cond() (in module tilelang.language.eager.builder) unwrap_expr() (in module tilelang.language.eager.builder) unwrap_value() (tilelang.language.eager.ast.BaseBuilder method) (tilelang.language.eager.builder.Builder method) update() (tilelang.autodd.PDD method) update_host_func() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator method) (tilelang.jit.adapter.nvrtc.libgen.NVRTCLibraryGenerator method) update_launcher_cpp_code() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator method) update_launcher_lib_name() (tilelang.jit.adapter.cutedsl.libgen.CuTeDSLLibraryGenerator method) update_lib_code() (tilelang.jit.adapter.cutedsl.wrapper.TLCuTeDSLSourceWrapper method) (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.nvrtc.wrapper.TLNVRTCSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLMetalSourceWrapper method) update_tags() (tilelang.carver.roller.node.Node method) update_tuner_result() (tilelang.jit.kernel.JITKernel method) upper_corner (tilelang.jit.adapter.utils.TMADescriptorParams attribute) use_async (tilelang.carver.roller.hint.Hint attribute) use_async_copy (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) use_swizzle() (in module tilelang.language.annotations) use_tc (tilelang.carver.roller.hint.Hint attribute) V valid (tilelang.carver.roller.hint.TileDict attribute) validate_gemm_ab_dtypes() (in module tilelang.language.dtypes) value (tilelang.layout.cute.IntTupleConst attribute) (tilelang.layout.cute.IntTuplePrimExpr attribute) (tilelang.layout.cute.IntTupleScaledBasis attribute) (tilelang.layout.cute.ScaledBasis property) var (tilelang.carver.analysis.IterInfo attribute) var_map (tilelang.carver.roller.shape_inference.common.Statement attribute) vectorcombine (in module tilelang.language.tir.ir) vectorcombine() (in module tilelang.language.tir.op) vectorhigh (in module tilelang.language.tir.ir) vectorhigh() (in module tilelang.language.tir.op) vectorize (tilelang.carver.roller.hint.Hint attribute) Vectorized() (in module tilelang.language.loop) vectorized() (in module tilelang.language.loop) (in module tilelang.language.tir.ir) VectorizeLoop() (in module tilelang.transform) vectorlow (in module tilelang.language.tir.ir) vectorlow() (in module tilelang.language.tir.op) verbose (tilelang.autotuner.param.CompileArgs attribute), [1] (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) (tilelang.jit.adapter.torch.metal.MetalKernelAdapter attribute) (tilelang.jit.adapter.tvm_ffi.TVMFFIKernelAdapter attribute) (tilelang.jit.JITImpl attribute), [1] (tilelang.jit.kernel.JITKernel attribute) VerifyParallelLoop() (in module tilelang.transform) view() (in module tilelang.language.customize) visit() (tilelang.autodd.ASTMutator method) (tilelang.autodd.RewriteApplier method) (tilelang.autodd.RewriteAttacher method) (tilelang.language.eager.ast.SpanAttacher method) visit_AnnAssign() (tilelang.language.eager.ast.DSLMutator method) visit_Assert() (tilelang.language.eager.ast.DSLMutator method) visit_Assign() (tilelang.language.eager.ast.DSLMutator method) visit_AugAssign() (tilelang.language.eager.ast.DSLMutator method) visit_bind_() (tilelang.transform.hoist_broadcast_values.HoistBroadcastValuesMutator method) visit_BoolOp() (tilelang.language.eager.ast.DSLMutator method) visit_Break() (tilelang.language.eager.ast.DSLMutator method) visit_broadcast_() (tilelang.transform.hoist_broadcast_values.HoistBroadcastValuesMutator method) visit_buffer_load_() (tilelang.transform.decouple_type_cast.AccessReplacer method) (tilelang.transform.decouple_type_cast.MemoryAccessCollector method) visit_buffer_store_() (tilelang.transform.decouple_type_cast.AccessReplacer method) (tilelang.transform.decouple_type_cast.MemoryAccessCollector method) (tilelang.transform.hoist_broadcast_values.HoistBroadcastValuesMutator method) visit_call_() (tilelang.transform.decouple_type_cast.MemoryAccessCollector method) visit_Compare() (tilelang.language.eager.ast.DSLMutator method) visit_Continue() (tilelang.language.eager.ast.DSLMutator method) visit_Expr() (tilelang.language.eager.ast.DSLMutator method) visit_For() (tilelang.language.eager.ast.DSLMutator method) visit_for_() (tilelang.transform.decouple_type_cast.DecoupleTypeCastMutator method) visit_FunctionDef() (tilelang.language.eager.ast.DSLMutator method) visit_If() (tilelang.language.eager.ast.DSLMutator method) visit_IfExp() (tilelang.language.eager.ast.DSLMutator method) visit_Name() (tilelang.language.eager.ast.DSLMutator method) (tilelang.language.eager.ast.QuoteVisitor method) visit_Pass() (tilelang.language.eager.ast.QuoteVisitor method) visit_Return() (tilelang.language.eager.ast.DSLMutator method) visit_UnaryOp() (tilelang.language.eager.ast.DSLMutator method) visit_While() (tilelang.language.eager.ast.DSLMutator method) visit_With() (tilelang.language.eager.ast.DSLMutator method) visited (tilelang.autodd.RewriteApplier attribute) vscale (in module tilelang.language.tir.ir) vscale() (in module tilelang.language.tir.op) W W (tilelang.carver.template.conv.ConvTemplate attribute), [1] wait_wgmma() (in module tilelang.language.builtin) walk_indice() (in module tilelang.carver.roller.shape_inference.tir) warmup (tilelang.autotuner.param.ProfileArgs attribute), [1] (tilelang.autotuner.tuner.AutoTuneImpl attribute) warp (tilelang.carver.roller.hint.Hint attribute) warp_col_tiles (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) warp_cols (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) warp_k (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) warp_reduce_bitand() (in module tilelang.contrib.cutedsl.warp) (in module tilelang.language.reduce_op) warp_reduce_bitor() (in module tilelang.contrib.cutedsl.warp) (in module tilelang.language.reduce_op) warp_reduce_max() (in module tilelang.contrib.cutedsl.warp) (in module tilelang.language.reduce_op) warp_reduce_min() (in module tilelang.contrib.cutedsl.warp) (in module tilelang.language.reduce_op) warp_reduce_sum() (in module tilelang.contrib.cutedsl.warp) (in module tilelang.language.reduce_op) warp_row_tiles (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) warp_rows (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) warp_size (tilelang.carver.arch.arch_base.TileDevice attribute) (tilelang.carver.arch.cdna.CDNA attribute) (tilelang.carver.arch.cuda.CUDA attribute) (tilelang.carver.arch.rdna.RDNA attribute) WARP_SIZE (tilelang.contrib.cutedsl.reduce.CumMax1D attribute) (tilelang.contrib.cutedsl.reduce.CumMax2D attribute) (tilelang.contrib.cutedsl.reduce.CumSum1D attribute) (tilelang.contrib.cutedsl.reduce.CumSum2D attribute) (tilelang.cuda.intrinsics.macro.mma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sm70_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.mma_sp_macro_generator.SparseTensorCoreIntrinEmitter attribute) (tilelang.metal.intrinsics.metal_macro_generator.MPSIntrinEmitter attribute) (tilelang.rocm.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) warpgroup_arrive() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) warpgroup_commit_batch() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) warpgroup_fence_operand() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) warpgroup_wait() (in module tilelang.contrib.cutedsl.gemm_v2) (in module tilelang.language.builtin) WarpSpecialize() (in module tilelang.language.warpgroup) WarpSpecializeFrame (class in tilelang.language.warpgroup) WebGPUPassPipelineBody() (in module tilelang.webgpu.pipeline) weight_transform_kind (tilelang.carver.roller.hint.IntrinInfo attribute) wg_wait (tilelang.tileop.gemm.Gemm property) (tilelang.tileop.gemm.gemm_base.GemmBase property) (tilelang.tileop.gemm_sp.gemm_sp_base.GemmSPBase property) (tilelang.tileop.gemm_sp.GemmSP attribute) wgmma() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_a_regs (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter property) wgmma_accum_regs (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter property) wgmma_arrive() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_commit() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_fence_a() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_fence_c() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_gemm() (in module tilelang.language.gemm_op) wgmma_gemm_sp() (in module tilelang.language.experimental.gemm_sp_op) wgmma_inst_m (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter attribute) wgmma_inst_n (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter attribute) wgmma_num_inst_m (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter property) wgmma_num_inst_n (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter property) wgmma_num_k_atoms (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter property) wgmma_prefix (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter attribute) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter attribute) wgmma_rs() (in module tilelang.contrib.cutedsl.gemm_v2) (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter method) wgmma_rs_atom() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_ss() (in module tilelang.contrib.cutedsl.gemm_v2) (tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator.WGSparseTensorCoreIntrinEmitter method) wgmma_ss_atom() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) wgmma_wait() (tilelang.cuda.intrinsics.macro.wgmma_macro_generator.TensorCoreIntrinEmitter method) WGMMADescriptorParams (class in tilelang.cuda.intrinsics.macro.wgmma_macro_generator) WGSparseTensorCoreIntrinEmitter (class in tilelang.cuda.intrinsics.macro.wgmma_sp_macro_generator) with_arch() (tilelang.carver.template.base.BaseTemplate method) with_bias (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] with_default_adapter() (tilelang.profiler.Profiler method) with_frame() (tilelang.language.eager.builder.Builder method) with_rocm_target_attrs() (in module tilelang.rocm.target) with_source() (tilelang.autodd.Task method) wmma() (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter method) wmma_k (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) wmma_shape (tilelang.rocm.intrinsics.wmma_macro_generator.WMMAIntrinEmitter attribute) wmma_store_index_map_gfx11() (in module tilelang.rocm.intrinsics.wmma_layout) wmma_store_index_map_gfx12() (in module tilelang.rocm.intrinsics.wmma_layout) WMMAIntrinEmitter (class in tilelang.rocm.intrinsics.wmma_macro_generator) worker() (tilelang.autodd.ParTaskManager method) wrap() (tilelang.jit.adapter.wrapper.BaseWrapper method) (tilelang.jit.adapter.wrapper.TLPyWrapper method) (tilelang.jit.adapter.wrapper.TLWrapper method) wrapper (tilelang.jit.adapter.cutedsl.adapter.CuTeDSLKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.nvrtc.adapter.NVRTCKernelAdapter attribute) ws (in module tilelang.language.warpgroup) X x (tilelang.contrib.cutedsl.threadblock_swizzle.dim3 attribute) Y y (tilelang.contrib.cutedsl.threadblock_swizzle.dim3 attribute) Z z (tilelang.contrib.cutedsl.threadblock_swizzle.dim3 attribute) Zero (tilelang.utils.tensor.TensorSupplyType attribute)