tilelang.language.tir.op
========================

.. py:module:: tilelang.language.tir.op


Attributes
----------

.. autoapisummary::

   tilelang.language.tir.op.sum
   tilelang.language.tir.op.min
   tilelang.language.tir.op.max


Functions
---------

.. autoapisummary::

   tilelang.language.tir.op.call_packed
   tilelang.language.tir.op.call_cpacked
   tilelang.language.tir.op.call_packed_lowered
   tilelang.language.tir.op.call_cpacked_lowered
   tilelang.language.tir.op.call_intrin
   tilelang.language.tir.op.call_pure_extern
   tilelang.language.tir.op.call_extern
   tilelang.language.tir.op.call_llvm_intrin
   tilelang.language.tir.op.call_llvm_pure_intrin
   tilelang.language.tir.op.tvm_check_return
   tilelang.language.tir.op.tvm_stack_alloca
   tilelang.language.tir.op.tvm_stack_make_shape
   tilelang.language.tir.op.tvm_stack_make_array
   tilelang.language.tir.op.assume
   tilelang.language.tir.op.undef
   tilelang.language.tir.op.call_tir
   tilelang.language.tir.op.start_profile_intrinsic
   tilelang.language.tir.op.end_profile_intrinsic
   tilelang.language.tir.op.tvm_tuple
   tilelang.language.tir.op.tvm_struct_get
   tilelang.language.tir.op.tvm_struct_set
   tilelang.language.tir.op.address_of
   tilelang.language.tir.op.lookup_param
   tilelang.language.tir.op.tvm_thread_allreduce
   tilelang.language.tir.op.tvm_thread_invariant
   tilelang.language.tir.op.tvm_storage_sync
   tilelang.language.tir.op.tvm_warp_shuffle
   tilelang.language.tir.op.tvm_warp_shuffle_up
   tilelang.language.tir.op.tvm_warp_shuffle_down
   tilelang.language.tir.op.tvm_warp_activemask
   tilelang.language.tir.op.type_annotation
   tilelang.language.tir.op.tvm_access_ptr
   tilelang.language.tir.op.tvm_throw_last_error
   tilelang.language.tir.op.tvm_load_matrix_sync
   tilelang.language.tir.op.tvm_mma_sync
   tilelang.language.tir.op.tvm_bmma_sync
   tilelang.language.tir.op.tvm_fill_fragment
   tilelang.language.tir.op.tvm_store_matrix_sync
   tilelang.language.tir.op.ptx_mma
   tilelang.language.tir.op.ptx_mma_sp
   tilelang.language.tir.op.mma_store
   tilelang.language.tir.op.mma_fill
   tilelang.language.tir.op.ptx_ldmatrix
   tilelang.language.tir.op.ptx_cp_async
   tilelang.language.tir.op.ptx_cp_async_bulk
   tilelang.language.tir.op.ptx_commit_group
   tilelang.language.tir.op.ptx_wait_group
   tilelang.language.tir.op.tvm_mfma
   tilelang.language.tir.op.tvm_mfma_store
   tilelang.language.tir.op.tvm_rdna_wmma
   tilelang.language.tir.op.tvm_rdna_wmma_store
   tilelang.language.tir.op.ptx_cp_async_barrier
   tilelang.language.tir.op.ptx_init_barrier_thread_count
   tilelang.language.tir.op.ptx_arrive_barrier
   tilelang.language.tir.op.ptx_arrive_barrier_expect_tx
   tilelang.language.tir.op.ptx_wait_barrier
   tilelang.language.tir.op.create_barriers
   tilelang.language.tir.op.vectorlow
   tilelang.language.tir.op.vectorhigh
   tilelang.language.tir.op.vectorcombine
   tilelang.language.tir.op.ret
   tilelang.language.tir.op.any
   tilelang.language.tir.op.all
   tilelang.language.tir.op.trace
   tilelang.language.tir.op.min_value
   tilelang.language.tir.op.max_value
   tilelang.language.tir.op.infinity
   tilelang.language.tir.op.reinterpret
   tilelang.language.tir.op.exp
   tilelang.language.tir.op.exp2
   tilelang.language.tir.op.exp10
   tilelang.language.tir.op.erf
   tilelang.language.tir.op.tanh
   tilelang.language.tir.op.sigmoid
   tilelang.language.tir.op.log
   tilelang.language.tir.op.log2
   tilelang.language.tir.op.log10
   tilelang.language.tir.op.log1p
   tilelang.language.tir.op.tan
   tilelang.language.tir.op.cos
   tilelang.language.tir.op.cosh
   tilelang.language.tir.op.acos
   tilelang.language.tir.op.acosh
   tilelang.language.tir.op.sin
   tilelang.language.tir.op.sinh
   tilelang.language.tir.op.asin
   tilelang.language.tir.op.asinh
   tilelang.language.tir.op.atan
   tilelang.language.tir.op.atanh
   tilelang.language.tir.op.atan2
   tilelang.language.tir.op.sqrt
   tilelang.language.tir.op.rsqrt
   tilelang.language.tir.op.clz
   tilelang.language.tir.op.floor
   tilelang.language.tir.op.ceil
   tilelang.language.tir.op.trunc
   tilelang.language.tir.op.abs
   tilelang.language.tir.op.bitwise_and
   tilelang.language.tir.op.bitwise_not
   tilelang.language.tir.op.bitwise_or
   tilelang.language.tir.op.bitwise_xor
   tilelang.language.tir.op.round
   tilelang.language.tir.op.nearbyint
   tilelang.language.tir.op.nextafter
   tilelang.language.tir.op.hypot
   tilelang.language.tir.op.copysign
   tilelang.language.tir.op.ldexp
   tilelang.language.tir.op.likely
   tilelang.language.tir.op.isnan
   tilelang.language.tir.op.isnullptr
   tilelang.language.tir.op.isfinite
   tilelang.language.tir.op.isinf
   tilelang.language.tir.op.pow_of_int
   tilelang.language.tir.op.power
   tilelang.language.tir.op.pow
   tilelang.language.tir.op.popcount
   tilelang.language.tir.op.q_multiply_shift
   tilelang.language.tir.op.q_multiply_shift_per_axis
   tilelang.language.tir.op.shift_left
   tilelang.language.tir.op.shift_right
   tilelang.language.tir.op.fmod
   tilelang.language.tir.op.if_then_else
   tilelang.language.tir.op.div
   tilelang.language.tir.op.indexdiv
   tilelang.language.tir.op.indexmod
   tilelang.language.tir.op.truncdiv
   tilelang.language.tir.op.truncmod
   tilelang.language.tir.op.floordiv
   tilelang.language.tir.op.floormod
   tilelang.language.tir.op.ceildiv
   tilelang.language.tir.op.comm_reducer
   tilelang.language.tir.op.TVMBackendAllocWorkspace
   tilelang.language.tir.op.TVMBackendFreeWorkspace
   tilelang.language.tir.op.anylist_getitem
   tilelang.language.tir.op.anylist_resetitem
   tilelang.language.tir.op.anylist_setitem_call_packed
   tilelang.language.tir.op.anylist_setitem_call_cpacked
   tilelang.language.tir.op.vscale


Module Contents
---------------

.. py:function:: call_packed(*args, span=None)

   Build expression by calling an external packed function.

   The argument to packed function can be Expr or Buffer.
   The argument is the corresponding POD type when Expr is presented.

   When the argument is Buffer, the corresponding PackedFunc
   will receive a TVMArrayHandle whose content is valid during the callback period.
   If the PackedFunc is a python callback, then the corresponding argument is NDArray.

   :param args: Positional arguments.
   :type args: list of Expr or Buffer.
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr

   .. seealso::

      :obj:`te.extern`
          Create tensor with extern function call.


.. py:function:: call_cpacked(*args, span=None)

   Build expression by calling an external packed function.

   Same as call_packed, except that the first argument is the function name
   (as in call_extern), and the last argument is the resource handle.

   :param args: Positional arguments.
   :type args: list of Expr or Buffer.
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr

   .. seealso::

      :obj:`te.extern`
          Create tensor with extern function call.


.. py:function:: call_packed_lowered(*args, span=None)

   Lowered version of call packed.
   The argument to packed function can be Expr or Buffer.
   The argument is the corresponding POD type when Expr is presented.
   When the argument is Buffer, the corresponding PackedFunc
   will receive a TVMArrayHandle whose content is valid during the callback period.
   If the PackedFunc is a python callback, then the corresponding argument is NDArray.

   :param args: Positional arguments.
   :type args: list of Expr or Buffer.
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr

   .. seealso::

      :obj:`te.extern`
          Create tensor with extern function call.


.. py:function:: call_cpacked_lowered(*args, span=None)

   Lowered version of call c-packed.
   Same as call_packed, except that the first argument is the function name
   (as in call_extern), and the last argument is the resource handle.

   :param args: Positional arguments.
   :type args: list of Expr or Buffer.
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr

   .. seealso::

      :obj:`te.extern`
          Create tensor with extern function call.


.. py:function:: call_intrin(dtype, func_name, *args, span=None)

   Build expression by calling an intrinsic function.

   Intrinsics can be overloaded with multiple data types via
   the intrinsic translation rule.

   :param dtype: The data type of the result.
   :type dtype: str
   :param func_name: The intrinsic function name.
   :type func_name: str
   :param args: Positional arguments.
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: call_pure_extern(dtype, func_name, *args, span=None)

   Build expression by calling a pure extern function.

   :param dtype: The data type of the result.
   :type dtype: str
   :param func_name: The extern function name.
   :type func_name: str
   :param args: Positional arguments.
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: call_extern(dtype, func_name, *args, span=None)

   Build expression by calling an extern function.

   :param dtype: The data type of the result.
   :type dtype: str
   :param func_name: The extern function name.
   :type func_name: str
   :param args: Positional arguments.
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: call_llvm_intrin(dtype, name, *args, span=None)

   Build expression by calling an LLVM intrinsic function.

   :param dtype: The data type of the result.
   :type dtype: str
   :param name: The name of the llvm intrinsic function.
   :type name: str
   :param args: Positional arguments.
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: call_llvm_pure_intrin(dtype, name, *args, span=None)

   Build expression by calling a pure LLVM intrinsic function.

   :param dtype: The data type of the result.
   :type dtype: str
   :param name: The name of the llvm intrinsic function.
   :type name: str
   :param args: Positional arguments.
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_check_return(expected, return_unexpected, nested_call)

   Check that the return value of a nested call matches the expected return code.

   :param expected: The expected return code.
   :type expected: int
   :param return_unexpected: The unexpected return code.
   :type return_unexpected: int
   :param nested_call: The call expression to check return.
   :type nested_call: PrimExpr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_stack_alloca(dtype_str, num)

   Return new on stack dtype[num]

   :param dtype_str: The data type of array.
   :type dtype_str: str
   :param num: The size of array.
   :type num: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_stack_make_shape(*args)

   Allocate a shape tuple on stack, return the handle

   :param args: The tuple shape.
   :type args: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_stack_make_array(data, shape, strides, ndim, arr_dtype, elem_offset)

   Allocate a NDArray(DLTensor) on stack, return the handle

   :param data: The data of array.
   :type data: Expr
   :param shape: The shape of array.
   :type shape: Expr
   :param strides: The strides of array.
   :type strides: Expr
   :param ndim: The dimensions of array.
   :type ndim: Expr
   :param arr_dtype: The data type of array.
   :type arr_dtype: Expr
   :param elem_offset: The element offset of array.
   :type elem_offset: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: assume(cond=None)

   Provide a true statement that can be used for simplifications

   :param cond: The constraint condition.
   :type cond: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: undef()

   Returns an initialized but arbitrary value

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: call_tir(global_var, *args)

   Performs a call into another PrimFunc in the same IRModule

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: start_profile_intrinsic(id)

   Start profile intrinsic.

   :param id: The intrinsic id.
   :type id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: end_profile_intrinsic(id)

   End profile intrinsic.

   :param id: The intrinsic id.
   :type id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_tuple(*value)

   Create a tuple structure in value field of AttrStmt

   :param value: The value in tuple.
   :type value: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_struct_get(arr, index, field, dtype)

   Get struct field value in array

   :param dtype: The data type of the result.
   :type dtype: str
   :param arr: The array of struct.
   :type arr: StructType*
   :param index: The index of struct.
   :type index: int
   :param field: The field of struct.
   :type field: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_struct_set(arr, index, field, value)

   Set value in struct field in array

   :param arr: The array of struct.
   :type arr: StructType*
   :param index: The index of struct.
   :type index: int
   :param field: The field of struct.
   :type field: int
   :param value: The value to be set in field.
   :type value: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: address_of(buffer_load, span=None)

   Returns the address of an element in the buffer

   :param buffer_load: The buffer load.
   :type buffer_load: BufferLoad
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: lookup_param(param_name, span=None)

   Returns the param by name

   :param param_name: The name of param.
   :type param_name: str
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_thread_allreduce(*freduce_args)

   Perform allreduce inside threadblock.

   :param freduce_args: The args.
   :type freduce_args: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_thread_invariant(cond)

   Mark condition as thread invariant.

   :param cond: The condition.
   :type cond: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_storage_sync(storage_scope)

   Perform synchronization in specified scope.

   :param storage_scope: The storage scope to perform synchronization.
   :type storage_scope: str

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_warp_shuffle(mask, value, warp_id, width, warp_size)

   Exchange value between threads inside a warp.

   :param mask: The warp mask indicating the active threads inside the warp.
   :type mask: PrimExpr
   :param value: The value to exchange.
   :type value: PrimExpr
   :param warp_id: The source lane index to fetch value.
   :type warp_id: PrimExpr
   :param width: The width of sub-sections to perform warp shuffle.
   :type width: PrimExpr
   :param warp_size: The warp size.
   :type warp_size: PrimExpr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_warp_shuffle_up(mask, value, offset, width, warp_size)

   Copy value from a lane with lower (by offset) index relative to caller.

   :param mask: The warp mask indicating the active threads inside the warp.
   :type mask: PrimExpr
   :param value: The value to exchange.
   :type value: PrimExpr
   :param offset: The difference between source lane index and destination lane index:
                  ``offset = dst_lane_idx - src_lane_idx``
   :type offset: PrimExpr
   :param width: The width of sub-sections to perform warp shuffle.
   :type width: PrimExpr
   :param warp_size: The warp size.
   :type warp_size: PrimExpr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_warp_shuffle_down(mask, value, offset, width, warp_size)

   Copy value from a lane with higher (by offset) index relative to caller.

   :param mask: The warp mask indicating the active threads inside the warp.
   :type mask: PrimExpr
   :param value: The value to exchange.
   :type value: PrimExpr
   :param offset: The difference between source lane index and destination lane index:
                  ``offset = src_lane_idx - dst_lane_idx``
   :type offset: PrimExpr
   :param width: The width of sub-sections to perform warp shuffle.
   :type width: PrimExpr
   :param warp_size: The warp size.
   :type warp_size: PrimExpr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_warp_activemask()

   Return a 32-bit mask indicating the currently active threads in the calling warp.

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: type_annotation(dtype)

   Create a type annotation expression

   :param dtype: The data type.
   :type dtype: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_access_ptr(ptype, data, offset, extent, rw_mask)

   Get head access address with memory access pattern info

   :param ptype: The data type of pointer.
   :type ptype: Expr
   :param data: The data of pointer.
   :type data: DType*
   :param offset: The offset of pointer.
   :type offset: int
   :param extent: The extent of pointer.
   :type extent: int
   :param rw_mask: The read write mask.
   :type rw_mask: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_throw_last_error()

   Throw TVMGetLastError()

   :returns: **ret** -- The return expression
   :rtype: PrimExpr


.. py:function:: tvm_load_matrix_sync(fragment, m, n, k, index, buffer_ptr, stride, layout)

   TVM intrinsic for tensor core load operators

   :param fragment: The wmma fragment.
   :type fragment: Var
   :param m: The shape of wmma fragment.
   :type m: UIntImm
   :param n: The shape of wmma fragment.
   :type n: UIntImm
   :param k: The shape of wmma fragment.
   :type k: UIntImm
   :param index: The fragment index.
   :type index: Expr
   :param buffer_ptr: The fragment buffer pointer.
   :type buffer_ptr: Expr
   :param stride: The fragment stride.
   :type stride: Expr
   :param layout: The fragment layout.
   :type layout: Literal["row_major", "column_major"]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_mma_sync(fragment_d, index_d, fragment_a, index_a, fragment_b, index_b, fragment_c, index_c)

   TVM intrinsic for tensor core mma_sync operators

   :param fragment_d: The wmma fragment_d.
   :type fragment_d: Var
   :param index_d: The fragment_d index.
   :type index_d: Expr
   :param fragment_a: The wmma fragment_a.
   :type fragment_a: Var
   :param index_a: The fragment_a index.
   :type index_a: Expr
   :param fragment_b: The wmma fragment_b.
   :type fragment_b: Var
   :param index_b: The fragment_b index.
   :type index_b: Expr
   :param fragment_c: The wmma fragment_c.
   :type fragment_c: Var
   :param index_c: The fragment_c index.
   :type index_c: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_bmma_sync(fragment_d, index_d, fragment_a, index_a, fragment_b, index_b, fragment_c, index_c)

   TVM intrinsic for tensor core bmma_sync operators

   :param fragment_d: The bwmma fragment_d.
   :type fragment_d: Var
   :param index_d: The fragment_d index.
   :type index_d: Expr
   :param fragment_a: The bwmma fragment_a.
   :type fragment_a: Var
   :param index_a: The fragment_a index.
   :type index_a: Expr
   :param fragment_b: The bwmma fragment_b.
   :type fragment_b: Var
   :param index_b: The fragment_b index.
   :type index_b: Expr
   :param fragment_c: The bwmma fragment_c.
   :type fragment_c: Var
   :param index_c: The fragment_c index.
   :type index_c: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_fill_fragment(fragment, m, n, k, index, value)

   TVM intrinsic for tensor core fill_fragment operators

   :param fragment: The wmma fragment
   :type fragment: Var
   :param m: The shape of wmma fragment.
   :type m: UIntImm
   :param n: The shape of wmma fragment.
   :type n: UIntImm
   :param k: The shape of wmma fragment.
   :type k: UIntImm
   :param index: The fragment index.
   :type index: Expr
   :param value: The value to be filled in fragment.
   :type value: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_store_matrix_sync(fragment, m, n, k, index, buffer_ptr, stride, layout)

   TVM intrinsic for tensor core store operators

   :param fragment: The wmma fragment.
   :type fragment: Var
   :param m: The shape of wmma fragment.
   :type m: UIntImm
   :param n: The shape of wmma fragment.
   :type n: UIntImm
   :param k: The shape of wmma fragment.
   :type k: UIntImm
   :param index: The fragment index.
   :type index: Expr
   :param buffer_ptr: The fragment buffer pointer.
   :type buffer_ptr: Expr
   :param stride: The fragment stride.
   :type stride: Expr
   :param layout: The fragment layout.
   :type layout: Literal["row_major", "column_major"]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_mma(dtype, shape, A_layout, B_layout, A_dtype, B_dtype, C_dtype, multiplicand_a, a_index, multiplicand_b, b_index, accumulator, c_index, saturate, operator=None)

   TVM intrinsic for ptx tensor core mma instructions
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-mma

   :param dtype: The data type of the result.
   :type dtype: str
   :param shape: The shape of mma fragment.
   :type shape: str
   :param A_layout: The layout of multiplicand fragment A.
   :type A_layout: Literal["row", "col"]
   :param B_layout: The layout of multiplicand fragment B.
   :type B_layout: Literal["row", "col"]
   :param A_dtype: The data type of multiplicand fragment A.
   :type A_dtype: str
   :param B_dtype: The data type of multiplicand fragment B.
   :type B_dtype: str
   :param C_dtype: The data type of accumulator fragment C.
   :type C_dtype: str
   :param multiplicand_a: The multiplicand fragment A variable.
   :type multiplicand_a: Var
   :param a_index: The index of multiplicand fragment A.
   :type a_index: Expr
   :param multiplicand_b: The multiplicand fragment B variable.
   :type multiplicand_b: Var
   :param b_index: The index of multiplicand fragment B.
   :type b_index: Expr
   :param accumulator: The accumulator fragment C variable.
   :type accumulator: Var
   :param c_index: The index of accumulator fragment C.
   :type c_index: Expr
   :param saturate: The optional saturation at the output.
   :type saturate: bool
   :param operator: The 1-bit operator.
   :type operator: Optional[Literal["xor", "and"]]

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_mma_sp(dtype, shape, A_layout, B_layout, A_dtype, B_dtype, C_dtype, multiplicand_a, a_index, multiplicand_b, b_index, accumulator, c_index, metadata, meta_index, sparse_selector, saturate)

   TVM intrinsic for sparse tensor core ptx instructions
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-sparse-mma

   :param dtype: The data type of the result.
   :type dtype: str
   :param shape: The shape of mma fragment.
   :type shape: str
   :param A_layout: The layout of multiplicand fragment A.
   :type A_layout: Literal["row", "col"]
   :param B_layout: The layout of multiplicand fragment B.
   :type B_layout: Literal["row", "col"]
   :param A_dtype: The data type of multiplicand fragment A.
   :type A_dtype: str
   :param B_dtype: The data type of multiplicand fragment B.
   :type B_dtype: str
   :param C_dtype: The data type of accumulator fragment C.
   :type C_dtype: str
   :param multiplicand_a: The multiplicand fragment A variable.
   :type multiplicand_a: Var
   :param a_index: The index of multiplicand fragment A.
   :type a_index: Expr
   :param multiplicand_b: The multiplicand fragment B variable.
   :type multiplicand_b: Var
   :param b_index: The index of multiplicand fragment B.
   :type b_index: Expr
   :param accumulator: The accumulator fragment C variable.
   :type accumulator: Var
   :param c_index: The index of accumulator fragment C.
   :type c_index: Expr
   :param metadata: The metadata of operand.
   :type metadata: Expr
   :param meta_index: The metadata index of operand.
   :type meta_index: Expr
   :param sparse_selector: The sparse selector indicating the thread that stores the metadata.
   :type sparse_selector: Expr
   :param saturate: The optional saturation at the output.
   :type saturate: bool

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: mma_store(dtype, m, n, dst_ptr, src_ptr, src_offset, dst_stride)

   TVM intrinsic for storing the result of PTX MMA into a destination pointer

   :param dtype: The data type of the result.
   :type dtype: str
   :param m: The shape of mma fragment.
   :type m: IntImm
   :param n: The shape of mma fragment.
   :type n: IntImm
   :param dst_ptr: The destination pointer variable.
   :type dst_ptr: Var
   :param src_ptr: The source pointer variable.
   :type src_ptr: Var
   :param src_offset: The source offset.
   :type src_offset: Expr
   :param dst_stride: The destination stride.
   :type dst_stride: Var

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: mma_fill(dtype, local_size, local_ptr, offset)

   TVM intrinsic for zero-initializing an MMA accumulation register

   :param dtype: The data type of the result.
   :type dtype: str
   :param local_size: The number of elements.
   :type local_size: IntImm
   :param local_ptr: The destination pointer variable.
   :type local_ptr: Var
   :param offset: The destination offset.
   :type offset: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_ldmatrix(dtype, trans, num, type, local_ptr, local_offset, smem_ptr, smem_offset)

   TVM intrinsic for ptx load matrix from shared memory
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-ldmatrix

   :param dtype: The data type of the result.
   :type dtype: str
   :param trans: The matrix is loaded in column-major format.
   :type trans: bool
   :param num: The number of matrices.
   :type num: IntImm
   :param type: The data type of the matrices.
   :type type: Literal[".b16"]
   :param local_ptr: The local pointer variable.
   :type local_ptr: Var
   :param local_offset: The offset of local pointer.
   :type local_offset: Expr
   :param smem_ptr: The shared memory pointer variable.
   :type smem_ptr: Var
   :param smem_offset: The offset of shared memory pointer.
   :type smem_offset: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_cp_async(dtype, shared_ptr, shared_offset, global_ptr, global_offset, bytes)

   TVM intrinsic for ptx async copy from global to shared memory using cp.async
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async

   :param dtype: The data type of the result.
   :type dtype: str
   :param shared_ptr: The shared memory pointer variable.
   :type shared_ptr: Var
   :param shared_offset: The offset of shared memory pointer.
   :type shared_offset: Expr
   :param global_ptr: The global memory pointer variable.
   :type global_ptr: Var
   :param global_offset: The offset of global memory pointer.
   :type global_offset: Expr
   :param bytes: The data size to copy.
   :type bytes: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_cp_async_bulk(dtype, shared_ptr, shared_offset, global_ptr, global_offset, bytes, barrier_id)

   TVM intrinsic for ptx async copy from global to shared memory using cp.async.bulk
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk

   :param dtype: The data type of the result.
   :type dtype: str
   :param shared_ptr: The shared memory pointer variable.
   :type shared_ptr: Var
   :param shared_offset: The offset of shared memory pointer.
   :type shared_offset: Expr
   :param global_ptr: The global memory pointer variable.
   :type global_ptr: Var
   :param global_offset: The offset of global memory pointer.
   :type global_offset: Expr
   :param bytes: The data size to copy.
   :type bytes: int
   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_commit_group()

   TVM intrinsic for ptx async copy commit
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-commit-group

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_wait_group(num)

   TVM intrinsic for ptx async copy wait
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-wait-group

   :param num: The number of the most recent uncommitted pending cp.async groups to wait.
   :type num: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_mfma(dtype, shape, A_layout, B_layout, A_dtype, B_dtype, C_dtype, multiplicand_a, a_index, multiplicand_b, b_index, accumulator, c_index)

   TVM intrinsic for amd matrix core mfma instructions
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-mma

   :param dtype: The data type of the result.
   :type dtype: str
   :param shape: The shape of mma fragment.
   :type shape: str
   :param A_layout: The layout of multiplicand fragment A.
   :type A_layout: Literal["row", "col"]
   :param B_layout: The layout of multiplicand fragment B.
   :type B_layout: Literal["row", "col"]
   :param A_dtype: The data type of multiplicand fragment A.
   :type A_dtype: str
   :param B_dtype: The data type of multiplicand fragment B.
   :type B_dtype: str
   :param C_dtype: The data type of accumulator fragment C.
   :type C_dtype: str
   :param multiplicand_a: The multiplicand fragment A variable.
   :type multiplicand_a: Var
   :param a_index: The index of multiplicand fragment A.
   :type a_index: Expr
   :param multiplicand_b: The multiplicand fragment B variable.
   :type multiplicand_b: Var
   :param b_index: The index of multiplicand fragment B.
   :type b_index: Expr
   :param accumulator: The accumulator fragment C variable.
   :type accumulator: Var
   :param c_index: The index of accumulator fragment C.
   :type c_index: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_mfma_store(dtype, m, n, dst_ptr, src_ptr, src_offset, dst_stride)

   TVM intrinsic for storing the result of AMD MFMA into a destination pointer

   :param dtype: The data type of the result.
   :type dtype: str
   :param m: The shape of mma fragment.
   :type m: IntImm
   :param n: The shape of mma fragment.
   :type n: IntImm
   :param dst_ptr: The destination pointer variable.
   :type dst_ptr: Var
   :param src_ptr: The source pointer variable.
   :type src_ptr: Var
   :param src_offset: The source offset.
   :type src_offset: Expr
   :param dst_stride: The destination stride.
   :type dst_stride: Var

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_rdna_wmma(dtype, shape, A_layout, B_layout, A_dtype, B_dtype, C_dtype, multiplicand_a, a_index, multiplicand_b, b_index, accumulator, c_index)

   TVM intrinsic for AMD RDNA WMMA instructions
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-mma

   :param dtype: The data type of the result.
   :type dtype: str
   :param shape: The shape of mma fragment.
   :type shape: str
   :param A_layout: The layout of multiplicand fragment A.
   :type A_layout: Literal["row", "col"]
   :param B_layout: The layout of multiplicand fragment B.
   :type B_layout: Literal["row", "col"]
   :param A_dtype: The data type of multiplicand fragment A.
   :type A_dtype: str
   :param B_dtype: The data type of multiplicand fragment B.
   :type B_dtype: str
   :param C_dtype: The data type of accumulator fragment C.
   :type C_dtype: str
   :param multiplicand_a: The multiplicand fragment A variable.
   :type multiplicand_a: Var
   :param a_index: The index of multiplicand fragment A.
   :type a_index: Expr
   :param multiplicand_b: The multiplicand fragment B variable.
   :type multiplicand_b: Var
   :param b_index: The index of multiplicand fragment B.
   :type b_index: Expr
   :param accumulator: The accumulator fragment C variable.
   :type accumulator: Var
   :param c_index: The index of accumulator fragment C.
   :type c_index: Expr

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: tvm_rdna_wmma_store(dtype, m, n, dst_ptr, src_ptr, src_offset, dst_stride)

   TVM intrinsic for storing the result of AMD RDNA WMMA into a destination pointer

   :param dtype: The data type of the result.
   :type dtype: str
   :param m: The shape of mma fragment.
   :type m: IntImm
   :param n: The shape of mma fragment.
   :type n: IntImm
   :param dst_ptr: The destination pointer variable.
   :type dst_ptr: Var
   :param src_ptr: The source pointer variable.
   :type src_ptr: Var
   :param src_offset: The source offset.
   :type src_offset: Expr
   :param dst_stride: The destination stride.
   :type dst_stride: Var

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_cp_async_barrier(barrier_id)

   TVM intrinsic for ptx async copy barrier using cp.async.mbarrier.arrive
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive

   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_init_barrier_thread_count(barrier_id, thread_count)

   TVM intrinsic for ptx barrier initialization of thread count using mbarrier.init
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init

   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int
   :param thread_count: Number of threads expected to arrive at the barrier.
   :type thread_count: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_arrive_barrier(barrier_id)

   TVM intrinsic for ptx barrier arrival using mbarrier.arrive
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive

   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_arrive_barrier_expect_tx(barrier_id, byte_count)

   TVM intrinsic for ptx barrier arrival with expect tx using mbarrier.arrive.expect_tx
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-expect-tx-operation

   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int
   :param byte_count: Increases the tx count of the mbarrier object to track completion of
                      additional async transactions.
   :type byte_count: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ptx_wait_barrier(barrier_id)

   TVM intrinsic for ptx barrier wait using mbarrier.try_wait
   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-mbarrier-try-wait

   :param barrier_id: The ID of the barrier shared memory pointer.
   :type barrier_id: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: create_barriers(barrier_count)

   TVM intrinsic to create N barriers

   :param barrier_count: The number of barriers to create.
   :type barrier_count: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: vectorlow(dtype, vec)

   Get the low level half of the vector

   :param dtype: The data type of the result.
   :type dtype: str
   :param vec: The input vector.
   :type vec: list

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: vectorhigh(dtype, vec)

   Get the high level half of the vector

   :param dtype: The data type of the result.
   :type dtype: str
   :param vec: The input vector.
   :type vec: list

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: vectorcombine(dtype, vec1, vec2)

   Concat two vectors

   :param dtype: The data type of the result.
   :type dtype: str
   :param vec1: The first input vector.
   :type vec1: list
   :param vec2: The second input vector.
   :type vec2: list

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: ret(val)

   Create a tir return expression

   :param val: The returned tir expression, whose data type is int, float or void pointer.
   :type val: Expr

   :returns: **ret** -- The return expression
   :rtype: PrimExpr


.. py:function:: any(*args, span=None)

   Create a new expression of the union of all conditions in the arguments

   :param args: List of symbolic boolean expressions
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **expr** -- Expression
   :rtype: Expr


.. py:function:: all(*args, span=None)

   Create a new expression of the intersection of all conditions in the
   arguments

   :param args: List of symbolic boolean expressions
   :type args: list
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **expr** -- Expression
   :rtype: Expr


.. py:function:: trace(args, trace_action='tvm.default_trace_action')

   Trace tensor data at the runtime.

   The trace function allows tracing a specific tensor at
   runtime. The value to trace should come as the last argument.
   The trace action should be specified; by default,
   tvm.default_trace_action is used.

   :param args: Positional arguments.
   :type args: list of Expr or Buffers
   :param trace_action: The name of the trace action.
   :type trace_action: str

   :returns: **call** -- The call expression.
   :rtype: PrimExpr

   .. seealso::

      :obj:`tvm.tir.call_packed`
          Creates packed function.


.. py:function:: min_value(dtype, span=None)

   minimum value of dtype

   :param dtype: The data type.
   :type dtype: str
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **value** -- The minimum value of dtype.
   :rtype: tvm.Expr


.. py:function:: max_value(dtype, span = None)

   maximum value of dtype

   :param dtype: The data type.
   :type dtype: str
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **value** -- The maximum value of dtype.
   :rtype: tvm.Expr


.. py:function:: infinity(dtype, span = None)

   infinity value of dtype

   :param dtype: The data type.
   :type dtype: str
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **value** -- The infinity value of dtype.
   :rtype: tvm.Expr


.. py:function:: reinterpret(dtype, value, span = None)

   Reinterpret cast of the input value to dtype

   :param dtype: The data type.
   :type dtype: str
   :param value: The input value.
   :type value: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **value** -- The reinterpret cast value of dtype.
   :rtype: tvm.Expr


.. py:function:: exp(x)

   Take exponential of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: exp2(x)

   Calculate 2**x

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: exp10(x)

   Calculate 10**x

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: erf(x)

   Take gauss error function of the input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: tanh(x)

   Take hyperbolic tanh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: sigmoid(x)

   Quick function to get sigmoid

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: log(x)

   Take log of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: log2(x)

   Take log2 of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: log10(x)

   Take log10 of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: log1p(x)

   Take log(x + 1) with respect to input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: tan(x)

   Take tan of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: cos(x)

   Take cos of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: cosh(x)

   Take cosh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: acos(x)

   Take acos of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: acosh(x)

   Take acosh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: sin(x)

   Take sin of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: sinh(x)

   Take sinh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: asin(x)

   Take asin of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: asinh(x)

   Take asinh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: atan(x)

   Take atan of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: atanh(x)

   Take atanh of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: atan2(x1, x2)

   Take arctan2(x1, x2).

   :param x1: Input argument.
   :type x1: PrimExpr
   :param x2: Input argument.
   :type x2: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: sqrt(x)

   Take square root of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: rsqrt(x)

   Take reciprocal of square root of input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: clz(x)

   Count leading zero bits of an integer x.

   :param x: Input 32 or 64 bit integer.
             The result is undefined if the input is 0.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: floor(x, span=None)

   Take floor of float input x.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: ceil(x, span=None)

   Take ceil of float input x.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: trunc(x, span=None)

   Get truncated value of the input.

   The truncated value of the scalar x is the
   nearest integer i which is closer to zero than x is.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: abs(x, span=None)

   Get absolute value of the input element-wise.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: bitwise_and(x, y, span=None)

   Take bitwise and of two values

   :param x: Left operand
   :type x: PrimExpr
   :param y: Right operand
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **res** -- The result.
   :rtype: PrimExpr


.. py:function:: bitwise_not(x, span=None)

   Take bitwise not of input value

   :param x: Input operand
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **res** -- The result.
   :rtype: PrimExpr


.. py:function:: bitwise_or(x, y, span=None)

   Take bitwise or of two values

   :param x: Left operand
   :type x: PrimExpr
   :param y: Right operand
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **res** -- The result.
   :rtype: PrimExpr


.. py:function:: bitwise_xor(x, y, span=None)

   Take bitwise xor of two values

   :param x: Left operand
   :type x: PrimExpr
   :param y: Right operand
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **res** -- The result.
   :rtype: PrimExpr


.. py:function:: round(x, span=None)

   Round elements of the array to the nearest integer.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: nearbyint(x, span=None)

   Round elements of the array to the nearest integer.
   This intrinsic uses llvm.nearbyint instead of llvm.round
   which is faster but will give results different from te.round.
   Notably nearbyint rounds according to the rounding mode,
   whereas te.round (llvm.round) ignores that.
   For differences between the two see:
   https://en.cppreference.com/w/cpp/numeric/math/round
   https://en.cppreference.com/w/cpp/numeric/math/nearbyint

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: nextafter(x1, x2)

   Return the next floating-point value after x1 towards x2.

   :param x1: Input argument.
   :type x1: PrimExpr
   :param x2: Input argument.
   :type x2: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: hypot(x1, x2)

   Equivalent to sqrt(x1**2 + x2**2), element-wise.

   :param x1: Input argument.
   :type x1: PrimExpr
   :param x2: Input argument.
   :type x2: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: copysign(x1, x2)

   Change the sign of x1 to that of x2, element-wise.

   :param x1: Input argument.
   :type x1: PrimExpr
   :param x2: Input argument.
   :type x2: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: ldexp(x1, x2)

   Returns x1 * (2 ** x2).

   :param x1: Input argument.
   :type x1: PrimExpr
   :param x2: Input argument.
   :type x2: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: likely(cond, span=None)

   Mark condition as likely.

   :param cond: Input argument.
   :type cond: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The marked expression.
   :rtype: PrimExpr


.. py:function:: isnan(x, span=None)

   Check if input value is Nan.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: isnullptr(x, span=None)

   Check if input value is nullptr.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: isfinite(x, span=None)

   Check if input value is finite.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: isinf(x, span=None)

   Check if input value is infinite.

   :param x: Input argument.
   :type x: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: pow_of_int(x, y)

   Faster power operation than pow(float, float).

   :param x: Base value
   :type x: PrimExpr
   :param y: Exponent value
   :type y: int


.. py:function:: power(x, y, span=None)

   x power y

   :param x: Input argument.
   :type x: PrimExpr
   :param y: The exponent
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: pow(x, y, span=None)

   x power y

   :param x: Input argument.
   :type x: PrimExpr
   :param y: The exponent
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: popcount(x)

   Count the number of set bits in input x.

   :param x: Input argument.
   :type x: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: q_multiply_shift(x, y, q, s)

   Execute a multiplication between two Q-numbers x and y
   followed by a right shift s. The mathematical expression is:

      out = round(x*y*2^-s)

   More about Q-numbers here: https://en.wikipedia.org/wiki/Q_(number_format)
   The rounding rule is to the nearest value, rounding half up
   (i.e., round(x.1) = x and round (x.5) = x+1)

   :param x: First Q-number
   :type x: PrimExpr
   :param y: Second Q-number
   :type y: PrimExpr
   :param q: Number of fractional bits in x and y. Needs to be > 0
   :type q: PrimExpr
   :param s: Integer shift
   :type s: PrimExpr

   :returns: **y** -- The result.
   :rtype: PrimExpr


.. py:function:: q_multiply_shift_per_axis(x, y, ls, rs, q, is_lshift_required, is_rshift_required)

   Execute a multiplication between two Q-numbers x and y

   :param x: First Q-number.
   :type x: PrimExpr
   :param y: Second Q-number.
   :type y: PrimExpr
   :param ls: Integer left shift.
   :type ls: PrimExpr
   :param rs: Integer right shift.
   :type rs: PrimExpr
   :param q: Number of fractional bits in x and y. Needs to be > 0.
   :type q: IntImm
   :param is_lshift_required: Whether we need to do left shift or not.
   :type is_lshift_required: IntImm
   :param is_rshift_required: Whether we need to do right shift or not.
   :type is_rshift_required: IntImm

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: shift_left(x, y, span=None)

   Return the result of x left shifted by y bits.

   :param x: Input argument.
   :type x: PrimExpr
   :param y: Input argument.
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: shift_right(x, y, span=None)

   Return the result of x right shifted by y bits.

   :param x: Input argument.
   :type x: PrimExpr
   :param y: Input argument.
   :type y: PrimExpr
   :param span: The location of this operator in the source code.
   :type span: Optional[Span]

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: fmod(x, y)

   Return the remainder of x divided by y with the same sign as x.

   :param x: Input argument.
   :type x: PrimExpr
   :param y: Input argument.
   :type y: PrimExpr

   :returns: **z** -- The result.
   :rtype: PrimExpr


.. py:function:: if_then_else(cond, t, f, span=None)

   Conditional selection expression.

   :param cond: The condition
   :type cond: PrimExpr
   :param t: The result expression if cond is true.
   :type t: PrimExpr
   :param f: The result expression if cond is false.
   :type f: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **result** -- The result of conditional expression.
   :rtype: Node

   .. note::

      Unlike Select, if_then_else will not execute
      the branch that does not satisfy the condition.
      You can use it to guard against out of bound access.
      Unlike Select, if_then_else cannot be vectorized
      if some lanes in the vector have different conditions.


.. py:function:: div(a, b, span=None)

   Compute a / b as in C/C++ semantics.

   :param a: The left hand operand, known to be non-negative.
   :type a: PrimExpr
   :param b: The right hand operand, known to be non-negative.
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr

   .. note:: When operands are integers, returns truncdiv(a, b, span).


.. py:function:: indexdiv(a, b, span=None)

   Compute floor(a / b) where a and b are non-negative.

   :param a: The left hand operand, known to be non-negative.
   :type a: PrimExpr
   :param b: The right hand operand, known to be non-negative.
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr

   .. note::

      Use this function to split non-negative indices.
      This function may take advantage of operands'
      non-negativeness.


.. py:function:: indexmod(a, b, span=None)

   Compute the remainder of indexdiv. a and b are non-negative.

   :param a: The left hand operand, known to be non-negative.
   :type a: PrimExpr
   :param b: The right hand operand, known to be non-negative.
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr

   .. note::

      Use this function to split non-negative indices.
      This function may take advantage of operands'
      non-negativeness.


.. py:function:: truncdiv(a, b, span=None)

   Compute the truncdiv of two expressions.

   :param a: The left hand operand
   :type a: PrimExpr
   :param b: The right hand operand
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr

   .. note:: This is the default integer division behavior in C.


.. py:function:: truncmod(a, b, span=None)

   Compute the truncmod of two expressions.

   :param a: The left hand operand
   :type a: PrimExpr
   :param b: The right hand operand
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr

   .. note:: This is the default integer division behavior in C.


.. py:function:: floordiv(a, b, span=None)

   Compute the floordiv of two expressions.

   :param a: The left hand operand
   :type a: PrimExpr
   :param b: The right hand operand
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr


.. py:function:: floormod(a, b, span=None)

   Compute the floormod of two expressions.

   :param a: The left hand operand
   :type a: PrimExpr
   :param b: The right hand operand
   :type b: PrimExpr
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **res** -- The result expression.
   :rtype: PrimExpr


.. py:function:: ceildiv(lhs, rhs, span=None)

   Generic ceildiv operator.

   :param lhs: The left operand.
   :type lhs: object
   :param rhs: The right operand.
   :type rhs: object
   :param span: The location of this operator in the source.
   :type span: Optional[Span]

   :returns: **op** -- The result Expr of ceildiv operation.
   :rtype: tvm.Expr


.. py:function:: comm_reducer(fcombine, fidentity, name='reduce')

   Create a commutative reducer for reduction.

   :param fcombine: A binary function which takes two Expr as input to return an Expr.
   :type fcombine: function(Expr -> Expr -> Expr)
   :param fidentity: A function which takes a type string as input to return a const Expr.
   :type fidentity: function(str -> Expr)

   :returns: **reducer** -- A function which creates a reduce expression over axis.
             There are two ways to use it:

             1. accept (expr, axis, where) to produce a Reduce Expr on
                specified axis;
             2. simply use it with multiple Exprs.
   :rtype: function

   .. rubric:: Example

   .. code-block:: python

       n = te.var("n")
       m = te.var("m")
       mysum = te.comm_reducer(lambda x, y: x+y,
           lambda t: tvm.tir.const(0, dtype=t), name="mysum")
       A = te.placeholder((n, m), name="A")
       k = te.reduce_axis((0, m), name="k")
       B = te.compute((n,), lambda i: mysum(A[i, k], axis=k), name="B")


.. py:function:: TVMBackendAllocWorkspace(device_type, device_id, nbytes, dtype_code_hint, dtype_bits_hint)

   Backend function to allocate temporary workspace

   :param device_type: The device type which the space will be allocated.
   :type device_type: int
   :param device_id: The device id which the space will be allocated.
   :type device_id: int
   :param nbytes: The size of the space requested.
   :type nbytes: int
   :param dtype_code_hint: The type code of the array elements. Only used in certain backends such as OpenGL.
   :type dtype_code_hint: int
   :param dtype_bits_hint: The type bits of the array elements. Only used in certain backends such as OpenGL.
   :type dtype_bits_hint: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: TVMBackendFreeWorkspace(device_type, device_id, ptr)

   Backend function to free temporary workspace.

   :param device_type: The device type which the space will be allocated.
   :type device_type: int
   :param device_id: The device id which the space will be allocated.
   :type device_id: int
   :param ptr: The result allocated space pointer.
   :type ptr: Var

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: anylist_getitem(list_handle, index)

   Returns an item from any list.

   :param list_handle: The handle to anylist
   :type list_handle: Var
   :param index: The index
   :type index: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: anylist_resetitem(list_handle, index)

   Reset an item from any list.

   :param list_handle: The handle to anylist
   :type list_handle: Var
   :param index: The index
   :type index: int

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: anylist_setitem_call_packed(list_handle, index, func_name, *args)

   Set anylist item by result of packed call.

   :param list_handle: The handle to anylist
   :type list_handle: Var
   :param index: The index
   :type index: int
   :param func_name: The name of the function to be called.
   :type func_name: str
   :param args: Extra arguments

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: anylist_setitem_call_cpacked(list_handle, index, func_name, *args)

   Set anylist item by result of packed call.

   :param list_handle: The handle to anylist
   :type list_handle: Var
   :param index: The index
   :type index: int
   :param func_name: The name of the function to be called.
   :type func_name: str
   :param args: Extra arguments

   :returns: **call** -- The call expression.
   :rtype: PrimExpr


.. py:function:: vscale()

   Get the target's vscale value. It will be lowered to llvm.vscale intrinsic
   (https://llvm.org/docs/LangRef.html#llvm-vscale-intrinsic)

   :returns: **call** -- Call to the vscale intrinsic
   :rtype: PrimExpr


.. py:data:: sum

.. py:data:: min

.. py:data:: max