
     TiPW                       S SK Jr  S SKJrJrJrJrJr  S SKJ	r	J
r
Jr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  S SK+J,r,J-r-   " S	 S
\5      r.g)    )annotations)OptionalSequenceTupleTypeVarUnion)
GraphProtoSparseTensorProtoTensorProto)
get_schema)	TypeAlias)Opset23)BFLOAT16BOOL	COMPLEX64
COMPLEX128DOUBLEFLOAT
FLOAT4E2M1FLOAT8E4M3FNFLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZ
FLOAT8E8M0FLOAT16INT4INT8INT16INT32INT64STRINGUINT4UINT8UINT16UINT32UINT64)OpOpsetc                      \ rS rSr% S r\" S\\\\	5      r
\" S\\\\	5      r\" S\\\\\	\\\\\\\\5      r    S_SSSSSSSS	.                             S`S
 jjjr\" S\\\\\	\\\\\\\\\\\\\\\\\5      r \!\\\\\	\\\\\\\\\\\\\\\\\4   r"S\#S'   SSS.         SaS jjr$\" S\\\\\	\\\\\\\\\\\\\\\\\5      r%\" S\\\\\	\\\\\\\\\\\\\\\\\5      r&SSS.         SbS jjr'\!\\\(\)\\\	\\\\\\\\\\\\\\\\\4   r*S\#S'   SSSSSSSSS.                 ScS jjr+\r,S\#S'   \!\\\\\	\\\\\\\\\\\\\\\\4   r-S\#S'   SS.     SdS jjr.\" S\\\\\\\\\\\\5      r/\" S\\\	\5      r0\!\\\	4   r1S\#S'    SeSSSS.             SfS  jjjr2\" S!\\\(\)\\\	\\\\\\\\\\\\\\\\\5      r3SS".SgS# jjr4\" / S$P\5\6\      P\5\6\(      P\5\6\)      P\5\6\      P\5\6\      P\5\6\	      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\   P\5\(   P\5\)   P\5\   P\5\   P\5\	   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\6\   P\6\(   P\6\)   P\6\   P\6\   P\6\	   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\P\P\(P\)P\P\P\	P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P76 r7ShS% jr8\r9S\#S&'   \!/ SP\6\   P\6\   P\6\(   P\6\)   P\6\   P\6\   P\6\	   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\P\P\(P\)P\P\P\	P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P7   r:S\#S''   SiS( jr;\r<S\#S)'   \r=S\#S*'   \" / S+P\5\6\      P\5\6\      P\5\6\(      P\5\6\)      P\5\6\      P\5\6\      P\5\6\	      P\5\6\
      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\6\      P\5\   P\5\   P\5\(   P\5\)   P\5\   P\5\   P\5\	   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\5\   P\6\   P\6\   P\6\(   P\6\)   P\6\   P\6\   P\6\	   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\6\   P\P\P\(P\)P\P\P\	P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P\P76 r>          SjS, jr?\" S-\\\(\)\\\	\\\\\\\\\\\\\\\\\5      r@\" S.\\5      rA  SkS/S0.           SlS1 jjjrB\" S2\\\	\5      rC\" S3\\\	\\5      rD\" S4\\\\\\\\\\\5      rE SeSSSSSS5.                 SmS6 jjjrF\" S7\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rGSS8.SnS9 jjrH\" S:\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rISSSSS;.               SoS< jjrJ\" S=\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rK\rLS\#S>'   SSS?.SpS@ jjrM\" SA\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rN\rOS\#SB'   SqSC jrP\" SD\\\(\)\\\	\\\\\\\\\5      rQ\" SE\\5      rR\!\6\   \6\   \6\(   \6\)   \6\   \6\   \6\	   \6\   \6\   \6\   \6\   \6\   \6\   \6\   \6\   \6\   4   rSS\#SF'    SeSSSG.         SrSH jjjrT\" SI\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rUSeSsSJ jjrV\" SK\\\\	5      rWSLSM.StSN jjrX\" SO\\\(\)\\\	\\\\\\\\\\\\\\\\\5      rY SeSPSQSR.           SuSS jjjrZ\" ST\\\\	\\\\\\\\5      r[\r\S\#SU'   SVSSSW.          
 SvSX jjr]\" SY\\\(\)\\\	\\\\\\\\\\\\\\\\\5      r^SSZ.     SwS[ jjr_\" S\\\\(\)\\\	\\\\\\\\\\\\\\\\\5      r`SxS] jraS^rbg)yOpset243   c                2    [         R                  " U SS5      $ )N    )r(   __new__)clss    b/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxscript/onnx_opset/_impl/opset24.pyr/   Opset24.__new__4   s    }}S"b))    T1_AttentionT2_AttentionU_AttentionNr   g        	is_causalkv_num_headsq_num_headsqk_matmul_output_modescalesoftcapsoftmax_precisionc               v    [        SSS5      n[        U SU5      nU" U R                  XX#XEXg5      UU	U
UUUUS.6$ )u>  [🌐 Attention(24)](https://onnx.ai/onnx/operators/onnx__Attention.html#attention-24 "Online Documentation")



Computes scaled dot product attention on query, key and value tensors, using an optional attention mask if passed.

This operator covers self and cross variants of the attention operation based on sequence lengths of K, Q and V.

For self attention, `kv_sequence_length` equals to `q_sequence_length`.

For cross attention, query and key might have different lengths.

This operator also covers the 3 following variants based on the number of heads:
1) Multi-headed Attention (MHA): Described in the paper https://arxiv.org/pdf/1706.03762, `q_num_heads = kv_num_heads`.
2) Group-query Attention (GQA): Described in the paper https://arxiv.org/pdf/2305.13245, `q_num_heads > kv_num_heads`, `q_num_heads % kv_num_heads == 0`.
3) Multi-query Attention (MQA): Described in the paper https://arxiv.org/pdf/1911.02150, `q_num_heads > kv_num_heads`, `kv_num_heads=1`.

Attention bias to be added is calculated based on `attn_mask` input and `is_causal` attribute:
1) `attn_mask`: A boolean mask where a value of `True` indicates that the element should take part in attention or a float mask of the same type as query, key, value that is added to the attention score.
2) If `is_causal` is set to `1`, attention scores above the diagonal are masked out, regardless of the `attn_mask` input.

With respect to KV cache update, this operator allows the following two use cases:

1) Cache update happens inside the Attention operator. In this case, the `K` and `V` inputs contain only the incoming
tokens for the current autoregressive step, and the four optional inputs/outputs past and present key and value are
all needed. The Attention op performs a Concat operation on the past and incoming key and value to form the present
key and value, respectively. Note that this only works correctly for the special case where the past key and value
do not contain padded tokens.
2) Cache update happens outside the Attention operator (for example, through the `TensorScatter` operator). In this
case, the `K` and `V` inputs correspond to the entire cache tensor, so the four optional inputs/outputs past and
present key and value should not be used. An additional input `nonpad_kv_seqlen` of shape (batch_size,) may be
provided to indicate the number of non-padding tokens in each sample of the batch to save unnecessary computation.
Here, the kv_sequence dimension of `attn_mask` can be shorter than `K` and `V`, but still needs to be at least as long
as the maximum value of `nonpad_kv_seqlen`.

Both past and present state key/values are optional. They shall be used together, and not allowed to use only one of them.
The following pattern is applied to the Q, K and V inputs after appropriate reshaping of K and V inputs based on sequence lengths and num heads provided:

::

      The following pattern is applied by this operator:
          Q          K          V
          |          |          |
    Q*sqrt(scale) K*sqrt(scale) |
          |          |          |
          |       Transpose     |
          |          |          |
          ---MatMul---          |
                |               |
     at_mask---Add              |
                |               |
      softcap (if provided)     |
                |               |
             Softmax            |
                |               |
                -----MatMul------
                       |
                       Y





Args:
    Q: Query tensor. 4D tensor with shape `(batch_size, q_num_heads,
        q_sequence_length, head_size)` or 3D tensor with shape `(batch_size,
        q_sequence_length, q_hidden_size)`. For cases with a 3D input tensor,
        `q_hidden_size = q_num_heads * head_size`

    K: Key tensor. 4D tensor with shape `(batch_size, kv_num_heads,
        kv_sequence_length, head_size)` or 3D tensor with shape `(batch_size,
        kv_sequence_length, k_hidden_size)`. For cases with a 3D input tensor,
        `k_hidden_size = kv_num_heads * head_size`

    V: Value tensor. 4D tensor with shape `(batch_size, kv_num_heads,
        kv_sequence_length, v_head_size)` or 3D tensor with shape `(batch_size,
        kv_sequence_length, v_hidden_size)`. For cases with a 3D input tensor,
        `v_hidden_size = kv_num_heads * v_head_size`

    attn_mask: (optional) Attention mask. Shape must be broadcastable to
        `(batch_size, q_num_heads, q_sequence_length, total_sequence_length)`
        where `total_sequence_length = past_sequence_length +
        kv_sequence_length.` The last dimension can also be shorter than
        `total_sequence_length` and will be padded to `total_sequence_length`
        with negative infinity. Two types of masks are supported: a boolean mask
        where a value of `True` indicates that the element should take part in
        attention, or a float mask of the same type as query, key, value that is
        added to the attention score.

    past_key: (optional) past state cache for key with shape `(batch_size,
        kv_num_heads, past_sequence_length, head_size)`

    past_value: (optional) past state cache for value with shape `(batch_size,
        kv_num_heads, past_sequence_length, v_head_size)`

    nonpad_kv_seqlen: (optional) A vector of integers of shape `(batch_size,)`
        that indicates the number of valid (ie, non-padding) tokens in each
        sample. A padding mask can be derived from this. This should not be used
        together with `past_key` and `past_value` inputs or `present_key` and
        `present_value` outputs (See the KV cache use cases in the operator
        description).

    is_causal: If set to `1`, the attention masking is a lower triangular matrix
        when the mask is a square matrix. The attention masking has the form of
        the upper left causal bias due to the alignment.

    kv_num_heads: Number of heads of key and value. Must be used with 3D inputs
        of Q, K and V.

    q_num_heads: Number of heads of query. Must be used with 3D inputs of Q, K
        and V.

    qk_matmul_output_mode: If set to `0`, qk_matmul_output is the output of qk
        matmul. If set to `1`, qk_matmul_output includes the addition of the
        attention mask to the output of qk matmul. If set to `2`,
        qk_matmul_output is the output after the softcap operation. If set to
        `3`, qk_matmul_output is the output after the softmax operation. Default
        value is 0.

    scale: Scaling factor applied to $Q*K^T$. Default value is
        `1/sqrt(head_size)`. To prevent [numerical
        overflow](https://tinyurl.com/sudb9s96), scale `Q`, `K` by `sqrt(scale)`
        before matmul.

    softcap: Softcap value for attention weights. Default value is 0.

    softmax_precision: The floating-point precision used in softmax computation.
        If softmax precision is not provided, the same precision as the input of
        softmax (Q and K) is used.
	Attentionr.   r-   r7   r   r'   _prepare_inputs)selfQKV	attn_maskpast_key
past_valuenonpad_kv_seqlenr8   r9   r:   r;   r<   r=   r>   schemaops                    r1   r@   Opset24.AttentionL   s\    j KR0k6*!!1j  %#"7/
 	
r3   T1_Castr   T2_Castup   
round_modesaturatec               h    [        SSS5      n[        U SU5      nU" U R                  XQ5      UUUS.6$ )u  [🌐 Cast(24)](https://onnx.ai/onnx/operators/onnx__Cast.html#cast-24 "Online Documentation")


The operator casts the elements of a given input tensor to a data type
specified by the 'to' argument and returns an output tensor of the same size in
the converted type. The 'to' argument must be one of the data types specified
in the 'DataType' enum field in the TensorProto message.

Casting from string tensor in plain (e.g., "3.14" and "1000") and scientific numeric representations
(e.g., "1e-5" and "1E8") to float types is supported. For example, converting string "100.5" to an integer may
yield result 100. There are some string literals reserved for special floating-point values;
"+INF" (and "INF"), "-INF", and "NaN" are positive infinity, negative infinity, and not-a-number, respectively.
Any string which can exactly match "+INF" in a case-insensitive way would be mapped to positive infinite. Similarly,
this case-insensitive rule is applied to "INF" and "NaN". When casting from numeric tensors
to string tensors, plain floating-point representation (such as "314.15926") would be used.
Converting non-numerical-literal string such as "Hello World!" is an undefined behavior. Cases
of converting string representing floating-point arithmetic value, such as "2.718", to INT is an undefined behavior.

Conversion from a numerical type to any numerical type is always allowed.
User must be aware of precision loss and value change caused by range difference between two types.
For example, a 64-bit float 3.1415926459 may be round to a 32-bit float 3.141592. Similarly, converting
an integer 36 to Boolean may produce 1 because we truncate bits which can't be stored in the targeted type.

In more detail, the conversion among numerical types should follow these rules
if the destination type is not a float 8 type.

* Casting from floating point to:
  * floating point: +/- infinity if OOR (out of range).
  * fixed point: undefined if OOR.
  * bool: +/- 0.0 to False; all else to True.
* Casting from fixed point to:
  * floating point: +/- infinity if OOR. (+ infinity in the case of uint)
  * fixed point: when OOR, discard higher bits and reinterpret (with respect to two's complement representation for
    signed types). For example, 200 (int16) -> -56 (int8).
  * bool: zero to False; nonzero to True.
* Casting from bool to:
  * floating point: `{1.0, 0.0}`.
  * fixed point: `{1, 0}`.
  * bool: no change.

Float 8 types (E4M3FN, E4M3FNUZ, E5M2, E5M2FNUZ) were introduced to speed up the training of
deep models. By default the conversion of a float *x* obeys
to the following rules. `[x]` means the value rounded to
the target mantissa width.

| x                 | E4M3FN   | E4M3FNUZ | E5M2     | E5M2FNUZ |
| ----------------- | -------- | -------- | -------- | -------- |
| 0                 | 0        | 0        | 0        | 0        |
| -0                | -0       | 0        | -0       | 0        |
| NaN               | NaN      | NaN      | NaN      | NaN      |
| Inf               | FLT_MAX  | FLT_MAX  | FLT_MAX  | FLT_MAX  |
| -Inf              | -FLT_MAX | -FLT_MAX | -FLT_MAX | -FLT_MAX |
| \[x\] > FLT_MAX   | FLT_MAX  | FLT_MAX  | FLT_MAX  | FLT_MAX  |
| \[x\] \< -FLT_MAX | -FLT_MAX | -FLT_MAX | -FLT_MAX | -FLT_MAX |
| else              | RNE      | RNE      | RNE      | RNE      |

The behavior changes if the parameter 'saturate' is set to False.
The rules then become:

| x                 | E4M3FN | E4M3FNUZ | E5M2 | E5M2FNUZ |
| ----------------- | ------ | -------- | ---- | -------- |
| 0                 | 0      | 0        | 0    | 0        |
| -0                | -0     | 0        | -0   | 0        |
| NaN               | NaN    | NaN      | NaN  | NaN      |
| -NaN              | -NaN   | NaN      | -NaN | NaN      |
| Inf               | NaN    | NaN      | Inf  | NaN      |
| -Inf              | -NaN   | NaN      | -Inf | NaN      |
| \[x\] > FLT_MAX   | NaN    | NaN      | Inf  | NaN      |
| \[x\] \< -FLT_MAX | NaN    | NaN      | -Inf | NaN      |
| else              | RNE    | RNE      | RNE  | RNE      |

FLOAT8E8M0 type was introduced to enable [Microscaling (MX) formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf).
When casting to FLOAT8E8M0, the rounding behavior can be specified using the `round_mode` and `saturate` attributes.
The current CUDA behavior is to round up and saturate. Casting negative values to FLOAT8E8M0 gives undefined behavior.
The following table describes the casting behavior of special values to FLOAT8E8M0 in the two most common cases.

| x                 | saturate + up | non-saturate + nearest |
| ----------------- | ------------- | ---------------------  |
| 0                 | 0             | NaN                    |
| -0                | Unspecified   | Unspecified            |
| NaN               | NaN           | NaN                    |
| Inf               | E8M0_MAX      | NaN                    |
| x > E8M0_MAX      | E8M0_MAX      | NaN                    |
| x \< E8M0_MIN     | E8M0_MIN      | NaN                    |
| x \< 0            | Unspecified   | Unspecified            |


Args:
    input: (differentiable) Input tensor to be cast.

    round_mode: Rounding mode for conversion to float8e8m0. It only applies to
        casting to float8e8m0 and is `up` by default. `up`: round to nearest
        value away from zero, `down`: round to nearest value towards zero,
        `nearest`: round to nearest value and ties round up.

    saturate: The parameter defines how the conversion behaves if an input value
        is out of range of the destination type. It only applies for float 8
        conversion (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz,
        float8e8m0). It is true by default. All cases are fully described in the
        tables inserted in the operator description.

    to: The data type to which the elements of the input tensor are cast.
        Strictly must be one of the types from DataType enum in TensorProto
Castr.   r-   )rS   rT   torA   )rC   inputrS   rT   rW   rK   rL   s          r1   rV   Opset24.Cast#  sE    X FB+ff%!!&0!	
 	
r3   T1_CastLikeT2_CastLikec               h    [        SSS5      n[        U SU5      nU" U R                  XQU5      UUS.6$ )u  [🌐 CastLike(24)](https://onnx.ai/onnx/operators/onnx__CastLike.html#castlike-24 "Online Documentation")


The operator casts the elements of a given input tensor (the first input) to
the same data type as the elements of the second input tensor.
See documentation of the Cast operator for further details.


Args:
    input: (differentiable) Input tensor to be cast.

    target_type: (non-differentiable) The (first) input tensor will be cast to
        produce a tensor of the same type as this (second input) tensor.

    round_mode: Rounding mode for conversion to float8e8m0. It only applies to
        casting to float8e8m0 and is `up` by default. `up`: round to nearest
        value away from zero, `down`: round to nearest value towards zero,
        `nearest`: round to nearest value and ties round up. Please refer to
        operator Cast description for further details.

    saturate: The parameter defines how the conversion behaves if an input value
        is out of range of the destination type. It only applies for float 8
        conversion (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz,
        float8e8m0). It is true by default. Please refer to operator Cast
        description for further details.
CastLiker.   r-   rR   rA   )rC   rX   target_typerS   rT   rK   rL   s          r1   r]   Opset24.CastLike  sD    F JB/j&)!!&=!
 	
r3   
T_Constantsparse_valuevaluevalue_floatvalue_floats	value_int
value_intsvalue_stringvalue_stringsc               P    [        SSS5      n	[        U SU	5      n
U
" UUUUUUUUS9$ )u  [🌐 Constant(24)](https://onnx.ai/onnx/operators/onnx__Constant.html#constant-24 "Online Documentation")


This operator produces a constant tensor. Exactly one of the provided attributes, either value, sparse_value,
or value_* must be specified.


Args:
    sparse_value: The value for the elements of the output tensor in sparse
        format.

    value: The value for the elements of the output tensor.

    value_float: The value for the sole element for the scalar, float32, output
        tensor.

    value_floats: The values for the elements for the 1D, float32, output
        tensor.

    value_int: The value for the sole element for the scalar, int64, output
        tensor.

    value_ints: The values for the elements for the 1D, int64, output tensor.

    value_string: The value for the sole element for the scalar, UTF-8 string,
        output tensor.

    value_strings: The values for the elements for the 1D, UTF-8 string, output
        tensor.
Constantr.   r-   ra   )r   r'   )rC   rb   rc   rd   re   rf   rg   rh   ri   rK   rL   s              r1   rk   Opset24.Constant  sF    V JB/j&)%#%!%'	
 		
r3   T1_ConstantOfShapeT2_ConstantOfShape)rc   c               d    [        SSS5      n[        U SU5      nU" U R                  X15      SU06$ )u  [🌐 ConstantOfShape(24)](https://onnx.ai/onnx/operators/onnx__ConstantOfShape.html#constantofshape-24 "Online Documentation")


Generate a tensor with given value and shape.


Args:
    input: 1D tensor. The shape of the expected output tensor. If empty tensor
        is given, the output would be a scalar. All values must be >= 0.

    value: (Optional) The value of the output elements.Should be a one-element
        tensor. If not specified, it defaults to a tensor of value 0 and
        datatype float32
ConstantOfShaper.   r-   rc   rA   )rC   rX   rc   rK   rL   s        r1   rp   Opset24.ConstantOfShaped  s=    $ -r26'04''6DeDDr3   T1_DequantizeLinearT2_DequantizeLinearT3_DequantizeLinearaxis
block_sizeoutput_dtypec               j    [        SSS5      n[        U SU5      nU" U R                  XqX#5      UUUS.6$ )u	  [🌐 DequantizeLinear(24)](https://onnx.ai/onnx/operators/onnx__DequantizeLinear.html#dequantizelinear-24 "Online Documentation")


The linear dequantization operator. It consumes a quantized tensor, a scale, and a zero point to compute the
full-precision tensor. The dequantization formula is `y = (x - x_zero_point) * x_scale`. `x_scale` and `x_zero_point`
must have the same shape, determining the quantization's granularity: a scalar for per-tensor/per-layer quantization,
a 1-D tensor for per-axis quantization, or have a rank identical to the input for blocked quantization.
See QuantizeLinear for details on quantization granularity.

`x_zero_point` and `x` must have the same type. `x` and `y` must have the same shape. In the case of dequantizing
`int32`, there's no zero point (zero point is supposed to be 0).
`zero-point` is usually not used in the case of float8 and 4-bit types quantization, but the dequantization formula remains the same
for consistency. The output type is determined by the attribute `output_dtype`. If `output_dtype` is not supplied then the output type
is the same as `x_scale`. The output type also determines the precision of the multiplication operation.



Args:
    x: N-D quantized input tensor to be de-quantized.

    x_scale: Scale for input `x`. For per-tensor/layer dequantization the scale
        is a scalar, for per per-axis dequantization it is a 1-D Tensor and for
        blocked dequantization it has the same shape as the input, except for
        one dimension in which blocking is performed.

    x_zero_point: (optional) Zero point for input `x`. Shape must match x_scale.
        It's optional. Zero point is 0 when it's not specified.

    axis: (Optional) The axis of the dequantizing dimension of the input tensor.
        Used for per-axis and blocked quantization. Negative value means
        counting dimensions from the back. Accepted range is `[-r, r-1]` where
        `r = rank(input)`.

    block_size: (Optional) The size of the quantization block (number of times
        every scale is replicated). Used only for blocked quantization. The
        block size is a positive integer. Given `x` shape `(D0, ..., Di, ...,
        Dn)`, `y_scale` shape `(S0, ... Si, ...Sn)` and `axis=i`, the accepted
        range is `[ceil(Di/Si), ceil(Di/(Si-1))-1]`

    output_dtype: (Optional) The output data type. If not supplied, the output
        data type is inferred from `x_scale` data type (`T2`)
DequantizeLinearr.   r-   ru   rA   )	rC   xx_scalex_zero_pointrv   rw   rx   rK   rL   s	            r1   rz   Opset24.DequantizeLinear  sI    j .B7(&1!!&WC!%	
 	
r3   	T_Flatten)rv   c               d    [        SSS5      n[        U SU5      nU" U R                  X15      SU06$ )u  [🌐 Flatten(24)](https://onnx.ai/onnx/operators/onnx__Flatten.html#flatten-24 "Online Documentation")


Flattens the input tensor into a 2D matrix. If input tensor has shape
(d_0, d_1, ... d_n) then the output will have shape
(d_0 X d_1 ... d_(axis-1), d_axis X d_(axis+1) ... X dn).


Args:
    input: (differentiable) A tensor of rank >= axis.

    axis: Indicate up to which input dimensions (exclusive) should be flattened
        to the outer dimension of the output. The value for axis must be in the
        range [-r, r], where r is the rank of the input tensor. Negative value
        means counting dimensions from the back. When axis = 0, the shape of the
        output tensor is (1, (d_0 X d_1 ... d_n), where the shape of the input
        tensor is (d_0, d_1, ... d_n).
Flattenr.   r-   rv   rA   )rC   rX   rv   rK   rL   s        r1   r   Opset24.Flatten  s;    ( Ir2.i(4''6BTBBr3   
V_Identityc                ^    [        SSS5      n[        U SU5      nU" U R                  X!5      6 $ )u   [🌐 Identity(24)](https://onnx.ai/onnx/operators/onnx__Identity.html#identity-24 "Online Documentation")

Identity operator

Args:
    input: (differentiable) Input tensor
Identityr.   r-   rA   )rC   rX   rK   rL   s       r1   r   Opset24.IdentityI  s6     JB/j&)4''677r3   B_IfV_Ifc               f    [        SSS5      n[        U SU5      nU" U R                  XA5      UUS.6$ )ug  [🌐 If(24)](https://onnx.ai/onnx/operators/onnx__If.html#if-24 "Online Documentation")

If conditional

Args:
    cond: Condition for the if. The tensor must contain a single element.

    else_branch: Graph to run if condition is false. Has N outputs: values you
        wish to be live-out to the enclosing scope. The number of outputs must
        match the number of outputs in the then_branch.

    then_branch: Graph to run if condition is true. Has N outputs: values you
        wish to be live-out to the enclosing scope. The number of outputs must
        match the number of outputs in the else_branch.
Ifr.   r-   )else_branchthen_branchrA   )rC   condr   r   rK   rL   s         r1   r   
Opset24.If  sA    " D"b)dF#!!&/##
 	
r3   I_LoopB_LoopV_Loopc               j    [        SSS5      n[        U SU5      nU" U R                  " XQU/UQ76 SU06$ )u3  [🌐 Loop(24)](https://onnx.ai/onnx/operators/onnx__Loop.html#loop-24 "Online Documentation")


Generic Looping construct. This loop has multiple termination conditions:

1) Trip count. Iteration count specified at runtime. Set by
   specifying the input M. Optional. Set to empty string to omit.
   Note that a static trip count (specified at graph construction time) can be
   specified by passing in a constant node for input M.
2) Loop termination condition. This is an input to the op that determines
   whether to run the first iteration and also a loop-carried dependency for
   the body graph. The body graph must yield a value for the condition variable,
   whether this input is provided or not.

This table summarizes the operating modes of this operator with equivalent
C-style code:

Operator inputs defined as (max_trip_count, condition_var).

* input ("", ""):
        for (int i=0; ; ++i) {
          cond = ... // Note this value is ignored, but is required in the body
        }

* input ("", cond) // Note this is analogous to a while loop
        bool cond = ...;
        for (int i=0; cond; ++i) {
          cond = ...;
        }

* input ("", 1) // Note this is analogous to a do-while loop
        bool cond = true
        for (int i=0; cond; ++i) {
          cond = ...;
        }

* input (trip_count, "") // Note this is analogous to a for loop
        int trip_count = ...
        for (int i=0; i < trip_count; ++i) {
          cond = ...; // ignored
        }

* input (trip_count, cond)
        int trip_count = ...;
        bool cond = ...;
        for (int i=0; i < trip_count && cond; ++i) {
          cond = ...;
        }


*Sample usage - cond as well as trip count*

    graph predict-net {
      %a = Constant[value = <Scalar Tensor [3]>]()
      %b = Constant[value = <Scalar Tensor [6]>]()
      %keepgoing = Constant[value = <Scalar Tensor [1]>]()
      %max_trip_count = Constant[value = <Scalar Tensor [10]>]()
      %keepgoing_out, %b_out, %user_defined_vals = Loop[body = <graph body-net>](%max_trip_count, %keepgoing, %b)
      return
    }

    graph body-net (
      %i[INT32, scalar]           // iteration number
      %keepgoing_in[BOOL, scalar] // incoming loop-termination-condition; not used
      %b_in[INT32, scalar]        // incoming value of loop-carried-dependency b
    ) {
      %my_local = Add(%a, %b_in)
      %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-dependency b
      %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-termination-condition
      %user_defined_val = Add(%b_in, %b_in) // scan-output value to be accumulated
      return %keepgoing_out, %b_out, %user_defined_val
    }

*Sample equivalent C code*

    {
      /* User-defined code (enclosing scope) */
      int a = 3, b = 6;
      bool keepgoing = true; // Analogous to input cond
      /* End user-defined code */

      /* Implicitly-defined code */
      const int max_trip_count = 10; // Analogous to input M
      int user_defined_vals[]; // Imagine this is resizable
      /* End implicitly-defined code */
      /* initialize loop-carried variables and scan-output variables */
      bool keepgoing_out = keepgoing
      int b_out = b

      for (int i=0; i < max_trip_count && keepgoing_out; ++i) {
        /* Implicitly-defined code: bind actual parameter values
           to formal parameter variables of loop-body */
        bool keepgoing_in = keepgoing_out;
        bool b_in = b_out;

        /* User-defined code (loop body) */
        int my_local = a + b_in; // Reading value "a" from the enclosing scope is fine
        b_out = a - b_in;
        keepgoing_out = my_local > b_out;
        user_defined_val = b_in + b_in; // b_in and b_out are different variables
        /* End user-defined code */

        /* Implicitly defined-code */
        user_defined_vals[i] = user_defined_val; // accumulate scan-output values
      }
      // int t = my_local; // Can't do this. my_local is not accessible here.

      // The values below are bound to the output variables of the loop and therefore accessible
      // b_out; user_defined_vals; keepgoing_out;
    }

There are several things of note in this code snippet:

1) Values from the enclosing scope (i.e. variable "a" here) are in scope and can
   be referenced in the inputs of the loop.
2) Any values computed in the loop body that need to be used in a subsequent
   iteration or after the loop are modelled using a pair of variables in the loop-body,
   consisting of an input variable (eg., b_in) and an output variable (eg., b_out).
   These are referred to as loop-carried dependences. The loop operation node
   supplies the input value of the input variable for the first iteration, and
   returns the output value of the output variable produced by the final
   iteration.
3) Scan_output variables are used to implicitly concatenate values computed across
   all the iterations. In the above example, the values of user_defined_val computed
   over all iterations are concatenated and returned as the value of user_defined_vals
   after the loop.
4) Values created in the body cannot be accessed in the enclosing scope,
   except using the mechanism described above.

Note that the semantics of this op support "diagonal" or "wavefront" execution.
(See Step 3 here for an example:
https://devblogs.nvidia.com/optimizing-recurrent-neural-networks-cudnn-5/).
Frontends should emit multi-layer RNNs as a series of Loop operators (with
time being the inner looping dimension), with each successive layer consuming
the scan_outputs from the previous layer, possibly going through several
point-wise operators (e.g. dropout, residual connections, linear layer).

The inputs and outputs of the subgraph (produced by the Loop node) are matched by order rather than by name. The implementation will figure out the names based on this order.


Args:
    M: (optional) A maximum trip-count for the loop specified at runtime.
        Optional. Pass empty string to skip.

    cond: (optional) A boolean termination condition. Optional. Pass empty
        string to skip.

    v_initial: (variadic, heterogeneous) The initial values of any loop-carried
        dependencies (values that change across loop iterations)

    body: The graph run each iteration. It has 2+N inputs: (iteration_num,
        condition, loop carried dependencies...). It has 1+N+K outputs:
        (condition, loop carried dependencies..., scan_outputs...). Each
        scan_output is created by concatenating the value of the specified
        output value at the end of each iteration of the loop. It is an error if
        the dimensions or data type of these scan_outputs change across loop
        iterations.
Loopr.   r-   bodyrA   )rC   Mr   r   	v_initialrK   rL   s          r1   r   Opset24.Loop  sC    L FB+ff%4''4D)DP4PPr3   T_PadTind_Padconstant)modec          	     h    [        SSS5      n[        U SU5      nU" U R                  XaX#U5      SU06$ )u;  [🌐 Pad(24)](https://onnx.ai/onnx/operators/onnx__Pad.html#pad-24 "Online Documentation")


Given a tensor containing the data to be padded (`data`), a tensor containing the number of start and end pad values for each axis (`pads`), (optionally) a `mode`, and (optionally) `constant_value`,
a padded tensor (`output`) is generated.

The four supported `modes` are (similar to corresponding modes supported by `numpy.pad`):

1) `constant`(default) - pads with a given constant value as specified by `constant_value` (which defaults to 0, empty string, or False)

2) `reflect` - pads with the reflection of the vector mirrored on the first and last values of the vector along each axis

3) `edge` - pads with the edge values of array

4) `wrap` - wrap-around padding as if the data tensor forms a torus


Example 1 (`constant` mode):

Insert 0 pads to the beginning of the second dimension.

::

    data = [
        [1.0, 1.2],
        [2.3, 3.4],
        [4.5, 5.7],
    ]

    pads = [0, 2, 0, 0]

    mode = 'constant'

    constant_value = 0.0

    output = [
        [0.0, 0.0, 1.0, 1.2],
        [0.0, 0.0, 2.3, 3.4],
        [0.0, 0.0, 4.5, 5.7],
    ]



Example 2 (`reflect` mode):

::

    data = [
        [1.0, 1.2],
        [2.3, 3.4],
        [4.5, 5.7],
    ]

    pads = [0, 2, 0, 0]

    mode = 'reflect'

    output = [
        [1.0, 1.2, 1.0, 1.2],
        [2.3, 3.4, 2.3, 3.4],
        [4.5, 5.7, 4.5, 5.7],
    ]



Example 3 (`edge` mode):

::

    data = [
        [1.0, 1.2],
        [2.3, 3.4],
        [4.5, 5.7],
    ]

    pads = [0, 2, 0, 0]

    mode = 'edge'

    output = [
        [1.0, 1.0, 1.0, 1.2],
        [2.3, 2.3, 2.3, 3.4],
        [4.5, 4.5, 4.5, 5.7],
    ]



Example 4 (`wrap` mode):

::

    data = [
        [1.0, 1.2],
        [2.3, 3.4],
        [4.5, 5.7],
    ]

    pads = [2, 1, 1, 1]

    mode = 'wrap'

    output = [
        [3.4, 2.3, 3.4, 2.3],
        [5.7, 4.5, 5.7, 4.5],
        [1.2, 1.0, 1.2, 1.0],
        [3.4, 2.3, 3.4, 2.3],
        [5.7, 4.5, 5.7, 4.5],
        [1.2, 1.0, 1.2, 1.0],
    ]




Args:
    data: (differentiable) Input tensor.

    pads: (non-differentiable) Tensor of integers indicating the number of
        padding elements to add or remove (if negative) at the beginning and end
        of each axis. For 2D input tensor, it is the number of pixels. `pads`
        should be a 1D tensor of shape [2 * num_axes] where `num_axes` refers to
        the number of elements in the `axes` input or the input rank if `axes`
        are not provided explicitly. `pads` format should be: [x1_begin,
        x2_begin, ..., x1_end, x2_end,...], where xi_begin is the number of pad
        values added at the beginning of axis `axes[i]` and xi_end, the number
        of pad values added at the end of axis `axes[i]`.

    constant_value: (optional, non-differentiable) (Optional) A scalar value to
        be used if the mode chosen is `constant` (by default it is 0, empty
        string or False).

    axes: (optional, non-differentiable) 1-D tensor of axes that `pads` apply
        to. Negative value means counting dimensions from the back. Accepted
        range is [-r, r-1] where r = rank(data). Behavior is undefined if an
        axis is repeated. If not provided, all axes are assumed (`[0, 1, ...,
        input_rank-1]`).

    mode: Supported modes: `constant`(default), `reflect`, `edge`, `wrap`
Padr.   r-   r   rA   )rC   datapadsconstant_valueaxesr   rK   rL   s           r1   r   Opset24.Pad  sA    h E2r*eV$4''dDQ]X\]]r3   T1_QuantizeLinearT2_QuantizeLinearT3_QuantizeLinearrv   rw   rx   	precisionrT   c          	     n    [        SSS5      n	[        U SU	5      n
U
" U R                  XX#5      UUUUUS.6$ )u  [🌐 QuantizeLinear(24)](https://onnx.ai/onnx/operators/onnx__QuantizeLinear.html#quantizelinear-24 "Online Documentation")


The linear quantization operator consumes a high-precision tensor, a scale, and a zero point to compute the
low-precision/quantized tensor. The scale factor and zero point must have the same shape, determining the quantization
granularity. The quantization formula is `y = saturate((x / y_scale) + y_zero_point)`.

Saturation is done according to:
- uint16: [0, 65535]
- int16: [-32768, 32767]
- uint8: [0, 255]
- int8: [-128, 127]
- uint4: [0, 15]
- int4: [-8, 7]

For `(x / y_scale)`, it rounds to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details.

`y_zero_point` and `y` must have the same type. `y_zero_point` is usually not used for quantization to float8 and 4-bit types, but the quantization
formula remains the same for consistency, and the type of the input `y_zero_point` still determines the quantization type.
`x` and `y_scale` are allowed to have different types. The type of `y_scale` determines the precision of the division operation between `x` and
`y_scale`, unless the `precision` attribute is specified.

There are three supported quantization granularities, determined by the shape of `y_scale`.
In all cases, `y_zero_point` must have the same shape as `y_scale`.
- Per-tensor (per-layer) quantization: `y_scale` is a scalar.
- Per-axis quantization: The scale must be a 1-D tensor, with the length of the quantization axis. For an input shape
 `(D0, ..., Di, ..., Dn)` and `axis=i`, `y_scale` is a 1-D tensor of length `Di`.
- Blocked quantization: The scale's shape is identical to the input's shape, except for one dimension, in which
  blocking is performed. Given `x` shape `(D0, ..., Di, ..., Dn)`, `axis=i`, and block size `B`: `y_scale` shape is
  `(D0, ..., ceil(Di/B), ..., Dn)`.


Args:
    x: N-D full precision Input tensor to be quantized.

    y_scale: Scale for doing quantization to get `y`. For per-tensor/layer
        quantization the scale is a scalar, for per-axis quantization it is a
        1-D Tensor and for blocked quantization it has the same shape as the
        input, except for one dimension in which blocking is performed.

    y_zero_point: (optional) Zero point for doing quantization to get `y`. Shape
        must match `y_scale`. Default is uint8 with zero point of 0 if it's not
        specified.

    axis: (Optional) The axis of the quantizing dimension of the input tensor.
        Used only for per-axis and blocked quantization. Negative value means
        counting dimensions from the back. Accepted range is `[-r, r-1]` where
        `r = rank(input)`. When the rank of the input is 1, per-tensor
        quantization is applied, rendering the axis unnecessary in this
        scenario.

    block_size: (Optional) The size of the quantization block (number of times
        every scale is replicated). Used only for blocked quantization. The
        block size is a positive integer. Given `x` shape `(D0, ..., Di, ...,
        Dn)`, `y_scale` shape `(S0, ... Si, ...Sn)` and `axis=i`, the accepted
        range is `[ceil(Di/Si), ceil(Di/(Si-1))-1]`

    output_dtype: (Optional) The output data type. If not supplied, the output
        data type is inferred from `y_zero_point` data type (`T3`). If neither
        `output_dtype` nor `y_zero_point` are supplied, output data type is
        uint8. If both `output_dtype` and `y_zero_point` are specified,
        `output_dtype` must be `T3`.

    precision: (Optional) The precision of the division operation between `x`
        and `y_scale`. If not provided, it will be the same as the type of
        `y_scale`.

    saturate: The parameter defines how the conversion behaves if an input value
        is out of range of the destination type. It only applies for float 8
        quantization (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz).
        It is true by default. All cases are fully described in two tables
        inserted in the operator description.
QuantizeLinearr.   r-   r   rA   )rC   r{   y_scaley_zero_pointrv   rw   rx   r   rT   rK   rL   s              r1   r   Opset24.QuantizeLinearz  sO    l ,b"5&/!!&WC!%
 	
r3   	T_Reshape)	allowzeroc               f    [        SSS5      n[        U SU5      nU" U R                  XAU5      SU06$ )u  [🌐 Reshape(24)](https://onnx.ai/onnx/operators/onnx__Reshape.html#reshape-24 "Online Documentation")


Reshape the input tensor similar to numpy.reshape.
First input is the data tensor, second input is a shape tensor which specifies the output shape. It outputs the reshaped tensor.
At most one dimension of the new shape can be -1. In this case, the value is
inferred from the size of the tensor and the remaining dimensions. A dimension
could also be 0, in which case the actual dimension value is unchanged (i.e. taken
from the input tensor). If 'allowzero' is set, and the new shape includes 0, the
dimension will be set explicitly to zero (i.e. not taken from input tensor).
Shape (second input) could be an empty shape, which means converting to a scalar.
The input tensor's shape and the output tensor's shape are required to have the same number of elements.

If the attribute 'allowzero' is set, it is invalid for the specified shape to
contain both a zero value and -1, as the value of the dimension corresponding
to -1 cannot be determined uniquely.


Args:
    data: (differentiable) An input tensor.

    shape: (non-differentiable) Specified shape for output.

    allowzero: (Optional) By default, when any value in the 'shape' input is
        equal to zero the corresponding dimension value is copied from the input
        tensor dynamically. allowzero=1 indicates that if any value in the
        'shape' input is set to zero, the zero value is honored, similar to
        NumPy.
Reshaper.   r-   r   rA   )rC   r   shaper   rK   rL   s         r1   r   Opset24.Reshape  s=    > Ir2.i(4''e<R	RRr3   V_Scan)scan_input_axesscan_input_directionsscan_output_axesscan_output_directionsc          
     r    [        SSS5      n[        U SU5      n	U	" U R                  " U/UQ76 UUUUUUS.6$ )u  [🌐 Scan(24)](https://onnx.ai/onnx/operators/onnx__Scan.html#scan-24 "Online Documentation")


Scan can be used to iterate over one or more scan_input tensors,
constructing zero or more scan_output tensors. It combines ideas from general recurrences,
functional programming constructs such as scan, fold, map, and zip, and is intended to enable
generalizations of RNN-like constructs for sequence-to-sequence processing.
Other tensors (referred to as state_variables here) can be used to carry a state
when iterating from one element to another (similar to hidden-state in RNNs, also referred
to as loop-carried dependences in the context of loops).
Many common usages involve a single scan_input tensor (where functionality
similar to scan, fold and map can be obtained). When more than one scan_input is used,
a behavior similar to zip is obtained.

The attribute body must be a graph, specifying the computation to be performed in
every iteration. It takes as input the current values of the state_variables and
the current iterated element of the scan_inputs. It must return the (updated) values
of the state_variables and zero or more scan_output_element tensors. The values of the
scan_output_element tensors are concatenated over all the iterations to produce the
scan_output values of the scan construct (similar to the concatenated intermediate
hidden-state values of RNN-like constructs). All the output tensors (state_variables as
well as scan_output_element tensors) are required to have the same shape in each iteration
of the loop (a restriction imposed to enable efficient memory allocation).

Note that the iterated element passed to the body subgraph does not have a sequence
axis. It will have a rank one less than the rank of the corresponding scan_input.

The scan operation returns the final values of the state_variables as well as the
scan_outputs.

The optional attribute scan_input_directions specifies the direction (forward or backward)
for each scan input. If this attribute is omitted, all sequences are scanned in the forward
direction. A bidirectional scan may be performed by specifying the same tensor input twice
in the scan_inputs, once with a forward direction, and once with a backward direction.

The scan_output of the operation is produced by concatenating the scan_output_element
values produced by the body in each iteration.  The optional attribute scan_output_directions
specifies the direction in which scan_output is constructed (by appending or prepending the
scan_output_element to scan_output in each iteration) for each scan_output. If this attribute
is omitted, the scan_output_element is appended to the scan_output in each iteration.

The optional attribute scan_input_axes specifies the axis to be scanned for each scan_input.
If omitted, every scan_input will be scanned in axis 0. For example, if axis 0 is the
batch axis and axis 1 is the time axis (to be scanned), specify an axis value of 1.
Note that scanning a non-zero axis may be less efficient than scanning axis zero.

The optional attribute scan_output_axes specifies the axis along which the scan_outputs
are accumulated for each scan_output. For example, if axis 1 is the time axis (to be
scanned) for both inputs and outputs, specify a scan_input axis and scan_output axis
value of 1.

Note that because of the ONNX restriction that only the last parameter of an operator can
be variadic, the initial-states and scan-inputs are listed together as one input parameter.
Similarly, the final-states and scan-outputs are listed together as one output parameter.
The attribute num_scan_inputs indicates the number M of scan-inputs.

The behavior of

    Scan <
        num_scan_inputs = m,
        body = loop-body,
        scan_input_axes = [axis_1, ..., axis_m]
    > (init_1, ..., init_n, scan_1, ..., scan_m)

is equivalent to the following pseudo-code:

    // scan_i.shape[axis_i] denotes the (max) sequence-length of scan_i
    // scan_i.shape[axis_i] is required to be equal to scan_j.shape[axis_j] for all i,j.
    sequence_length = scan_1.shape[axis_1];

    // initialize state-variables
    st_1 = init_1; ... st_n = init_n;
    // initialize scan-output variables: [] denotes an empty tensor
    scan_out_1 = []; ...; scan_out_k = [];
    // identify number of iterations:

    // execute loop
    for (int t = 0; t < sequence_length; ++t) {
        // generate the scan-input elements: the notation T<axis=k>[t] indicates the sub-tensor
        // of rank one less than T obtained by indexing T at position t along axis k.
        si_1 = scan_1<axis=axis_1>[t];
        ... ;
        si_m = scan_m<axis=axis_m>[t];
        // execute loop-body
        st_1, ..., st_n, so_1, ..., so_k = loop-body(st_1, ..., st_n, si_1, ..., si_m)
        // accumulate the scan-output elements
        scan_out_1 = Concat<axis=0>(scan_out_1, so_1); ... ; scan_out_k = Concat<axis=0>(scan_out_k, so_k);
    }

    return st_1, ..., st_n, scan_out_1, ..., scan_out_k;

*Sample usage: Encoding RNN using a Scan*

The following example shows how a simple RNN over an input tensor %X, with weight tensor %Wi,
recurrence weight tensor %Ri, bias tensors %Wbi and %Rbi, and initial hidden-state %H_0 can
be encoded as a Scan. Note that the loop-body is a nested graph, and it directly computes
%Wi, %Ri, %Wbi, and %Rbi (typically constants or initializers in the body graph). If these
values are computed in the outer graph, they need to be passed in as extra state_variables.

    graph rnn-encoding {
      %H_0 = ...
      %X = ...
      %Y_h, %Y = Scan[body = <graph rnn-cell-1>, num_scan_inputs=1](%H_0, %X)
      return %Y, %Y_h
    }

    graph rnn-cell-1 (
      %H_tminus1[FLOAT, tensor]
      %X_t[FLOAT, tensor]
    ) {
      %Wi = ...
      %Ri = ...
      %Wbi = ...
      %Rbi = ...
      %t1 = X_t * (Wi^T)
      %t2 = H_tminus1*(Ri^T)
      %t3 = Add(%t1, %t2)
      %t4 = Add(%t3, %Wbi)
      %t5 = Add(%t4, %Rbi)
      %Ht = Tanh(%t5)
      %Accumulate = Identity(%Ht)
      return %Ht, %Accumulate
    }



Args:
    initial_state_and_scan_inputs: (variadic, heterogeneous) Initial values of
        the loop's N state variables followed by M scan_inputs

    body: The graph run each iteration. It has N+M inputs: (loop state
        variables..., scan_input_elts...). It has N+K outputs: (loop state
        variables..., scan_output_elts...). Each scan_output is created by
        concatenating the value of the specified scan_output_elt value at the
        end of each iteration of the loop. It is an error if the dimensions of
        these values change across loop iterations.

    num_scan_inputs: An attribute specifying the number of scan_inputs M.

    scan_input_axes: An optional list of M flags. The i-th element of the list
        specifies the axis to be scanned (the sequence axis) for the i-th
        scan_input. If omitted, 0 will be used as the scan axis for every
        scan_input. Negative value for an axis means counting dimensions from
        the back. Accepted range is [-r, r-1] where r = rank(input).

    scan_input_directions: An optional list of M flags. The i-th element of the
        list specifies the direction to be scanned for the i-th scan_input
        tensor: 0 indicates forward direction and 1 indicates reverse direction.
        If omitted, all scan_input tensors will be scanned in the forward
        direction.

    scan_output_axes: An optional list of K flags. The i-th element of the list
        specifies the axis for the i-th scan_output. The scan outputs are
        accumulated along the specified axis. If omitted, 0 will be used as the
        scan axis for every scan_output. Negative value for an axis means
        counting dimensions from the back. Accepted range is [-r, r-1].

    scan_output_directions: An optional list of K flags, one for each
        scan_output. The i-th element of the list specifies whether the i-th
        scan_output should be constructed by appending or prepending a new value
        in each iteration: 0 indicates appending and 1 indicates prepending. If
        omitted, all scan_output tensors will be produced by appending a value
        in each iteration.
Scanr.   r-   )r   num_scan_inputsr   r   r   r   rA   )
rC   r   r   r   r   r   r   initial_state_and_scan_inputsrK   rL   s
             r1   r   Opset24.Scan6  sT    ^ FB+ff%!!&I+HI++"7-#9
 	
r3   T_ShapeT1_Shapeendstartc               d    [        SSS5      n[        U SU5      nU" U R                  XA5      X#S.6$ )u  [🌐 Shape(24)](https://onnx.ai/onnx/operators/onnx__Shape.html#shape-24 "Online Documentation")


Takes a tensor as input and outputs a 1D int64 tensor containing the shape of the input tensor.
Optional attributes start and end can be used to compute a slice of the input tensor's shape.
If start axis is omitted, the slice starts from axis 0.
The end axis, if specified, is exclusive (and the returned value will not include the size of that axis).
If the end axis is omitted, the axes up to the last one will be included.
Negative axes indicate counting back from the last axis.
Note that axes will be clamped to the range [0, r], where r is the
rank of the input tensor if they are out-of-range (after adding r in the case of
negative axis). Thus, specifying any end value > r is equivalent to specifying an end
value of r, and specifying any start value < -r is equivalent to specifying a start
value of 0. If start > end, the result will be an empty shape.

Examples:

::

    Input tensor with shape: [2, 3, 4]
    No attributes specified.
    Output: [2, 3, 4]



::

    Input tensor with shape: [2, 3, 4]
    start: -1
    Output: [4]



::

    Input tensor with shape: [2, 3, 4]
    end: -1
    Output: [2, 3]



::

    Input tensor with shape: [2, 3, 4]
    start: 1
    end: 2
    Output: [3]




Args:
    data: (non-differentiable) An input tensor.

    end: (Optional) Ending axis for slicing the shape. Negative value means
        counting dimensions from the back. If omitted, sizes of all axes up to
        (including) the last one will be included.

    start: (Optional) Starting axis for slicing the shape. Default value is
        0. Negative value means counting dimensions from the back.
Shaper.   r-   r   rA   )rC   r   r   r   rK   rL   s         r1   r   Opset24.Shape  s9    ~ GR,gv&4''53LLr3   T_SizeT1_Sizec                ^    [        SSS5      n[        U SU5      nU" U R                  X!5      6 $ )u  [🌐 Size(24)](https://onnx.ai/onnx/operators/onnx__Size.html#size-24 "Online Documentation")


Takes a tensor as input and outputs an int64 scalar equal to the total number of elements of the input tensor.


Args:
    data: (non-differentiable) An input tensor.
Sizer.   r-   rA   )rC   r   rK   rL   s       r1   r   Opset24.Sizep  s6     FB+ff%4''566r3   T_SplitToSequenceI_SplitToSequenceS_SplitToSequencerv   keepdimsc               f    [        SSS5      n[        U SU5      nU" U R                  XQU5      X4S.6$ )u  [🌐 SplitToSequence(24)](https://onnx.ai/onnx/operators/onnx__SplitToSequence.html#splittosequence-24 "Online Documentation")


Split a tensor into a sequence of tensors, along the specified 'axis'.
Lengths of the parts can be specified using the optional argument 'split'.
If the argument 'split' is not specified, a default scalar value of 1
is used as the value of 'split'.
'split' must contain only positive numbers.
'split' is either a scalar (tensor of empty shape), or a 1-D tensor.
If 'split' is a scalar, then 'input' will be split into chunks all of size 'split'
if possible. The last chunk alone may be smaller than 'split' if the 'input' size
along the given axis 'axis' is not divisible by 'split'.
If 'split' is a 1-dimensional tensor, the input tensor is split into 'size(split)' chunks,
with lengths of the parts on 'axis' specified in 'split'. In this scenario, the sum of entries
in 'split' must be equal to the dimension size of input tensor on 'axis'.


Args:
    input: The tensor to split

    split: (optional) Length of each output. It can be either a scalar(tensor of
        empty shape), or a 1-D tensor. All values must be >= 0.

    axis: Which axis to split on. A negative value means counting dimensions
        from the back. Accepted range is [-rank, rank-1].

    keepdims: Keep the split dimension or not. Default 1, which means we keep
        split dimension. If input 'split' is specified, this attribute is
        ignored.
SplitToSequencer.   r-   r   rA   )rC   rX   splitrv   r   rK   rL   s          r1   r   Opset24.SplitToSequence  s=    N -r26'04''u=D\\r3   	T_Squeezec                `    [        SSS5      n[        U SU5      nU" U R                  X1U5      6 $ )u  [🌐 Squeeze(24)](https://onnx.ai/onnx/operators/onnx__Squeeze.html#squeeze-24 "Online Documentation")


Remove single-dimensional entries from the shape of a tensor.
Takes an input `axes` with a list of axes to squeeze.
If `axes` is not provided, all the single dimensions will be removed from
the shape. If an axis is selected with shape entry not equal to one, an error is raised.


Args:
    data: (differentiable) Tensors with at least max(dims) dimensions.

    axes: (optional, non-differentiable) 1D tensor of integers indicating the
        dimensions to squeeze. Negative value means counting dimensions from the
        back. Accepted range is [-r, r-1] where r = rank(data).
Squeezer.   r-   rA   rC   r   r   rK   rL   s        r1   r   Opset24.Squeeze  s8    $ Ir2.i(4''d;<<r3   T_Swishg      ?)alphac               d    [        SSS5      n[        U SU5      nU" U R                  X15      SU06$ )ug  [🌐 Swish(24)](https://onnx.ai/onnx/operators/onnx__Swish.html#swish-24 "Online Documentation")


Swish function takes one input data (Tensor<T>) and produces one output data (Tensor<T>) of the same shape,
where $Swish(x) = x * sigmoid(alpha * x)$.


Args:
    X: (differentiable) Input tensor

    alpha: Coefficient to multiply with input before sigmoid.
Swishr.   r-   r   rA   )rC   Xr   rK   rL   s        r1   r   Opset24.Swish  s;     GR,gv&4''2@%@@r3   T_TensorScatterlinearrv   r   c               h    [        SSS5      n[        U SU5      nU" U R                  XaX#5      UUS.6$ )u
  [🌐 TensorScatter(24)](https://onnx.ai/onnx/operators/onnx__TensorScatter.html#tensorscatter-24 "Online Documentation")


TensorScatter is a generic tensor update operation, motivated by the requirements for KV cache updates for Attention
ops commonly found in LLMs. It is a functional operation that models an in-place update to a KV cache buffer.

The past and present cache tensors have the same shape (batch_size, D1, D2, ..., max_sequence_length, ..., Dn), with
the sequence dimension (indicated by the `axis` attribute) being max_sequence_length, so the sizes of these tensors do
not need to grow between iterations. The `update` tensor's shape only differs from the cache tensors in the sequence
dimension: (batch_size, D1, D2, ..., sequence_length, ..., Dn), where sequence_length <= max_sequence_length.

The optional `write_indices` input indicates the write index for each sample in the batch, assumed to be zero
if not provided. When the `mode` attribute is set to "circular", the write index is modulo max_sequence_length.
The operation can be described using the following pseudocode:

::

    for prefix_idx in np.ndindex(past_cache.shape[:axis]):
        batch_idx = prefix_idx[0]
        for sequence_idx in range(sequence_length):
            cache_idx = (*prefix_idx, write_indices[batch_idx] + sequence_idx)
            if mode == "circular":
                cache_idx = tuple(np.mod(np.asarray(cache_idx), max_sequence_length))
            update_idx = (*prefix_idx, sequence_idx)
            present_cache[cache_idx] = update[update_idx]



During the prefill phase of attention, only the first two inputs are needed. During the decode phase, `write_indices`
is also needed so that the incoming key or value update can be appended after the last valid token for each sample
in the batch.


Args:
    past_cache: (differentiable) Past state cache for key or value with shape
        `(batch_size, D1, D2, ..., max_sequence_length, ..., Dn)`.

    update: (differentiable) New update tensor with shape `(batch_size, D1, D2,
        ..., sequence_length, ..., Dn)`.

    write_indices: (optional, non-differentiable) Write indices for the incoming
        update tensor in the cache. Shape is `(batch_size,)`. Assumed to be all
        zeros if not provided.

    axis: Sequence dimension of the `past_cache` and `update` tensors. It cannot
        be 0 (the batch dimension). Default is -2.

    mode: Write mode of cache update. Supported modes include `linear` and
        `circular`. `linear` mode requires
        write_indices+sequence_length<=max_sequence_length. For `circular` mode,
        the updates happen in wrap-around fashion, i.e., the update index is
        modulo `max_sequence_length`.
TensorScatterr.   r-   r   rA   )rC   
past_cacheupdatewrite_indicesrv   r   rK   rL   s           r1   r   Opset24.TensorScatter5  sD    ~ OR4ov.!!&fL
 	
r3   T_TopKI_TopKrv   largestsortedc               j    [        SSS5      n[        U SU5      nU" U R                  XaU5      UUUS.6$ )u  [🌐 TopK(24)](https://onnx.ai/onnx/operators/onnx__TopK.html#topk-24 "Online Documentation")


Retrieve the top-K largest or smallest elements along a specified axis. Given an input tensor of
shape [a_0, a_1, ..., a_{n-1}] and integer argument k, return two outputs:

* Value tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1}]
  which contains the values of the top k elements along the specified axis
* Index tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1}] which
  contains the indices of the top k elements (original indices from the input
  tensor).

* If "largest" is 1 (the default value) then the k largest elements are returned.
* If "sorted" is 1 (the default value) then the resulting k elements will be sorted.
* If "sorted" is 0, the order of the returned 'Values' and 'Indices' is undefined.

Given two equivalent values, this operator uses the indices along the axis as
a tiebreaker. That is, the element with the lower index will appear first.


Args:
    X: (differentiable) Tensor of shape [a_0, a_1, ..., a_{n-1}]

    K: (non-differentiable) A 1-D tensor containing a single positive value
        corresponding to the number of top elements to retrieve

    axis: Dimension on which to do the sort. Negative value means counting
        dimensions from the back. Accepted range is [-r, r-1] where r =
        rank(input).

    largest: Whether to return the top-K largest or smallest elements.

    sorted: Whether to return the elements in sorted order.
TopKr.   r-   r   rA   )rC   r   rE   rv   r   r   rK   rL   s           r1   r   Opset24.TopK  sG    L FB+ff%!!&Q/	
 	
r3   T_Transpose)permc               d    [        SSS5      n[        U SU5      nU" U R                  X15      SU06$ )u  [🌐 Transpose(24)](https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-24 "Online Documentation")


Transpose the input tensor similar to numpy.transpose. For example, when
perm=(1, 0, 2), given an input tensor of shape (1, 2, 3), the output shape
will be (2, 1, 3).


Args:
    data: (differentiable) An input tensor.

    perm: A list of integers. By default, reverse the dimensions, otherwise
        permute the axes according to the values given. Its length must be equal
        to the rank of the input.
	Transposer.   r-   r   rA   )rC   r   r   rK   rL   s        r1   r   Opset24.Transpose  s;    & KR0k6*4''5ADAAr3   T_Unsqueezec                `    [        SSS5      n[        U SU5      nU" U R                  X1U5      6 $ )u  [🌐 Unsqueeze(24)](https://onnx.ai/onnx/operators/onnx__Unsqueeze.html#unsqueeze-24 "Online Documentation")


Insert single-dimensional entries to the shape of an input tensor (`data`).
Takes one required input `axes` - which contains a list of dimension indices and this operator will insert a dimension of value `1` into the corresponding index of the output tensor (`expanded`).

For example, given an input tensor (`data`) of shape [3, 4, 5], then
Unsqueeze(data, axes=[0, 4]) outputs a tensor (`expanded`) containing same data as `data` but with shape [1, 3, 4, 5, 1].

The input `axes` should not contain any duplicate entries. It is an error if it contains duplicates.
The rank of the output tensor (`output_rank`) is the rank of the input tensor (`data`) plus the number of values in `axes`.
Each value in `axes` should be within the (inclusive) range [-output_rank , output_rank - 1].
The order of values in `axes` does not matter and can come in any order.


Args:
    data: (differentiable) Original tensor

    axes: (non-differentiable) 1D tensor of integers indicating the dimensions
        to be inserted. Negative value means counting dimensions from the back.
        Accepted range is [-r, r-1] where r = rank(expanded).
	Unsqueezer.   r-   rA   r   s        r1   r   Opset24.Unsqueeze	  s8    0 KR0k6*4''d;<<r3    )NNNN)rD   r4   rE   r4   rF   r5   rG   zOptional[U_Attention]rH   zOptional[T1_Attention]rI   zOptional[T2_Attention]rJ   Optional[INT64]r8   intr9   Optional[int]r:   r   r;   r   r<   Optional[float]r=   floatr>   r   returnz=Tuple[T1_Attention, T1_Attention, T2_Attention, T1_Attention])
rX   rN   rS   strrT   r   rW   r   r   rO   )
rX   rZ   r^   r[   rS   r  rT   r   r   r[   )rb   zOptional[SparseTensorProto]rc   Optional[TensorProto]rd   r   re   zOptional[Sequence[float]]rf   r   rg   Optional[Sequence[int]]rh   zOptional[str]ri   zOptional[Sequence[str]]r   r`   )rX   rm   rc   r  r   rn   )N)r{   rr   r|   rs   r}   zOptional[T1_DequantizeLinear]rv   r   rw   r   rx   r   r   rt   )rX   r   rv   r   r   r   )rX   r   r   r   )r   r   r   r	   r   r	   r   r   )
r   zOptional[I_Loop]r   zOptional[B_Loop]r   r   r   r	   r   r   )NN)r   r   r   r    r   zOptional[T_Pad]r   zOptional[Tind_Pad]r   r  r   r   )r{   r   r   r   r   zOptional[T3_QuantizeLinear]rv   r   rw   r   rx   r   r   r   rT   r   r   r   )r   r   r   r    r   r   r   r   )r   r   r   r	   r   r   r   r  r   r  r   r  r   r  r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   )
rX   r   r   zOptional[I_SplitToSequence]rv   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   rv   r   r   r  r   r   )r   r   rE   r    rv   r   r   r   r   r   r   zTuple[T_TopK, I_TopK])r   r   r   r  r   r   )r   r   r   r    r   r   )c__name__
__module____qualname____firstlineno__r/   r   r   r   r   r   r4   r5   r   r   r   r    r   r$   r%   r&   r#   r6   r@   r   r   r   r   r   r   r   r!   r"   rN   r   rO   __annotations__rV   rZ   r[   r]   r   r   r`   rk   rm   rn   rp   rr   rs   rt   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __static_attributes__r   r3   r1   r*   r*   3   s   * >8VUGLL>8VUGLLK, ,0+/-1,0b
 &*%)%&!%+/!b
b
 b
 	b

 )b
 )b
 +b
 *b
 b
 $b
 #b
  #b
 b
 b
  )!b
" 
G#b
H /G4 +	GY 4 48s
s
-0s
CFs
PSs
	s
j /K4 /K> )
)
 !)

 )
 )
 
)
V "/	J	 < 59'+'+26#'.2&*156
 26
 %	6

 %6
 06
 !6
 ,6
 $6
 /6
 
6
p %*	)$))	%	 2 LPE'E3HE	E, "  ""75'S]^%*8UG+C%DD 7;	<
 <
<
 %<
 4	<
 <
 <
 <
 
<
| 3I8 89 C0  GG$ G 	*%&G 	)$%	G
 	&!"G 	%!G 	'"#G 	%!G 	%!G 	%!G 	$ G 	&!"G 	&!"G 	&!"G 	&!"G  	%!!G" 	#G$ 	%G& 	'G( 	)G* 	+G, 	-G. 	/G0 	1G2 	3G4 	5G6 	7G8 	9G: 	;G< 	=G> 	?G@ 	AGB 	CGD 	EGF 	GGH 	IGJ 	KGL 	MGN 	OGP 	QGR 	SGT 	UGV 	WGX 	YGZ 	[G\ 	]G^ 	_G` 	aGb 	cGd 	eGf 	gGh 	iGj 	kGl 	mGn 	oGp 	qGr 	sGt 	uGv 	wGx 	yGz 	{G| 	}G~ 	G@ 	AGB 	CGD 	EGF 	GGH 	IGJ 	KGL 	MGJR8 D)0	 0	0	0	 	0	 		0	
 	0	 	0	 	0	 	0	 	0	 	0	 	0	 	0	 	0	 	0	 	0	  	!0	" 	#0	$ 	%0	& 	'0	( 	)0	* 	+0	, 	-0	. 	/0	0 	10	2 	30	4 	50	6 	70	8 	90	: 	;0	< 	=0	> 	?0	@ 	A0	B 	C0	D 	E0	F 	G0	H 	I0	J 	K0	L 	M0	N 	O0	P 	Q0	R 	S0	T 	U0	V 	 W0	X 	Y0	Z 	 [0	\ 	]0	^ 	_0	` 	a0	2D) 2h
2 FIFI ZZ(#$Z 	$ Z 	*%&	Z
 	)$%Z 	&!"Z 	%!Z 	'"#Z 	%!Z 	%!Z 	%!Z 	$ Z 	&!"Z 	&!"Z 	&!"Z  	&!"!Z" 	%!#Z$ 	%Z& 	'Z( 	)Z* 	+Z, 	-Z. 	/Z0 	1Z2 	3Z4 	5Z6 	 7Z8 	9Z: 	 ;Z< 	=Z> 	?Z@ 	AZB 	CZD 	EZF 	GZH 	IZJ 	KZL 	MZN 	OZP 	QZR 	SZT 	UZV 	WZX 	YZZ 	[Z\ 	]Z^ 	_Z` 	aZb 	cZd 	eZf 	 gZh 	iZj 	 kZl 	mZn 	oZp 	qZr 	sZt 	uZv 	wZx 	yZz 	{Z| 	}Z~ 	Z@ 	AZB 	CZD 	EZF 	GZH 	IZJ 	KZL 	MZN 	OZP 	QZR 	SZT 	UZV 	WZX 	YZZ 	[Z\ 	]Z^ 	_Z` 	aZb 	cZd 	eZf 	gZh 	iZj 	kZl 	mZn 	oZp 	qZr 	sZFxhQhQ hQ 	hQ
 hQ 
hQT 3E8 z5%0H +/#'V^ V^V^ V^ (	V^
 !V^ V^ 
V^p   3XuguUXugz5  & 59	_
 _
_
 #_
 2	_
 _
 _
 _
 _
 _
 
_
B 3I8 JK !SF 3FB 489=48:>y
(.y
 y
 	y

 1y
  7y
 2y
 !8y
 
y
v 3G8  Hi;?a AMF 3F8 GY7  #(   3UEB#(	$y , .2)]
 )] )] +)]
 )] )] 
)]V 3I8=, i65'BG25 A$ 3O@ *.	E
 E
#E
  E
 '	E
 E
 E
 
E
N F  FI 35QVW-
-
!-
,/-
?B-
PS-
	-
^ 3K: EIBB*AB	B. 3K8=r3   r*   N)/
__future__r   typingr   r   r   r   r   onnxr	   r
   r   	onnx.defsr   typing_extensionsr   #onnxscript.onnx_opset._impl.opset23r   onnxscript.onnx_typesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   onnxscript.valuesr'   r(   r*   r   r3   r1   <module>r     sU    # < < ; ;   ' 7      4 (s#=g s#=r3   