
    Ki                       S SK Jr  S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
r
S SKrS SKJrJrJr  S SKJrJrJr  S SKJr  S SKJrJrJrJr  S S	KJr  S S
KJrJrJr   S SK J!r!  Sr#Sr$Sr%Sr&Sr'Sr(Sr)Sr*Sr+Sr,0 r-\." \5       V s0 s H$  n \/" \0" \U 5      \15      (       d  M  \0" \U 5      U _M&     sn r2 " S S\5      r3 " S S\5      r4 " S S\5      r5 " S S\5      r6\R"                  Rn                  \
Rp                  " S5      \R"                  Rr                  \
Rp                  " S5      \R"                  Rt                  \
Rp                  " S 5      \R"                  Rv                  \
Rp                  " S!5      \R"                  Rx                  \\R"                  Rz                  \\R"                  R|                  \0r?\R"                  Rr                  \
R                  " S \
R                  S"9\
R                  " S#\
R                  S"94\R"                  Rn                  \
R                  " S$\
R                  S"9\
R                  " S%\
R                  S"94\R"                  Rv                  \
R                  " S \
R                  S"9\
R                  " S&\
R                  S"94\R"                  Rt                  \
R                  " S'\
R                  S"9\
R                  " S(\
R                  S"94\R"                  R|                  \
R                  " S \S"9\
R                  " S)\S"94\R"                  Rz                  \
R                  " S*\S"9\
R                  " S+\S"940rE\R"                  Rn                  \
R                  " S,\
R                  S"9\
R                  " S%\
R                  S"94\R"                  Rt                  \
R                  " S-\
R                  S"9\
R                  " S(\
R                  S"940rF\R"                  Rr                  \
R                  " S \
R                  S"9\
R                  " S%\
R                  S"94\R"                  Rn                  \
R                  " S.\
R                  S"9\
R                  " S/\
R                  S"94\R"                  Rv                  \
R                  " S \
R                  S"9\
R                  " S(\
R                  S"94\R"                  Rt                  \
R                  " S0\
R                  S"9\
R                  " S1\
R                  S"94\R"                  R|                  \
R                  " S \S"9\
R                  " S+\S"94\R"                  Rz                  \
R                  " S2\S"9\
R                  " S3\S"940rGS4S5.S6 jrHS`S7 jrISaS8 jrJS9 rK    Sb               ScS: jjrL Sb SdS; jjrM  S`             SeS< jjrNSfS= jrOSfS> jrPSgS? jrQShS@ jrR " SA SB5      rS " SC SD5      rT " SE SF5      rUSG rVSH rWSI rXSJ rYSiSK jrZSL r[SjSM jr\SkSN jr]SlSO jr^SmSP jr_SnSQ jr`SoSR jraSnSS jrbSoST jrcSpSU jrdSqSV jreSrSW jrfSsSX jrgStSY jrhSuSZ jriSuS[ jrjSvS\ jrkSvS] jrlSvS^ jrmSvS_ jrng! \" a    Sr! GNf = fs  sn f )w    )annotationsN)Enum)Path)float8_e4m3fnint4uint4)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptions)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantizationMode4   r      c                    U R                   $ Nnameselfs    f/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/onnxruntime/quantization/quant_utils.py__str__QuantizationMode.__str__8       yy    c                F     [         U    $ ! [         a    [        5       ef = fr!   )r   KeyError
ValueError)modes    r&   from_stringQuantizationMode.from_string;   s)    	#D)) 	,	      N)
__name__
__module____qualname____firstlineno__
IntegerOps
QLinearOpsr'   staticmethodr/   __static_attributes__r2   r*   r&   r   r   4   s%    JJ  r*   r   c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantizedValueTypeC   r   r   c                    U R                   $ r!   r"   r$   s    r&   r'   QuantizedValueType.__str__G   r)   r*   c                F     [         U    $ ! [         a    [        5       ef = fr!   )r<   r,   r-   )vs    r&   r/   QuantizedValueType.from_stringJ   s)    	%a(( 	,	r1   r2   N)
r3   r4   r5   r6   InputInitializerr'   r9   r/   r:   r2   r*   r&   r<   r<   C   s%    EK  r*   r<   c                  V    \ rS rSrSrSrSrSrSrSr	Sr
S	 r\S
 5       r\S 5       rSrg)	QuantTypeR   r   r                  c                    U R                   $ r!   r"   r$   s    r&   r'   QuantType.__str__[   r)   r*   c                F     [         U    $ ! [         a    [        5       ef = fr!   )rF   r,   r-   )ts    r&   r/   QuantType.from_string^   s(    	Q< 	,	r1   c                   U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R
                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ U [         R                  :X  a  [        R                  $ [!        SU < S35      e)NzUnexpected value qtype=.)rF   QInt8r
   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r-   r$   s    r&   tensor_typeQuantType.tensor_typee   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r*   r2   N)r3   r4   r5   r6   rT   rV   r\   rZ   rX   r`   r^   r'   r9   r/   propertyrb   r:   r2   r*   r&   rF   rF   R   sR    EFMFGEF   > >r*   rF   c                  2    \ rS rSrSrSrS r\S 5       rSr	g)QuantFormatx   r   r   c                    U R                   $ r!   r"   r$   s    r&   r'   QuantFormat.__str__|   r)   r*   c                F     [         U    $ ! [         a    [        5       ef = fr!   )rf   r,   r-   )formats    r&   r/   QuantFormat.from_string   s)    	v&& 	,	r1   r2   N)
r3   r4   r5   r6   	QOperatorQDQr'   r9   r/   r:   r2   r*   r&   rf   rf   x   s%    I
C  r*   rf   int8uint8int16uint16dtype   i   i  i i     i   iii@   i i @  rI   zero_point_indexc                d   / n[        U5       GH   u  p4[        R                  " [        U5      [        R                  5      (       a&  UR                  [        R                  " U5      5        OB[        U[        R                  5      (       a  UR                  U5        O[        SU SU 35      eX0:X  d  M  US   nUR                  [        R                  :X  d   UR                  [        R                  :X  d  M  [        SUR                   35      e   [        U5      S:  a  [        U5      $ US   $ )Nzarg z is not an array: r{   zzero_point cannot be r   r   )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorrt   float32float16lentuple)r}   argsnew_argsiarA   s         r&   _check_typer      s    H$DGU\\22OOEKKN+5==))OOAd1#%7s;<< Aww%--'177emm+C"7y ABB   "(ma/5?@Xa[@r*   c                   U [         ;   d   SU  S35       eU [        R                  R                  [        R                  R                  [        R                  R
                  [        R                  R                  4;   Ga/  US:w  a  [        SU< S35      eUR                  [        R                  :X  a  [        R                  nOHUR                  [        R                  :X  a  [        R                  nO[        SUR                   S35      e[        [!        [#        S/ S/[$        R&                  R)                  SU / S/5      S	9[#        S
/ SQS/5      /S[+        SUS 5      [+        SUS 5      /[+        SU S 5      /5      5      n[-        U5      n[/        UR1                  S XS.5      S   5      $ [         U    n	[3        U SSS9u  pUb  [5        X5      OU
nUb  [7        X5      OUn[        R8                  " UR;                  [        R                  5      U-  R=                  5       U-   5      n[        R>                  " XXS9  [/        UR;                  U	5      5      $ )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rS   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor
   r]   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrt   r   r   FLOATr   FLOAT16r-   r   r   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrt   qminqmaxcliplowcliphigharr_fp32s                  r&   quantize_nparrayr      s!   (( 
w&de( ++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774s)CDQGHH %U+,URWX
$'O#d.&*&63t?D==#**U]]";e"C!J!J!Lz!YZ

8h=8??5122r*   c           
        US:  d  US:  a  [        SU SU 35      e[        R                  " U [        R                  " SU R                  S95      n [        R
                  " U[        R                  " SUR                  S95      nUb*  [        X[        R                  " XPR                  S9-   5      nU(       aE  [        R
                  " [        R                  " U 5      [        R                  " U5      5      nU* n U7nX#::  d   SU  SU 35       e[        R                  " X-
  [        R                  S9n[        R                  " U[        R                  S9[        R                  " U[        R                  S9-
  n[        R                  " Xx-  5      n	U	S:  d   S5       eU	[        R                  " UR                  5      R                  :  aA  [        R                  " SUR                  S9n	[        R                  " SUR                  S9n
X/$ U(       aZ  [        R                  " [        R                  " X#-   [        R                  " S	[        R                  S9-  5      UR                  S9n
O8[        R                  " [        R                  " X U	-  -
  5      UR                  S9n
U	R                  UR                  5      n	X/$ )
a  Calculate the scale s and zero point z for the quantization relation
r = s(q-z), where r are the original values and q are the corresponding
quantized values.

r and z are calculated such that every value within [rmin,rmax] has an
approximate representation within [qmin,qmax]. In addition, qmin <= z <=
qmax is enforced. If the symmetric flag is set to True, the interval
[rmin,rmax] is symmetrized to [-absmax, +absmax], where
absmax = max(abs(rmin), abs(rmax)).

:parameter rmin: minimum value of r
:parameter rmax: maximum value of r
:parameter qmin: minimum value representable by the target quantization data type
:parameter qmax: maximum value representable by the target quantization data type
:parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:return: zero and scale [z, s]

r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rs   zqmin=z > qmax=zscale issue      ?g       @)r-   r   minimumr   rt   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r&   compute_scale_zpr      s   ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4nJJ OOPuyy		$@ww<55htf55<	T[	6B	T	/%++d%--2X	XBKK EA:$}$:u{{4::&+++Ctzz2[[$**5
   T[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r*   c                   SnU [         ;  a  U [        R                  :X  a  SSKJn  Un[        S5       Vs/ s H  n[        U5      PM     nn[        R                  " U Vs/ s H?  n[        R                  " U5      (       a  M   [        R                  " U5      (       a  M=  UPMA     sn[        R                  S9nO[        SU  S35      eU[         U '   OU [        R                  :X  a  SSKJn  UnUc  [        SU  S	35      e[        R                  " [         U    5      n[        R                  " SUS9n	[        R                  " X-  UR                  S9n
X/$ s  snf s  snf )
aZ  Calculate the scale s for a float8 type (E4M3FN).
The function assumes the coefficient distribution and the float 8
distribution are similar to two gaussian laws.

:return: zero and scale [z, s]

More details in notebook `quantization_fp8.ipynb
<https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
Nr   )r      rs   zQuantization to element_type=z not implemented.zUnexpected element_type rS   )FLOAT8_DISTRIBUTIONSr
   r]   	ml_dtypesr   rangefloatr   r   isnanisinfr   r-   r   stdrt   )element_typer   zp_dtyper   r   
all_valuesfvaluesstd_f8zeror   s              r&   compute_scale_zp_float8r   ,  s"    H//;333/$H,1#J7Jq%(JJ7[[&TJqekk!nU[[QR^JT\a\i\iF <\NJ[\]]-3\*	11	1+ 2<.BCCYY+L9:F;;q)DKKCII6E=# 8Ts   E!EE!Ec                   [        U [        R                  5      (       d  [        S[	        U 5       S35      eUb  UnO"[        U 5      (       a  U R                  5       OSnUb  UnO"[        U 5      (       a  U R                  5       OSn[        R                  " XpR                  S9n[        R                  " XR                  S9n[        R                  " SU R                  S9n	U[        R                  :X  a?  U(       a  [        S5      e[        R                  " U 5      n
[        X5      u  p[        XSS9$ U[        R                   [        R"                  [        R$                  [        R&                  [        R(                  [        R*                  4;   aU  [-        XUS	9u  p[        U 5      (       a  [/        XxXX$5      u  pO[        R                  " SUR                  S9n[        XSS9$ [1        S
U S35      e)a  
Returns the zero_point and scale for the given data.

:param data: The data for which to compute quantization parameters.
:param quant_type: The quantization data type.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: zero point and scale
z%Weight must be given as an array not rS   g        rs   r   z1Unsupported option reduce_range=True for float 8.r   r|   r   z Unexpected value for quant_type=)r   r   r   r   r   r   r   r   r   rt   r
   r]   RuntimeErrorr   r   r   rU   rW   r[   rY   ra   r_   r   r   r-   )data
quant_typer   r   r   rmin_overridermax_overrider   r   r   r   r   r   r   s                 r&   compute_data_quant_paramsr   P  s   * dEMM**?T
|1MNN  YYtxxzC  YYtxxzC;;t::.D;;t::.DKK4::.E[---RSSiio3JD
:qAA  -ZQZ[
t99 0T cJQdjj9J:qAA
7
|1E
FFr*   c                   [        U UUUUUU5      u  pxU[        R                  :X  a  [        XX5      n	[	        U	R                  [        R                  5      R                  5       S-  S:H  5      (       af  [        R                  " U 5      n
[        SU
R                  5        SU
R                  5        SU	R                  5        SU	R                  5        S3	5      eXxU	4$ U[        R                  [        R                  [        R                  [        R                   [        R"                  [        R$                  4;   a  [        XX5      n	XxU	4$ ['        SU S35      e)a   
:param data: data to quantize
:param qType: data type to quantize to.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: minimum, maximum, zero point, scale, and quantized weights

To pack weights, we compute a linear transformation

- when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
- when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
    `m = max(abs(rmin), abs(rmax))`

and add necessary intermediate nodes to transform quantized weight to full weight using the equation

:math:`r = S(q-z)`, where

- *r*: real original value
- *q*: quantized value
- *S*: scale
- *z*: zero point
rv   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rS   )r   r
   r]   r   anyviewr   rp   ravelr   r   r   r   rU   rW   r[   rY   ra   r_   r-   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r&   quantize_datar     sO   8 2J ((()%uI##EKK06683>3FGGmmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  .00  *%uI.00
25';
<<r*   c                   [        U 5      nSnUc  [        XR                  5       X25      nOUR                  U   n[	        UR                  5      n	SX'   / n
[        U5       Hj  nUR                  X5      nX;   nX+   n[        XR                  5       X5      nU
R                  [        R                  " U5      R                  U	5      5        Ml     [        R                  " X5      nU(       a  UOU R                   [         3nU[        R                  R                   :X  Ga&  [        R                  " 5       nUUl        UR$                  R'                  U R$                  5        UUl        UR)                  5       R+                  5       R-                  5       Ul        [0        b  [1        U5      nUR                  UR                  :w  d"  UR-                  5       UR-                  5       :w  a]  [3        SUR                   SUR-                  5       SS  SUR-                  5       SS  SU R                   S[5        U5      SS	  S
35      eU$ U[        R                  R6                  [        R                  R8                  4;   aw  UR:                  [<        [>        4;  a  [3        SU S35      e[A        [C        UR-                  5       5      5      n[        RD                  RG                  UXR$                  USS9nU$ [        RD                  RI                  U5      n[        R                  " UUS9R                  U R$                  5      n[        RJ                  RM                  UU5      nU$ )a  
Returns a quantized version of the given ONNX initializer.

:param weight: The ONNX initializer to quantize.
:param quant_type: The final quantized data type.
:param zero_point: The zero-point value to use for quantization.
:param scale: The scale value to use for quantization.
:param axis: The quantization axis if quantizing per-channel. Defaults to None.
:param quant_weight_name: The name of the quantized initializer.
                          If not specified, the quantized name is generated.
:return: The quantized ONNX initializer.
Nr   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rS   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrs   )'tensor_proto_to_arrayr   r   shapelistr   taker   r   r   reshapeconcatenater#   TENSOR_NAME_QUANT_SUFFIXr   r
   r]   	data_typedimsextendflattencopytobytesraw_datar   r   strra   r_   rt   r   r   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)weightr   r   r   axisquant_weight_nameweight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r&   quantize_onnx_initializerr    s   ( (/K*.M|(5F5F5H%\#))$/K--.&(#}%A&++A4L!HM!+%5..0-&" (..u}}=S/T/\/\]i/jk & ))*EL):%6;;-PhOi@jMT%%222#//1)3&!!((5$1!(5(=(=(?(D(D(F(N(N(P%( &&:;E{{k///5==?mF[F[F]3]"/0A0A/BBc$,,.s34F5==?3B;O:PP[\b\h\h[iS!56t<=Q@ (   
((--t/?/?/E/EF	FtUm3!7Ftuvv .}/D/D/FGH  ${{66}jR]R]_jpt6u  	 ==jIm>JRRSYS^S^_#00;;M=Yr*   c                   U [         R                  R                  :X  a  [        S5      eSnU(       a  [        R                  U 5      nO0U(       a  U [        ;   a
  [        U    nO[        R                  U 5      nU(       d  [        SU  S35      eUu  pEUS:  d  US:  a'  [        SU SU SUR                   S	U S
U SU  35      eU$ )z
Return qmin and qmax, the minimum and maximum value representable by the given qType
:parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
:return: qmin, qmax
z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r
   r]   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr-   rt   )r   r   r   qranger   r   s         r&   r   r     s     
&&333!"_``F,007	u ==.u5$((/07uvwwJDax4!86$x

|?<. Y"8E74
 	
 Mr*   c                "    [        XUS9u  p4XC-
  $ )z
Helper function to get the quantization range for a type.
    parameter qType: quantization type.
    return: quantization range.
r   )r   )r   r   r   r   r   s        r&   get_qrange_for_qTyper!  :  s     )	RJD;r*   c                @    U S:  a  X-   OU nUS:  =(       a    X!:  nX24$ )z
Helper function that tries to return a normalized axis in the range [0, rank - 1].
:parameter axis: The axis to normalize.
:parameter rank: The tensor rank (number of dimensions).
:return (is_valid, axis_norm)
r   r2   )r	  rank	axis_normis_valids       r&   normalize_axisr&  D  s-      $axTIA~2)"2Hr*   c                    [        U 5      nUS:X  a
  [        5       $ US-   S-  n[        U5      nSnSnXAS-
  :  a+  XS-      S-  S-  X   S-  -  X5'   US-  nUS-  nXAS-
  :  a  M+  XA:  a	  X   S-  X5'   U$ )a.  
Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
Assumes that the source values are already in the appropriate int4 range.
:parameter src_8bit: The 8-bit element values to pack.
:return A bytearray with every two 8-bit src elements packed into a single byte.
r   r   rH   rw   rJ   )r   	bytearray)src_8bit	num_elemsdst_sizedstsrc_idst_is         r&   r  r  P  s     HIA~{A!#H
H
CEE a-
	*S0Q68?S;PQ


 a-

 _s*
Jr*   c                  (    \ rS rSrSr/ / S4S jrSrg)QuantizedInitializerin  zB
Represents a linearly quantized weight input from ONNX operators
Nc
                p    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        g r!   )	r#   initializerrminsrmaxszero_pointsscalesr   r   r	  )
r%   r#   r2  r3  r4  r5  r6  r   r   r	  s
             r&   __init__QuantizedInitializer.__init__s  s4     	&

&	,	r*   )	r	  r   r2  r#   r   r4  r3  r6  r5  r3   r4   r5   r6   __doc__r7  r:   r2   r*   r&   r0  r0  n  s     r*   r0  c                  *    \ rS rSrSr    SS jrSrg)QuantizedValuei  zA
Represents a linearly quantized value (input\output\intializer)
Nc
                p    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        g r!   )	original_nameq_name
scale_namezp_name
value_typer	  	node_type
node_qtype
scale_type)
r%   r#   new_quantized_namer@  zero_point_namequantized_value_typer	  rC  rD  rE  s
             r&   r7  QuantizedValue.__init__  s2     "($&.	"$$r*   )	r	  rD  rC  r>  r?  r@  rE  rB  rA  )NNNNr9  r2   r*   r&   r<  r<    s     %r*   r<  c                      \ rS rSrSrS rSrg)BiasToQuantizei  z#
Represents a bias to be quantized
c                (    Xl         X l        X0l        g r!   	bias_name
input_nameweight_name)r%   rN  rO  rP  s       r&   r7  BiasToQuantize.__init__  s    "$&r*   rM  Nr9  r2   r*   r&   rK  rK    s    'r*   rK  c                   U R                   S:X  a  [        SU R                   S35      eU R                   S:X  a  U R                  nGO,U R                   S:X  a  U R                  nGOU R                   S:X  a  U R
                  nOU R                   S:X  a  U R                  nOU R                   S:X  a  U R                  nOU R                   S	:X  a  U R                  nOU R                   S
:X  a  U R                  nO}U R                   S:X  a  U R                  nO`U R                   S:X  a  U R                  nOCU R                   S:X  a  U R                  nO&[        SU R                   SU R                    S35      eU R                  U0$ )z
Convert attribute to kwarg format for use with onnx.helper.make_node.
    :parameter attribute: attribute in AttributeProto format.
    :return: attribute in {key: value} format.
r   z
attribute z does not have type specified.r   rH   rI   rJ   rK   rL   rx      	   r   z has unsupported type rS   )r   r-   r#   r   r   srP   gfloatsintsstringstensorsgraphs)	attributer   s     r&   attribute_to_kwargr]    s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r*   c                ~    U Vs/ s H  o"R                   U :X  d  M  UPM     nn[        U5      S:  a  US   $ S$ s  snf )z
Helper function to find item by name in a list.
    parameter item_name: name of the item.
    parameter item_list: list of items.
    return: item if found. None otherwise.
r   N)r#   r   )	item_name	item_listitemitemss       r&   find_by_namerc    s@     (Bid99	+ATiEB5zA~58/4/ Cs   ::c                X    Sn[        [        U5      5       H  nX   U :X  d  M  UnM     U$ )z;
Helper function to return index of an item in a node list
r{   )r   r   )	elem_name	elem_listelem_idxr   s       r&   get_elem_indexrh    s2     H3y>"<9$H # Or*   c                F    [         R                  R                  SX/U5      $ )z
Helper function to create a Mul node.
    parameter inputs: list of input names.
    parameter output: output name.
    parameter name: name of the node.
    return: Mul node in NodeProto format.
Mul)r   r   r   )inputsoutputr#   s      r&   get_mul_noderm    s     ;;  $??r*   c                l    U R                   R                  U R                  U-   U R                  -   5      $ )zh
Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
)parentjoinpathstemsuffix)filename
identifiers     r&   generate_identified_filenameru    s+     ??##HMMJ$>$PQQr*   c                R   SS K nSS KJn  SS KnUR                  " UR
                  S9  [        S5        [        U 5        [        S5        [        U5        UR                  XSS9  UR                  S5        UR                  S5        UR                  S	5        UR                  5         g )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotr   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesrz  pltr   s        r&   
apply_plotr    s{    #	S[[1	,	$K	
	*JJtdJ+JJ~JJxII()HHJr*   c           	     n	  ^^^^^ SSK mSSKnSSKmSSKJs  Js  Jn  SSKJs  Js  Jn  SSK	J
mJmJm  [        R                  " SU  35         " UUUUU4S jSTR                  5      nTR!                  XS9n[#        [$        R&                  R)                  US5      S	5       nUR+                  U5        SSS5        TR-                  S5      nUR/                  S
5      n	/ n
[1        U R3                  5       5       H  nX   nUR5                  5       n[7        UR9                  SU5      R;                  5       5      [7        UR9                  SU5      R;                  5       5      /n[=        [?        U5      5      nU	RA                  U5      nU	RA                  U5      nURC                  U	5        URE                  U	U5        URG                  U	U5        URI                  U	5      nU
RK                  U5        M     URM                  U	[O        U
5      5        U
 H  nU	RQ                  U5        M     U	RS                  5       nURU                  U	5        URW                  U	U5        URY                  U	5      nU	R[                  U5        U	R]                  5       n[#        [$        R&                  R)                  US5      S5       nUR+                  U5        SSS5        [$        R^                  R9                  SS5      S;   a  UR                  Ra                  US5      nURc                  5       n[e        U5       H\  nURg                  U5      n[        R                  " URi                  5       5        [        R                  " URk                  5       5        M^     [#        [$        R&                  R)                  US5      S	5       n[1        U R3                  5       5       H  nX   nUR5                  5       n[7        UR9                  SU5      R;                  5       5      [7        UR9                  SU5      R;                  5       5      /nUS-   [=        [?        U5      5      -   nUR+                  U5        UR+                  S5        M     SSS5        g! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       g= f)z6
Helper function to write calibration table to files.
r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  *   > \ rS rSrU UUUU4S jrSrg)*write_calibration_table.<locals>.MyEncoderi#  c                j  > [        UTT45      (       a  UR                  5       $ [        UTR                  5      (       a'  UR                  5       [	        UR
                  5      SS.$ [        UT5      (       a"  UR                  R                  [	        U5      S.$ TR                  R                  X5      $ )Nznumpy.array)r   rt   CLS)r  r   )
r   to_dictr   tolistr  rt   	__class__r3   JSONEncoderdefault)r%   objr  r  r  jsonnps     r&   r  2write_calibration_table.<locals>.MyEncoder.default$  s    #
K899{{}$#rzz** #

s399~m\\#011"}}55CII##++D66r*   r2   N)r3   r4   r5   r6   r  r:   )r  r  r  r  r  s   r&   	MyEncoderr  #  s    	7 	7r*   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0)r   1zcalibration.cache 
)6r  flatbuffersr   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwriter   Buildersortedkeysr  r   r  ra  r  r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndr   TrtTableStartDictVectorr   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrW  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   r  r  r  r  r  s                           @@@@@r&   write_calibration_tabler    s   
 LLLL]]LL&'8&9:;7 7D$$ 7 

,
<I	bggll3 23S	9T

9 
: 88A;D!!$'GN',,./"'>>#(,,y$/4467(,,x.3356
 CK '',))%0
w'2!!':6((1	i(# 0& $$Wc..AB#	''	2 $!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	AT

3 
B 
zz~~*C0H<%%77Q?	'')xA!q)ILL)LL*+ ! 
bggll3 34c	:d+0023C&+F~~'Hhll9d388:;hll8T2779:F #ICK 00EJJuJJt 4 
;	:g 
:	9L 
B	A 
;	:s%   "R7R6CR&
R
R#&
R4c                   U S:H  R                  [        R                  5      nU S:g  R                  [        R                  5      nUR                  5       nU R                  U-
  nU(       d  gU[        U5      -  [        U5      -  nUS:  d   SU SU SU 35       eU R                  [        R                  5      nXqU-  U* U-  -   -  nUS:*  R                  5       S:X  d   eU$ )aj  Given a discrete distribution (may have not been normalized to 1),
smooth it by replacing zeros with eps multiplied by a scaling factor
and taking the corresponding amount off the non-zero values.
Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
     https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
r   Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   r   r   sumsizer   )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s           r&   smooth_distributionr  o  s     Qu}}-H6//%--0KllnG'!Jw%
"33D#:Q'-
|74&QQ:88EMM"D(Nte{222DAI??!!!Kr*   c                    [         R                  " U R                  5       SS9n[        S UR                  R
                   5       5      $ )NF)load_external_datac              3  N   #    U  H  n[         R                  " U5      v   M     g 7fr!   )r   uses_external_data).0
intializers     r&   	<genexpr>*model_has_external_data.<locals>.<genexpr>  s!     mUlz#66zBBUls   #%)r   loadas_posixr   graphr2  )
model_pathmodels     r&   model_has_external_datar    s9    IIj))+FEmUZU`U`UlUlmmmr*   c                    [        5       nUR                  5       Ul        [        R                  Ul        0 nS/US'   [        U R                  5       U4SS/0UD6ng)z
    Generate model that applies graph optimization (constant folding, etc.)
    parameter model_path: path to the original onnx model
    parameter opt_model_path: path to the optimized onnx model
:return: optimized onnx model
ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  opt_model_pathsess_optionkwargs_s        r&   optimize_modelr     sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr*   c                    SS0nU R                   (       a:  U R                    H*  nUR                  UR                  UR                  05        M,     [        R
                  R                  X5        g)z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r&   add_pre_process_metadatar	    sS    .0CDN((D!!488TZZ"89 )KK6r*   c                    U R                   (       a7  U R                    H'  nUR                  S:X  d  M  UR                  S:X  d  M'    g   g)zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r&   model_has_pre_process_metadatar    s<    ((Dxx33

FY8Y ) r*   c                    SS0nU R                   (       a:  U R                    H*  nUR                  UR                  UR                  05        M,     [        R
                  R                  X5        g )N
onnx.inferr  r  )r  r  r  s      r&   add_infer_metadatar    sS    "$78N%%A!!155!''"23 &KK6r*   c                    U R                   (       a7  U R                    H'  nUR                  S:X  d  M  UR                  S:X  d  M'    g   g)Nr  r  TFr  )r  r  s     r&   model_has_infer_metadatar    s;    %%Auu$4G)G & r*   c                    U R                    Vs/ s H'  oR                  (       a  UR                  S:X  d  M%  UPM)     nn[        U5      S:w  a  [        S5      eUS   R                  nU$ s  snf )Nr   r   z$Failed to find proper ai.onnx domainr   )opset_importdomainr   r-   version)r  opsetai_onnx_domainopset_versions       r&   get_opset_versionr    sf    ).););m);<<SXS_S_clSle);Nm
>a?@@"1%--M ns
   $A*A*c                   [        U 5      nUn[        USU5      nUS:  a;  U[        R                  R                  :X  a  [
        R                  " SU S35        SnOCUS:X  a  [
        R                  " SU S35        O"US:  a  [
        R                  " SU S35        SnX2:w  a*  [        R                  R                  X5      n [        U 5      n U $ )	Nrb      z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.r   ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.   )
r  getattrr   r
   r]   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  weight_typer  target_opset_versionweight_quant_types        r&   update_opset_versionr%    s    %e,M(]KHr/43C3C3P3PP2=/ B1 1	

  "	"	2=/ BM M	

 
	2=/ B1 1	

  ",&&66uS 7u=Lr*   c                    [        U S5      n[        R                  R                  [	        U 5      [	        U5      5        [        R
                  " UR                  5       5      n[        U5        UR                  5         U$ )Nz	-inferred)	ru  r   shape_inferenceinfer_shapes_pathr  r  r  r  unlink)r  inferred_model_pathr  s      r&   load_model_with_shape_inferr+    s`    6z;O**3z?C@S<TUII)2245Eu Lr*   c                   [         R                  " SS9 n[        R                  " U 5      n[	        U5      R                  S5      n[        R                  " X#R                  5       SS9  [        U5      sS S S 5        $ ! , (       d  f       g = f)Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)
tempfileTemporaryDirectoryr   deepcopyr   rp  r   
save_modelr  r+  )r  quant_tmp_dir
model_copyr  s       r&   r!  r!    sa    		$	$L	9]]]5)
-(11,?

$7$7$9QUV*:6	 
:	9	9s   AA==
Bc                   U R                   [        R                  R                  [        R                  R                  4;   a  [
        R                  R                  U 5      $ [        SU R                   S[        U R                       35      e)Nz&Only float type is supported. Weights z is )r   r   r
   r   r   r   r  to_arrayr-   r#   type_to_name)r2  s    r&   r   r     su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r*   c                    U S-   $ )N_QuantizeLinearr2   tensor_names    r&   add_quant_suffixr<    s    ***r*   c                    U [         -   $ r!   )QUANT_INPUT_SUFFIXr:  s    r&   add_quant_input_suffixr?  
  s    +++r*   c                    U S-   $ )N_QuantizeLinear_Outputr2   r:  s    r&   add_quant_output_suffixrB    s    111r*   c                    U S-   $ )N_DequantizeLinearr2   r:  s    r&   add_dequant_suffixrE    s    ,,,r*   c                    U S-   $ )N_DequantizeLinear_Inputr2   r:  s    r&   add_dequant_input_suffixrH    s    222r*   c                    U [         -   $ r!   )DEQUANT_OUTPUT_SUFFIXr:  s    r&   add_dequant_output_suffixrK    s    ...r*   )NN)FN)FNNN)r   numpy.ndarrayr   onnx.TensorProto.DataTyper   boolr   rN  r   float | Noner   rO  r   rO  returnz#tuple[numpy.ndarray, numpy.ndarray])rP  z2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray])r  onnx.TensorProtor   rM  r   rL  r   rL  r	  z
int | Noner
  z
str | NonerP  rQ  )FF)r	  intr#  rR  rP  ztuple[bool, int])r)  r  rP  r(  )rs  r   rt  r  rP  r   )rS   )g-C6?)r  r   )r  r   r  r   )r  r	   )r  r	   rP  rN  )r  r	   rP  rR  )r  r	   r"  rF   rP  r	   )r  r   rP  r	   )r  r	   rP  r	   )r2  r
   rP  rL  )r;  r  rP  r  )rP  r  )o
__future__r   r   r  r  r/  enumr   pathlibr   r   r   r   r   r   r   r	   r
   r   r   r   onnx.helperr   r   r   r   onnx.referencer   onnxruntimer   r   r   onnx.reference.op_runr   ImportError__producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr>  DEQUANT_OP_NAMErJ  r   MODEL_SIZE_THRESHOLDr   r  r   r  rR  r7  r   r<   rF   rf   rU   rt   rW   r[   rY   r]   ra   r_   r   r   rp   ro   rr   rq   r  r  r  r   r   r   r   r   r   r  r   r!  r&  r  r0  r<  rK  r]  rc  rh  rm  ru  r  r  r  r  r   r	  r  r  r  r  r%  r+  r!  r   r<  r?  rB  rE  rH  rK  )ks   0r&   <module>rc     sd   #   	      0 0 > > & Q Q - P P7 	 , $2 ' !  474Dq4Dq
SZ[fhiSjloHp*Q'*4Dqt  #> #>L$   V!4  %++g"6  %++g"6!!5;;x#8''  %    5;;q#DekkRU]b]h]hFi"j%++d%**"Eu{{SV^c^h^hGi!j!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#>BV[@\"]%++b"=u{{1TX?Y!Z  %++d%**"Eu{{SV^c^h^hGi!j  5;;vU[[#I5;;W\didodoKp"q!    5;;q#DekkRU]b]h]hFi"j%++c"DekkRT\a\f\fFg!h!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#>AUZ@["\%++b"=u{{1TX?Y!Z  )+ A 13h<~!P #'"&"&;G
;G);G ;G 	;G
 !;G  ;G  ;G );G~ hl:=7:=D $(L L )L  L  	L 
 L  "L  L ^@	< >% %8' '"#J0@R$Yx2n
k 77!H7+,2-3/  $ rs   "Y3 Z%Z3Y?>Y?