
    `i                         S SK r S SKrS SKJr  S SKrS SKrS SKrS SKrS SKJ	r
  S SKJrJrJr  SSKJrJrJrJr  SSKJrJrJrJrJrJr  SSKJrJrJrJr  SS	KJrJrJ r J!r!  SS
K"J#r#J$r$  SSK%J&r&   " S S5      r'g)    N)Path)onnx_pb)SessionOptionsInferenceSessionGraphOptimizationLevel   )QuantizationModeQuantizedValueTypeQuantizedInitializerQuantizedValue)find_by_nameget_elem_indexget_mul_nodegenerate_identified_filenameattribute_to_kwargtype_to_name)quantize_nparrayquantize_datacompute_scale_zpget_qrange_for_qType)	QuantTypeonnx_domain__producer____version__)CreateOpQuantizerCreateDefaultOpQuantizer)	ONNXModelc                       \ rS rSrS rS rS rS rS r\	S 5       r
S rS	 rS
 rS rS rS rS rSS jrS rS rS rSS jrS rS rS rS rS rSrg)ONNXQuantizer   c                    [         R                  R                  U5      nUR                  R                   Vs0 s H  oR
                  U_M     snU l        U R                  R                  UR                  R                   Vs0 s H  oR
                  U_M     sn5        U R                  R                  UR                  R                   Vs0 s H  oR
                  U_M     sn5        [        U5      U l        X l        X0l        X@l        XPl        SU l        U["        R$                  :X  a  [&        R(                  R*                  O[&        R(                  R,                  U l        U["        R$                  :X  a  [&        R(                  R*                  O[&        R(                  R,                  U l         Xl        Xl        Xl        Xl        / U l        U R=                  5       U l        U R                  [@        ;  a$  [C        SRE                  U R                  5      5      eU RG                  5       U l$        SU l%        SU l&        SU l'        SU l(        0 U l)        g s  snf s  snf s  snf )NFz unsupported quantization mode {}fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)*onnxshape_inferenceinfer_shapesgraph
value_infonamevalue_infosupdateoutputinputr   modelper_channelreduce_rangemodestaticfuse_dynamic_quantr   QInt8
onnx_protoTensorProtoINT8UINT8input_qTypeweight_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantize	new_nodescheck_opset_versionopset_versionr	   
ValueErrorformatcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_map)selfr0   r1   r2   r3   r4   r<   r;   r=   r>   r?   r@   viotits                  l/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime_tools/quantization/onnx_quantizer.py__init__ONNXQuantizer.__init__   s    $$11%827++2H2HI2HBGGRK2HIu{{7I7I J7I"7I JKu{{7H7H I7H"7H IJu%
&(	"':E:X:1166^h^t^t^z^z;G9??;ZJ2277`j`v`v`|`|
	 +!2 0$8!!557yy,,?FFtyyQRR#'#E#E#G  (H$&E#+"1 $& _ J J Is   III#c                    U R                   R                   R                   Vs/ s H'  oR                  (       a  UR                  S:X  d  M%  UPM)     nnS[        U5      :w  a  [	        S5      eUS   R
                  nUS:X  a&  [        R                  " SR                  U5      5        gUS:  a  [        R                  " SR                  U5      5        U R                   R                   R                  R                  US   5        U R                   R                   R                  R                  [        R                  R                  SS	5      /5        S	nS
U l        U$ s  snf )Nzai.onnxr   z$Failed to find proper ai.onnx domainr   
   zThe original model opset version is {}, which does not support node fusions. Please update the model to opset >= 11 for better performance.zThe original model opset version is {}, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.    T)r0   opset_importdomainlenrD   versionloggingwarningrE   removeextendr&   helpermake_opsetidr5   )rM   opsetai_onnx_domainrC   s       rQ   rB   !ONNXQuantizer.check_opset_versionR   s)   #zz//<<
<eLLTYT`T`dmTmE< 	 
 N##CDD&q)11BOO ^&( 2OO X&( JJ))001BCJJ))00$++2J2J2r2R1STM"&-
s   $EEc                    / n/ nU R                   R                  5        GH  nUR                  S:X  d  M  Su  pEnU R                   R                  U5       H  nUR                  S:X  d  M  UnM     Uc$  [	        SR                  UR                  5      5      eU R                   R                  US5      nUc$  [	        SR                  UR                  5      5      eU R                   R                  U5      n[        U5      S:X  a$  [	        SR                  UR                  5      5      eUR                  S   n	UR                  S	   n
[        XR                   R                  5       5      n[        XR                   R                  5       5      n[        R                  R                  U5      [        R                  R                  U5      /nU Hl  n[        UR                   S   UR                  5      nUS
:w  a  UR                  S   UR                  U'   MJ  [	        SR                  UR                  5      5      e   UR                  S   nU R"                  c  0 U l        XR"                  U'   UR%                  U/5        UR%                  U/5        UR%                  U/5        UR%                  U/5        GM     U R                   R'                  U5        U R                   R)                  U5        U R                   R                   $ )z
Detect and remove the quantize/dequantizelinear node pairs(fake quantized nodes in Quantization-Aware training) 
and reconnect and update the nodes.
QuantizeLinear)NNNDequantizeLinearzQRemove fake-quantized node pair Error: DequantizeLinear node is not found for {}.r   zGRemove fake-quantized node pair Error: Parent node is not found for {}.zHRemove fake-quantized node pair Error: No successive nodes found for {}.r      zhRemove fake-quantized node pair Error: Connection failed. No matched successive node input found for {}.)r0   nodesop_typeget_childrenrD   rE   r+   
get_parentrZ   r/   r   initializerr&   numpy_helperto_arrayr   r.   rG   r_   remove_nodesremove_initializers)rM   nodes_to_removeinitializers_to_remove	curr_node	next_node	prev_node	succ_node
child_node
succ_nodesscale_tensor_namezp_tensor_nameinitializer_scaleinitializer_zpzp_and_scalesucc_idx
param_names                   rQ   remove_fake_quantized_nodes)ONNXQuantizer.remove_fake_quantized_nodesk   s   
 !#))+I  $442B/	i"&**"9"9)"DJ!))-??$.	 #E $$krr%NN,- - !JJ11)Q?	$$%n%u%u!&( ) ) "ZZ44Y?
z?a'$%o%v%v!&( ) ) %.OOA$6!!*!3$01BJJDZDZD\$]!!-njj>T>T>V!W%%..~>%%../@A  ",I-i.>.>q.A9??SH2~4=OOA4F	1( G#VINN35 5 ", '__Q/
++3/1D,7C((4  &&	{3&&	{3 '--/@.AB&--~.>?i ,l 	

0

&&'=>zz    c                    U R                   b4  [        U R                   5      S:w  a  UR                  U R                   ;  a  gUR                  U R                  ;  a  gU R
                  b  UR                  U R
                  ;   a  gg)Nr   FT)r>   rZ   r+   rk   r@   r?   )rM   nodes     rQ   should_quantizeONNXQuantizer.should_quantize   ss    !!-#&&3(+,3-15$BXBX1XLL 9 99  ,d>S>S1Sr   c                    U R                  5         U R                  R                  5        H@  nU R                  U5      (       a  [	        X5      nO[        X5      nUR                  5         MB     U R                  5         U R                  R                  5       R                  S5        U R                  R                  5       R                  R                  U R                  5        U R                  R                  5         [        U R                  R                  l        [         U R                  R                  l        U R                  R                  $ )Nr   )r   r0   rj   r   r   r   quantize_dequantize_outputsr)   
ClearFieldr   r_   rA   remove_unused_constantr   producer_namer   producer_version)rM   r   op_quantizers      rQ   quantize_modelONNXQuantizer.quantize_model   s    ((*JJ$$&D##D))0<7C!!# ' 	  " 	

%%f-

&&t~~6 	

))+)5

&,7

)zzr   c                     U R                   [        R                  R                  :X  a!  [        R
                  R                  U 5      nU$ [        SR                  U R                  [        U R                      5      5      e)Nz=Only float type quantization is supported. Weights {} is {}. )	data_typer7   r8   FLOATr&   ro   rp   rD   rE   r+   r   )rn   weightss     rQ   tensor_proto_to_array#ONNXQuantizer.tensor_proto_to_array   so      J$:$:$@$@@''00=G  \cc  ,{/D/D"EG H Hr   c                 P    [        XR                  R                  5       5      nUS L$ N)r   r0   rn   )rM   
input_namern   s      rQ   is_input_a_weightONNXQuantizer.is_input_a_weight   s$    ":zz/E/E/GH$&&r   c                     U R                   $ r   )r1   )rM   s    rQ   is_per_channelONNXQuantizer.is_per_channel   s    r   c                     [        XR                  R                  5       5      nUS L=(       a'    UR                  [        R
                  R                  :H  $ r   )r   r0   rn   r   r7   r8   r   )rM   weight_nameweights      rQ   is_valid_quantize_weight&ONNXQuantizer.is_valid_quantize_weight   s@    k::+A+A+CDT!Vf&6&6*:P:P:V:V&VVr   c                     U[         R                  R                  :X  a  U R                  X5      $ U R	                  X5      $ )a*  
Create nodes for dynamic quantization of input and add them to nodes_list.
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter qType: type to quantize to.
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
)r7   r8   r9   +_get_dynamic_input_quantization_params_int8,_get_dynamic_input_quantization_params_uint8)rM   r   
nodes_listqTypes       rQ   &_get_dynamic_input_quantization_params4ONNXQuantizer._get_dynamic_input_quantization_params   s:     J**///CCJ[[@@XXr   c                 2   [         R                  R                  nUS-   nUS-   n[        R                  R                  SU/US-   /USS9nUR                  U5        US-   n[        R                  R                  SU/US-   /USS9nUR                  U5        US	-   n	[        R                  R                  S
UR                  S   /U	S-   /U	5      n
UR                  U
5        US	-   n[        R                  R                  S
UR                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SU
R                  S   UR                  S   /US-   /U5      nUR                  U5        [        R                  R                  U R                  [         R                  R                  / [        U5      S-  /5      nU R                  R                  U5        US-   n[        R                  R                  SUR                  S   U R                  /U/U5      nUR                  U5        [        R                  R                  U R                  U/ S/5      nU R                  R                  U5        X@R                  / / 4$ )a  
Create nodes for dynamic quantization of input to int8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
_scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMaxg       @	scale_DivDiv)r7   r8   r9   r&   r`   	make_nodeappendr.   make_tensorrI   r   r   r0   add_initializerrK   )rM   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_noder~   s                      rQ   r   9ONNXQuantizer._get_dynamic_input_quantization_params_int8   s    &&++ &0$|3++//j\O^bLbKc0?9: 0 < 	/*$|3++//j\O^bLbKc0?9: 0 < 	/* .6"kk33EO<R<RST<U;VYlosYsXt4GI-.-6"kk33EO<R<RST<U;VYlosYsXt4GI-.!J.{{,,U5H5O5OPQ5RTgTnTnopTq4r.:T.A-BLR,'++11$2M2MzOeOeOkOkmo3G3NQT3T2UW

""?3#k1..u|7J7J17MtOjOj6k0@/A>S.) 001H1H%QSVWUXY

"">2!8!8"b@@r   c                 z   [         R                  R                  nUS-   nUS-   nUS-   n[        R                  R                  SU/US-   /USS9nUR                  U5        US-   n[        R                  R                  S	U/US-   /USS9n	UR                  U	5        [        R                  R                  U R                  [         R                  R                  / [        U5      /5      n
U R                  R                  U
5        [        R                  R                  U R                  [         R                  R                  / S
/5      nU R                  R                  U5        US-   n[        R                  R                  SU	R                  S   UR                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  S   U R                  /U/U5      nUR                  U5        US-   n[        R                  R                  SU R                  UR                  S   /US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  S   U/US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  US-   /U5      nUR                  U5        US-   n[        R                  R                  SUR                  U/UUS9nUR                  U5        XE/ / 4$ )a  
Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
r   _zero_pointr   r   r   r   r   r   r   g        
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCastto)r7   r8   r:   r&   r`   r   r   r   rH   r   r   r0   r   rJ   r.   )rM   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                           rQ   r   :ONNXQuantizer._get_dynamic_input_quantization_params_uint8-  s;    &&,,%0"]2$|3++//j\O^bLbKc0?9: 0 < 	/*$|3++//j\O^bLbKc0?9: 0 < 	/* "[[44T5Q5QS]SiSiSoSoqs6J56Q5RT

""#56![[44T5I5I:KaKaKgKgiknqmrs

""#56 $l2..u7M7Ma7PRaRhRhijRk6l0>0E/FX.)#l2..u~7L7LQ7OQUQmQm6n0@/A>S.) !#44kk++ED4H4H/J`J`abJc3d-84-?,@+O+& #44kk++EK4F4Fq4IK[3\_jmq_q^r,79+&"%88--g{7I7IM\`L`Kacpq-(!$66{{,,V]5I5IM?\hmr,s,'B66r   c                 &   U R                   b  XR                   ;  a  gU R                   U   nUb  [        U5      S:w  a  [        SR                  X5      5      eUS   /n/ nUS-   nU R                  nUS   /n/ nUS-   n	[
        R                  R                  XVXC5      n
U R                  R                  U
5        [
        R                  R                  U	[        R                  R                  X5      nU R                  R                  U5        SXX4$ )	a4  
Create initializers and inputs in the graph for zero point and scale of output.
Zero point and scale values are obtained from self.quantization_params if specified.
    parameter param_name: Name of the quantization parameter.
    return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
)FrV   rV   rV   rV   rh   z_Quantization parameters should contain zero point and scale. Specified values for output {}: {}r   r   r   r   T)rG   rZ   rD   rE   r;   r&   r`   r   r0   r   r7   r8   r   )rM   r   paramszero_point_valueszero_point_shapezero_point_namezero_point_typescale_valuesscale_shape
scale_nameinit_zp
init_scales               rQ   _get_quantization_params&ONNXQuantizer._get_quantization_paramsn  s    ##+zAYAY/Y())*5>S[A- BBH&B\^ ^ $AYK$}4**q	{(*
 ++))/L\p

""7+[[,,Z9O9O9U9UWbq


"":.Z+OOr   Nc                    UR                   U   nUS-   nUb  Ub  SXEpnOU R                  U5      u  pn
  nU R                  (       aK  US:X  a  [        SR	                  U5      5      e[
        R                  R                  SXiU
/U/US-   5      nU/$ US:X  a+  [
        R                  R                  SXiU
/U/US-   5      nU/$ U R                  (       aS  U[        R                  R                  :X  a5  US-   n	US-   n[
        R                  R                  S	U/XyU/US-   5      nU/$ / nU R                  XnU5      u  pnn[
        R                  R                  SXiU
/U/US-   5      nX/-   $ )
aj  
Given an input for a node (which is not a initializer), this function
    - add nodes to compute zero point and scale for this input if they don't exist.
    - add new QuantizeLinear node to quantize the input.
    parameter node: node being quantized in NodeProto format.
    parameter input_index: index of input in node.input.
    parameter qType: type to quantize to.
    parameter given_scale_name: if those inputs need to be quanitzed using this scale tensor.
    parameter given_zp_name: if those inputs to be quantized using this zeropoint tensor.
    return: List of newly created nodes in NodeProto format.

_quantizedTFzQuantization parameters are not specified for param {}.In static mode quantization params for inputs and outputs of nodes to be quantized are required.rf   _QuantizeLinearr   r   DynamicQuantizeLinear)r/   r   r4   rD   rE   r&   r`   r   r5   r7   r8   r:   r   )rM   r   input_indexr   given_scale_namegiven_zp_namer   output_name
data_foundr   zp_name_qlinear_nodezeropoint_namerj   r   zp_shapes                    rQ   _get_quantize_input_nodes'ONNXQuantizer._get_quantize_input_nodes  s    ZZ,
 </(}/H/35EGJG484Q4QR\4]1JGQ;;U" wF:&( (
  ;;001AJ\cCdgrfs1;>O1OQL >! T!#{{445E
`gGhkvjw5?BS5S U$~% **u
8N8N8T8T/T!+h!6J%/-%?N#';;#8#89PS]R^:ESa9b9CFW9W$YL )>) ECC&u6 ?Jh $(;;#8#89IJdkKl:E
UfHf$hL !>11r   c                 `   [        UR                  S   U R                  R                  5       5      nUc'  [	        SR                  UR                  S   5      5      eUnUS-   nXg/n[        R                  " [        UR                  5      [        R                  S9n	SU	S'   [        R                  R                  U[        R                  R                   [        UR                  5      /U	5      n
U R                  R#                  U
5        UR$                  S   S-   n[        R                  R'                  SX/US	-   5      nUR)                  U5        U/nUR)                  U5        UR$                  S   S
-   n[        R                  R'                  SX/US-   5      nUR)                  U5        U$ )a-  
Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
    parameter nodes: new nodes would be appended into nodes
    parameter node: current node (Conv)
    parameter last_output: output of previous node (input to bias add)
    return: the name of output
r   z Expected {} to be an initializer_reshape_shapedtyperi   r   _reshapeReshapereshape	_bias_addAddbias_add)r   r/   r0   rn   rD   rE   nponesrZ   dimsint64r&   r`   r   r7   r8   INT64r   r.   r   r   )rM   rj   r   last_outputquantized_bias_namer   reshape_input_datareshape_input_shapereshape_inputreshape_shape
init_shapereshape_op_outputreshape_nodebias_add_inputadd_node_outputadd_nodes                   rQ   get_bias_add_nodes ONNXQuantizer.get_bias_add_nodes  sz    djjmTZZ-C-C-EF>?FFtzzRS}UVV 114DD+AV[[!1"((Ca[[,,-@*BXBXB^B^adekepepaq`r-:<


"":. KKNZ7{{,,YGZ-@9-LN\" &/0++a.;6;;((@QSfisSstXr   c                 ^   U R                   U   R                  n[        XPR                  R	                  5       5      nU R                  U5      n[        XR                  R	                  5       5      nU R                  U5      n	US-   n
[        R                  R                  nUS-   n[        R                  R                  SX/US-   /US-   5      nUR                  U5        [        R                  R                  SXR                  S   /US-   /US-   5      nUR                  U5        [        R                  R                  S	UR                  US
-   /US-   5      nUR                  U5        [        R                  R                  SUR                  U
/U
S-   US9nUR                  U5        U
$ )M
Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
r   r   Mul_scale_noder   r   z_tmp_quant:0
_tmp_qauntr   z_quant_rounded:0_quant_roundedr   _noder   )rL   r   r   r0   rn   r   r7   r8   INT32r&   r`   r   r   r.   )rM   	bias_namer   r   new_node_listweight_scale_nameweight_initializerweight_scalebias_initializer	bias_datar  r   r   bias_scale_nodequantize_bias_nodebias_rounded_nodebias_cast_nodes                    rQ   quantize_bias_dynamic#ONNXQuantizer.quantize_bias_dynamic  s    !44[ALL)*;ZZ=S=S=UV112DE (	::3I3I3KL../?@	',6&&,,%0++//8H7\_hks_s^t09M0IK_-![[2259F\F\]^F_:`4=4N3OQZ]iQik/0 KK11';M;T;TW`cuWuVv2;>N2NP./..v/@/G/GJ]I^/BW/L27 / 9 	^,""r   c           	         XR                   ;   a  U R                   U   R                  $ U R                   U   R                  n[        X@R                  R                  5       5      nU R                  U5      n[        XR                  R                  5       5      nU R                  U5      nUS-   n	X R                   ;   a  U R                   U   R                  n
O@X R                  ;   a  U R                  U5      u  p    nO[        SR                  U5      5      e[        XR                  R                  5       5      nU R                  U5      nX-  n[        R                  " U5      U-  R                  5       R                  [        R                  5      n[        R                  " U[        R                  S9R!                  UR"                  5      n[$        R&                  R)                  UU	5      nU R                  R                  5       R+                  U/5        U	S-   n[        R                  " U[        R,                  S9R!                  S5      n[$        R&                  R)                  UU5      nU R                  R                  5       R+                  U/5        U	S-   n[        R.                  " UR0                  [        R                  S9R!                  S5      n[$        R&                  R)                  UU5      nU R                  R                  5       R+                  U/5        XR                   ;  d   e[3        XUU[4        R6                  UR8                  S:  a  SOS	5      nUU R                   U'   U	$ )
r%  r   z@Expected {} to be in quantized value map for static quantizationr	  r   ri   r   r   r   N)rL   q_namer   r   r0   rn   r   rG   r   rD   rE   r  asarrayroundastypeint32r  r  r&   ro   
from_arrayr_   float32zerosshaper   r
   Initializersize)rM   r,  r   r   r.  r/  r0  r1  r2  r  r   r  inputscale_initializerinput_scale
bias_scalequantized_databias_np_datapacked_bias_initializerquantized_bias_scale_namebias_scale_datapacked_bias_scale_initializerquantized_bias_zp_namebias_zp_datapacked_bias_zp_initializerquantized_values                            rQ   quantize_bias_static"ONNXQuantizer.quantize_bias_static  s    000++I6=== !44[ALL)*;ZZ=S=S=UV112DE (	::3I3I3KL../?@	',6 111#77
CNN333+/+H+H+T(AAq_ffgqrss!-.>

@V@V@X!Y001GH !/
 **Y/*<CCELLRXXV zz.AIIJZJ_J_`"&"3"3">">|M`"a

 '')@(AB %8($B!**ZrzzBJJ2N(,(9(9(D(D_Vo(p%

 '')F(GH "5}!Dxx
 0 0AII"M%)%6%6%A%A,Pf%g"

 '')C(DE!9!99:9(Ib)?ASA_A_.=.B.BQ.FDR />  +""r   c                    / n/ n/ n/ nU GHL  nUR                   U   n	XR                  ;   ab  U R                  U	   n
UR                  U
R                  5        UR                  U
R                  5        UR                  U
R
                  5        M  [        XR                  R                  5       5      nUbg  U R                  X(       a  U R                  OU R                  5      u  pnUR                  U5        UR                  U5        UR                  U5        GM  U R                  R                  U	S-   U R                  U R                  R                  5       5      nUc2  U R                  XU R                  5      nUR!                  U5        US   nUR"                  S:X  aZ  UR!                  UR$                  5        UR                  UR                   S   5        UR                  UR                   S   5        GM  UR                  UR$                  S   5        UR                  UR$                  S   5        UR                  UR$                  S   5        GMO     XeXG4$ )aC  
Given a node, this function quantizes the inputs as follows:
    - If input is an initializer, quantize the initializer data, replace old initializer
      with new initializer
    - Else, add QuantizeLinear nodes to perform quantization
    parameter node: node being quantized in NodeProto format.
    parameter indices: input indices to quantize.
    return: (List of quantized input names,
             List of zero point names used for input quantization,
             List of scale names used for input quantization,
             List of new QuantizeLinear nodes created)
r   ri   rf   r   rh   r   )r/   rL   r   r   r   r:  r   r0   rn   quantize_weightr<   r;   find_node_by_namerA   r)   r  r_   rk   r.   )rM   r   indicesinitializer_use_weight_qTypescale_nameszero_point_namesquantized_input_namesrj   r   
node_inputrQ  rn   q_weight_namer   r   r  quantize_input_nodess                    rQ   quantize_inputsONNXQuantizer.quantize_inputsN  s%     ""KK0J 555"&":"::"F""?#=#=> ''(?(?@%,,_-C-CD 'z::3I3I3KLK&595I5I6R!2!2X\XhXh6j2
 &,,]; ''0"":.  $zz;;JIZ<Z\`\j\j<@JJ<L<L<N P'+/+I+I$]a]m]m+n(LL!56#7#;L''+;;)001D1DE&&|'9'9!'<=$++L,>,>q,AB)001D1DQ1GH&&|':':1'=>$++L,?,?,BCI #L &LLr   c                    UR                   U R                  ;   a<  U R                  UR                      nUR                  UR                  UR                  4$ UR                   S-   nUR                   S-   nUR                   S-   nU R                  U5      n[        UR                  5       R                  5       [        X R                  5      U5      u    pp[        R                  " U[        R                  R                  U   S9R!                  UR"                  5      n[        R$                  R'                  X5      n[        R(                  R+                  U[,        R.                  R0                  / U
/5      n[        R(                  R+                  XR/ U	/5      nU R2                  R5                  5       R7                  XU/5        [9        UR                   XFU[:        R<                  S5      nX0R                  UR                   '   XEU4$ )z
:param weight: TensorProto initializer
:param qType: type to quantize to
:return: quantized weight name, zero point name, scale name
r   r   r   r	  N)r+   rL   r:  r   r   r   r   flattentolistr   r2   r  r;  r&   mappingTENSOR_TYPE_TO_NP_TYPEr  r  ro   r?  r`   r   r7   r8   r   r0   rn   r_   r   r
   rC  )rM   r   r   rQ  r]  r   r   weight_datar  
zero_pointscaleq_weight_dataq_weight_initializerscale_initializerzero_initializers                  rQ   rU  ONNXQuantizer.quantize_weight  s    ;;$222"66v{{CO#**O,C,C_E_E_``l2++-[[8+
 0081>{?R?R?T?[?[?]?STY[l[l?mot2v.1%

=8[8[\a8bckklrlwlwx#00;;MY KK33J
@V@V@\@\^`chbij;;2272
|T

 '')=Rb(cd )mQX);)G)GO0?  -z11r   c                 n   XR                   ;   a2  U R                   U   nUR                  UR                  UR                  4$ [	        XR
                  R                  5       5      nUc  [        SU5      eU R                  U5      nUR                  U   n/ n/ n	/ n
/ n/ n[        U5       H  nUR                  X5      n[        UR                  5       R                  5       [        X R                   5      U5      u  nnnnnUR#                  U5        U	R#                  U5        U
R#                  U5        UR#                  U5        UR#                  U5        M     [%        UR                  5      nSUU'   [&        R(                  " US   5      R+                  U5      n[        S[-        U5      5       HC  n[&        R(                  " X   5      R+                  U5      n[&        R.                  " UU4U5      nME     US-   nUS-   nUS-   n[1        UUUU[2        R4                  S 5      nX@R                   U'   [&        R(                  " U[6        R8                  R:                  U   S9R+                  UR<                  5      n[6        R>                  RA                  UU5      nUR<                  U   /n[6        RB                  RE                  U[F        RH                  RJ                  UU5      n[6        RB                  RE                  UUUU
5      nU R
                  R                  5       RM                  UUU/5        UUU4$ )Nz{} is not an initializerr   r   r   r   r   r	  )'rL   r:  r   r   r   r0   rn   rD   r   rB  rangetaker   rb  rc  r   r2   r   listr  r;  r  rZ   concatenater   r
   rC  r&   rd  re  r  ro   r?  r`   r   r7   r8   r   r_   )rM   r   r<   channel_axisrQ  rn   r   channel_count	rmin_list	rmax_listzero_point_list
scale_listquantized_per_channel_data_listiper_channel_datarminrmaxrg  rh  quantized_per_channel_datareshape_dimsquantized_weightschannel_weightsr]  r   r   rj  zero_scale_shaperk  rl  s                                 rQ   quantize_weight_per_channel)ONNXQuantizer.quantize_weight_per_channel  s   222"66{CO#**O,C,C_E_E_``";

0F0F0HI7EE,,[9l3		
*,'}%A&||A<HU ((*1135I,XiXi5jIED$
E+E T"T""":.e$+223MN & GMM*%&\"JJ'Fq'IJRRS_`q#=>?A jj)H)KLTTUabO "0A?/SUa b @ $l2- 8+
(mZQX);)G)GO0?  - JJT\\%H%H%VXX_X_`k`p`pXq 	#00;;<M}]',,\:; KK33J
@V@V@\@\^n4>@;;227LJZ\kl

 '')=?PRb(cdw
33r   c                    XR                   ;   a  U R                   U   nUS-   nU R                  R                  X0R                  U R                  R	                  5       5      nUcG  UR
                  UR                  UR                  /n[        R                  R                  SXQ/U5      nU$ XR                  S   :X  d   eg)as  
Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
it back to float32
    parameter value_name: value to dequantize
    parameter new_nodes_list: List of new nodes created before processing current node
    return: None if there is already a DequantizeLinear node that dequantizes it
            A DequantizeLinear node otherwise
_DequantizeLinearNrg   r   )rL   r0   rV  rA   r)   r:  r   r   r&   r`   r   r.   )rM   
value_namerQ  dqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes          rQ   _dequantize_valueONNXQuantizer._dequantize_value  s     111"66zBO&)<<M JJ88X\XbXbXhXhXjkM$#2#9#9?;U;UWfWnWn"o"&++"7"78JO]i8E#G&& #&:&:1&==>=r   c                     U R                   R                  5       R                   H>  nU R                  UR                  5      nUc  M#  U R
                  R                  U5        M@     g)z
Dequantize output if it is quantized
    parameter new_nodes_list: List of new nodes created before processing current node
    return: List of new nodes created
N)r0   r)   r.   r  r+   rA   r   )rM   r.   r  s      rQ   r   !ONNXQuantizer._dequantize_outputs  sM     jj&&(//F"44V[[AO*%%o6 0r   c           	      6   U R                   c  g U R                  R                  5        H  nUR                  S;  a  M  U R	                  U5      (       d  M-  [        U R                  R                  5       UR                  S      5      S:w  a  Mf  UR                  S   U R                   R                  5       ;  d+  UR                  S   U R                   R                  5       ;  a  M  U R                   UR                  S      U R                   UR                  S   '   M     0 nU R                   R                  5        HX  nU R                   U   u  pE[        US5      n[        US5      n[        XEU R                  [        U R                  5      5      X#'   MZ     U$ )N)ClipRelur   r   )r=   r0   rj   rk   r   rZ   input_name_to_nodesr/   keysr.   minmaxr   r;   r   )rM   r   rG   tensor_namer|  r}  s         rQ   rF   +ONNXQuantizer.calculate_quantization_params  s_   % JJ$$&D||#33''--4::113DJJqMBCqHzz!}D$6$6$;$;$==QW[WiWiWnWnWpAp040B0B4;;q>0RDtzz!}- ' !--224K++K8JD tQ<DtQ<D/?DL\L\@TUYUeUe@f0h, 5 #"r   )rI   rH   rJ   rK   r5   r;   r3   r0   rA   r?   r>   r@   rC   r1   rG   rL   r2   r4   r=   r,   r<   )NN)T)__name__
__module____qualname____firstlineno__rR   rB   r   r   r   staticmethodr   r   r   r   r   r   r   r   r  r"  r7  rR  r_  rU  r  r  r   rF   __static_attributes__ r   rQ   r   r      s    4&l2A F 4  ' WY5An?7BP@62p#J$#L;#z9Mv2B84t0
7#r   r   )(osstructpathlibr   numpyr  r\   r&   onnx.numpy_helperr   r7   onnxruntimer   r   r   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   
onnx_modelr   r   r  r   rQ   <module>r     sU    
       & P P c c D  D ` ` J J A !I# I#r   