
    `i9                     V   S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r
  S SKJrJrJr  SSKJrJrJrJr  SSKJrJrJrJrJr  SSKJrJr  SS	KJrJr  SS
KJr  SSK J!r!  SSK"J#r#  SSK$J%r%J&r&J'r'  S\4S jr(SS\4S jjr)SS\RT                  SSSSSSS/ 4S jr+\RX                  / SS\RZ                  \RZ                  / / SS\'R\                  4S\%4S jjr// SS\RZ                  \RZ                  / / SS4	S\S\4S jjr0/ SS\RZ                  \RZ                  / / S4S\S\4S jjr1g)    N)Path)onnx_pb)SessionOptionsInferenceSessionGraphOptimizationLevel   )QuantizationModeQuantizedValueTypeQuantizedInitializerQuantizedValue)find_by_nameget_elem_indexget_mul_nodegenerate_identified_filenameattribute_to_kwarg)	QuantTypeQuantFormat)QLinearOpsRegistryIntegerOpsRegistry)	ONNXModel)ONNXQuantizer)QDQQuantizer)CalibrationDataReadercreate_calibratorCalibrationMethod
model_pathc                    [        U S5      n[        5       nUR                  5       Ul        [        R
                  Ul        [        U R                  5       US/S9n[        R                  " UR                  5       5      nU$ )z
Generate model that applies graph optimization (constant folding,etc.)
parameter model_path: path to the original onnx model
return: optimized onnx model
z-optCPUExecutionProvider)	providers)
r   r   as_posixoptimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   onnxload)r   opt_model_pathsess_option_optimized_models        f/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime_tools/quantization/quantize.pyoptimize_modelr+      so     2*fEN "K+9+B+B+DK(+A+R+RK(,,.H^G_`Aii 7 7 9:O    Tc                     U(       a9  [        [        [        U 5      5      5      nUR                  5         UR                  $ [
        R                  " [        U 5      5      $ )N)r   r+   r   replace_gemm_with_matmulmodelr$   r%   )r   optimize
onnx_models      r*   
load_modelr2   -   sE    ~d:.>?@
++-99T*%&&r,   F   c                    [         R                  " S5        US:X  d  US:X  a  Un[        R                  " 5       nUR	                  U 5        U(       a  [        U5      S:X  aC  U(       a  [        [        R                  " 5       5      O[        [        R                  " 5       5      n[        XUS:H  XUXhXU5      nUR                  5         UR                  R                  $ [        S5      e)a  
    Given an onnx model, create a quantized onnx model and save it into a file
:param model: ModelProto to quantize
:param per_channel: quantize weights per channel
:param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
:param quantization_mode: Can be one of the QuantizationMode types.
    IntegerOps:
        the function will use integer ops. Only ConvInteger and MatMulInteger ops are supported now.
    QLinearOps:
        the function will use QLinear ops. Only QLinearConv and QLinearMatMul ops are supported now.
:param static:
    True: The inputs/activations are quantized using static scale and zero point values
          specified through quantization_params.
    False: The inputs/activations are quantized using dynamic scale and zero point values
           computed while running the model.
:param symmetric_activation:
    True: activations are quantized into signed integers.
    False: activations are quantized into unsigned integers.
:param symmetric_weight:
    True: weights are quantized into signed integers.
    False: weights are quantized into unsigned integers.
:param quantization_params:
    Dictionary to specify the zero point and scale values for inputs to conv and matmul nodes.
    Should be specified when static is set to True.
    The quantization_params should be specified in the following format:
        {
            "input_name": [zero_point, scale]
        }.
    zero_point should be of type np.uint8 and scale should be of type np.float32.
    example:
        {
            'resnet_model/Relu_1:0': [np.uint8(0), np.float32(0.019539741799235344)],
            'resnet_model/Relu_2:0': [np.uint8(0), np.float32(0.011359662748873234)]
        }
:param nodes_to_quantize:
    List of nodes names to quantize. When this list is not None only the nodes in this list
    are quantized.
    example:
    [
        'Conv__224',
        'Conv__252'
    ]
:param nodes_to_exclude:
    List of nodes names to exclude. The nodes in this list will be excluded from quantization
    when it is not None.
:param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default.
:return: ModelProto with quantization
zonnxruntime.quantization.quantize is deprecated.
         Please use quantize_static for static quantization, quantize_dynamic for dynamic quantization.r3      r   z4Only 8 and 7 bit quantization is currently supported)loggingwarning
onnx_proto
ModelProtoCopyFromlenlistr   keysr   r   quantize_modelr/   
ValueError)r/   per_channelnbitsquantization_modestaticforce_fusionssymmetric_activationsymmetric_weightquantization_paramsnodes_to_quantizenodes_to_excludeop_types_to_quantizemode
copy_model	quantizers                  r*   quantizerN   8   s    x OO i jzUaZ **,
E"#s+?'@A'EFL4(:(?(?(A#BRVWiWnWnWpRq !*5A:tUe"6M^"68	 	  "$$$OPPr,   calibration_data_readerc                    U[         R                  :w  a  [        S5      e[        R                  nU(       a  [        U5      S:X  a  [        [        R                  " 5       5      n[        [        U 5      U5      n[        XUS9nUR                  U5        UR                  5       nU[        R                  L a  [!        UUUUSUUUU	U
U5      nO[#        UUUUSUUUU	U
U5      nUR%                  5         UR&                  R)                  X5        g)a  
    Given an onnx model and calibration data reader, create a quantized onnx model and save it into a file
:param model_input: file path of model to quantize
:param model_output: file path of quantized model
:param calibration_data_reader: a calibration data reader. It enumerates calibration data and generates inputs for the original model.
:param quant_format: QuantFormat{QOperator, QDQ}.
    QOperator format quantizes the model with quantized operators directly.
    QDQ format quantize the model by inserting QuantizeLinear/DeQuantizeLinear on the tensor.
:param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default.
:param op_types: operators to quantize
:param per_channel: quantize weights per channel
:param reduce_range: quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode
:param activation_type: quantization data type of activation
:param weight_type: quantization data type of weight
:param nodes_to_quantize:
    List of nodes names to quantize. When this list is not None only the nodes in this list
    are quantized.
    example:
    [
        'Conv__224',
        'Conv__252'
    ]
:param nodes_to_exclude:
    List of nodes names to exclude. The nodes in this list will be excluded from quantization
    when it is not None.
:param optimize_model: optimize model before quantization.
:parma use_external_data_format: option used for large size (>2GB) model. Set to False by default. 
:param calibrate_method: 
    Current calibration methods supported are MinMax and Entropy. 
    Please use CalibrationMethod.MinMax or CalibrationMethod.Entropy as options.
z:Static quantization only support uint8 for activation now.r   )calibrate_methodTN)r   QUInt8r?   r	   
QLinearOpsr;   r<   r   r=   r2   r   r   collect_datacompute_ranger   	QOperatorr   r   r>   r/   save_model_to_file)model_inputmodel_outputrO   quant_formatrJ   r@   reduce_rangeactivation_typeweight_typerH   rI   r+   use_external_data_formatrQ   rK   r/   
calibratortensors_rangerM   s                      r*   quantize_staticra      s   ^ )***UVV&&D3';#<#A#$6$;$;$=>tK(.9E"5QabJ34,,.M{,,,! "	 ! "	 OO&&|Nr,   rX   rY   c                 6   [         R                  nU(       a  [        U5      S:X  a  [        [        R
                  " 5       5      n[        [        U 5      U	5      n[        UUUUSUUSUUU5      nUR                  5         UR                  R                  X5        g)a  
    Given an onnx model, create a quantized onnx model and save it into a file
:param model_input: file path of model to quantize
:param model_output: file path of quantized model
:param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default
:param per_channel: quantize weights per channel
:param reduce_range: quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode
:param nbits: number of bits to represent quantized data. Currently only supporting 8-bit types
:param activation_type: quantization data type of activation
:param weight_type: quantization data type of weight
:param nodes_to_quantize:
    List of nodes names to quantize. When this list is not None only the nodes in this list
    are quantized.
    example:
    [
        'Conv__224',
        'Conv__252'
    ]
:param nodes_to_exclude:
    List of nodes names to exclude. The nodes in this list will be excluded from quantization
    when it is not None.
:parma use_external_data_format: option used for large size (>2GB) model. Set to False by default. 
r   FN)r	   
IntegerOpsr;   r<   r   r=   r2   r   r   r>   r/   rW   )rX   rY   rJ   r@   r[   r\   r]   rH   rI   r+   r^   rK   r/   rM   s                 r*   quantize_dynamicrd      s    F &&D3';#<#A#$6$;$;$=>tK(.9EI OO&&|Nr,   c
                 4   [         R                  n
[        [        U 5      5      nU(       a  [	        U5      S:X  a  [        [        R                  " 5       5      n[        UUUU
SUUSUUU5      nUR                  5         UR                  R                  X5        g)a2  
    Given a quantize-aware traning onnx model, create a quantized onnx model and save it into a file
:param model_input: file path of model to quantize
:param model_output: file path of quantized model
:param op_types_to_quantize: specify the types of operators to quantize, like ['Conv'] to quantize Conv only. It quantizes all supported operators by default
:param per_channel: quantize weights per channel
:param reduce_range: quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode
:param activation_type: quantization data type of activation
:param nodes_to_quantize:
    List of nodes names to quantize. When this list is not None only the nodes in this list
    are quantized.
    example:
    [
        'Conv__224',
        'Conv__252'
    ]
:param nodes_to_exclude:
    List of nodes names to exclude. The nodes in this list will be excluded from quantization
    when it is not None.
:parma use_external_data_format: option used for large size (>2GB) model. Set to False by default. 
r   FN)r	   rc   r+   r   r;   r<   r   r=   r   r>   r/   rW   )rX   rY   rJ   r@   r[   r\   r]   rH   rI   r^   rK   r)   rM   s                r*   quantize_qatrf     s    @ &&D %T+%67O3';#<#A#$6$;$;$=>I OO&&|Nr,   )T)2osr$   onnx.numpy_helperstructr6   numpynppathlibr   r   r8   onnxruntimer   r   r   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   registryr   r   r1   r   onnx_quantizerr   qdq_quantizerr   	calibrater   r   r   r+   r2   rc   rN   rV   rR   MinMaxra   rd   rf    r,   r*   <module>ru      sZ   
       & P P c c u u / < ! ) ' R Rt '4 ' /:: "'#!%#""$MQf "-!6!6)+ %!&$-$4$4 ) 0 0&(%'#'-2%6%=%=YO-BYO| +-!&"'%.%5%5!*!1!1')&($(.37O$ 7O#'7Ox ')"#!*!1!1&--#%"$*/6Od 6O#6Or,   