
    9i#y                        S SK r S SKrS SKrS SKrS SKrS SKrS SKJs  Js  J	r
  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJr  S SKJrJrJrJrJrJrJrJr  S SK J!r!J"r"  S SK#J$r$  SS	K%J&r&J'r'J(r(  / S
Qr)\r*\RV                  \RX                  RV                  \RZ                  \RX                  RZ                  0\RX                  RV                  \R                  RV                  \RX                  RZ                  \R                  RZ                  0S.r.S r/   S!S jr0S"S jr1S r2S r3S#S jr4    S$S jr5S r6S r7\Rp                  " \&5          S%S j5       r9S r:S r;\Rp                  " \&5      S&S j5       r<\Rp                  " \&5      S\Rz                  SS4S j5       r>\Rp                  " \&5      S&S j5       r?\Rp                  " \&5      S#S j5       r@\Rp                  " \&5            S'S j5       rA     S(S jrB S#S jrCS)S  jrDg)*    N)_FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set)DeQuantStubQuantWrapper)type_before_parametrizations   )DEPRECATION_WARNINGget_qparam_dict)has_no_children_ignoring_parametrizations)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      [         $ )z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICT     ^/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/ao/quantization/quantize.pyr   r   G   s    &&r)   c                    UR                  [        U 5      U5      nUR                  X55      n[        U SU5      n[        R                  R
                  R                  R                  XP5        [        XP5      nX`l        U R                  5        H]  u  pxU(       a  US-   U-   OUn	Ub8  XtR                  S/ 5      ;   a  M0  [        U5      UR                  S/ 5      ;   a  MQ  [        XXi5        M_     g)a  This is a helper function for `propagate_qconfig_`

Args:
    module: input module
    qconfig_dict: dictionary that maps from name of submodule to quantization
                 configuration
    qconfig_parent: quantization config of parent module, we will fallback to
                   this config when there is no specified config for current
                   module
    prefix: corresponding prefix of the current module, used as key in
            qconfig_dict
    prepare_custom_config_dict: dictionary for custom handling of modules
                                see docs for :func:`~torch.ao.quantization.prepare_fx`

Return:
    None, module is modified inplace with qconfig attached
qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr,   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r*   r8   r8   L   s    2 "%%$V,nN "%%f=NVY?N	HH!!77O >~ V.N,,./5t+4%-223NPRSSE{)--.JBOP &%> /r)   c                 ,    Uc  0 nUc  0 n[        XUS9  g)ac  Propagate qconfig through the module hierarchy and assign `qconfig`
attribute on each leaf module

Args:
    module: input module
    qconfig_dict: dictionary that maps from name or type of submodule to
        quantization configuration, qconfig applies to all submodules of a
        given module unless qconfig for the submodules are specified (when
        the submodule already has qconfig attribute)
    prepare_custom_config_dict: dictionary for custom handling of modules
        see docs for :func:`~torch.ao.quantization.prepare_fx`

Return:
    None, module is modified inplace with qconfig attached
N)r=   )r8   )r9   r:   r=   s      r*   r   r   }   s)      !)%'"9Sr)   c                 $    U R                  U5      $ )z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs      r*   _observer_forward_hookrJ      s    ''//r)   c                 *    U R                  US   5      $ )z2Forward pre hook that calls observer on the outputr   rE   )rG   rH   s     r*   _observer_forward_pre_hookrL      s    ''a11r)   Fc                     [        U S5      (       d   S5       eU(       a  U R                  [        SS9  g U R                  [        SS9  g )NrF   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrL   register_forward_hookrJ   )r9   pre_hooks     r*   &_register_activation_post_process_hookrS      sM    6455 Q5 (()CT(R$$%;T$Jr)   c                   ^^^ Uc
  [        5       nUc  0 nTcI  [        U 5      n[        U5      S::  d
   SU 35       e[        U5      S:  a  [        [	        U5      5      OSmSS jmS mSUUU4S jjnU R                  5        GH  u  px[        U5      [        R                  4;   a  M&  [        [        U5      [        R                  [        R                  45      (       aN  T" U5      (       a?  [        US5      (       d   S	[        U5       S
35       eT" UR                  T5      Ul        M  M  [!        U["        5      (       a  T" U5      (       a
  U" U5        M  M  Ub*  [        U5      U;   a  T" U5      (       a  U" U5        GM  GM  [%        U5      (       a  ['        U5      n	U" X5        GM-  T" U5      (       aj  [        U5      U;   a[  U[        U5         n
U
R)                  U5      n[+        XU5        [        U
[-        [/        5       5      5      (       d  U" U5        GM  GM  [1        UUUTU5        GM     [3        U 5      (       a@  [!        U [4        R                  R6                  5      (       d  [        U 5      U;   a  U" U 5        [        U S5      (       aC  [!        U [4        R                  R6                  5      (       d  [        U 5      U;   a	  U" U 5        gggg)aG  Add observer for the leaf child of the module.

This function insert observer module to all leaf child module that
has a valid qconfig attribute.

Args:
    module: input module with qconfig attributes for all the leaf modules that we want to quantize
    qconfig_propagation_list: a list of quantizable modules that will have observers added to them
        if they are leaf nodes
    device: parent device, if any
    non_leaf_module_list: list of non-leaf modules we want to add observer

Return:
    None, module is modified inplace with added observer modules and forward_hooks
Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 b    Uc  U R                  5       OU" 5       nUb  UR                  U5        U$ N)
activationto)r,   devicespecial_act_post_processrW   s       r*   get_activation_post_process3_add_observer_.<locals>.get_activation_post_process   s=     (/  )+ 	
 MM&!r)   c                 D    [        U S5      =(       a    U R                  S L$ )Nr,   rO   r,   )ms    r*   needs_observation)_add_observer_.<locals>.needs_observation   s    q)$>$)>>r)   c                    > T" U 5      (       aX  [        U [        5      (       dB  U R                  ST" U R                  TU5      5        [	        U [        U R                  5      S9  ggg)z]Adds an activation post process module and register
a pre or post hook that calls the module
rF   rR   N)
isinstancer   
add_moduler,   rS   r   )r_   rZ   rY   r[   r`   s     r*   insert_activation_post_process6_add_observer_.<locals>.insert_activation_post_process   sa    
 Q
1k(B(BLL)+IIv'? 35aii@ )Cr)   rF   zfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrV   )r   _get_unique_devices_lennextiterr6   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrO   r,   rF   rd   r   r   r   
from_floatsetattrtupler   _add_observer_r   r2   
Sequential)r9   qconfig_propagation_listnon_leaf_module_listrY   custom_module_class_mappingdevicesrf   r@   rA   rZ   observed_classobserved_childr[   r`   s      `        @@r*   rv   rv      s   ,  '#G#I "*&(# ~&v.7|q  	
`ah`ij	
  ),Gq(8d7m$d? & ,,.'.2::,>(/#2E2Es1W
 
 !''u&?@@ '(DU(K'LLyz@ 1LMM61-	 ( |,, ''.u5 ( !,,U37KK ''.u5 (*511'DU'K$*5Ke$$,U37RR8,U3N ,66u=NF.1 neO4E.FGG.~> H ($+S /h 	2&996588#6#677(04LL&v. 	+,,6588#6#677(04LL&v. M 8 	-r)   c                 0   U R                  5        Vs1 s H*  oR                  R                  S:w  d  M  UR                  iM,     snU R                  5        Vs1 s H*  oR                  R                  S:w  d  M  UR                  iM,     sn-  $ s  snf s  snf )Nmeta)
parametersrY   r7   buffers)r9   ps     r*   ri   ri   3  sy    $//1M1XX]]f5LHAHH1M ..*Q*Qhhmmv.E*Q  M Qs   BBB9Bc                     [        U 5      (       a-  [        U S5      (       a  U R                  (       a  [        U 5      $ U R	                  5        H  u  p[        U5      U R                  U'   M     U $ )aO  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
Note that this function will modify the children of module inplace and it
can return a new module which wraps the input module as well.

Args:
    module: input module with qconfig attributes for all the leaf modules
    that we want to quantize

Return:
    Either the inplace modified module with submodules wrapped in
    `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
    wraps the input module, the latter case only happens when the input
    module is a leaf module and we want to quantize it.
r,   )r   rO   r,   r   r6   r   _modules)r9   r@   rA   s      r*   r   r   9  s[      	2&99FI&&NNF##,,. 1% 8 /Mr)   c                 |   [         R                  R                  S5        Uc
  [        5       nUR	                  S0 5      nU(       d  [
        R                  " U 5      n UnUc
  [        5       n[        U SS9  [        S U R                  5        5       5      (       d  [        R                  " S5        [        U UUUS9  U $ )a  Prepares a copy of the model for quantization calibration or quantization-aware training.

Quantization configuration should be assigned preemptively
to individual submodules in `.qconfig` attribute.

The model will be attached with observer or fake quant modules, and qconfig
will be propagated.

Args:
    `model`: input model to be modified in-place
    `inplace`: carry out model transformations in-place, the original module is mutated
    `allow_list`: list of quantizable modules
    `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
    `prepare_custom_config_dict`: customization configuration dictionary for prepare function

.. code-block:: python

   # Example of prepare_custom_config_dict:
   prepare_custom_config_dict = {
       # user will manually define the corresponding observed
       # module class which has a from_float class method that converts
       # float custom module to observed custom module
       "float_to_observed_custom_module_class": {CustomModule: ObservedCustomModule}
   }

z!quantization_api.quantize.prepareNr$   r:   c              3   `   #    U  H$  n[        US 5      =(       a    UR                  v   M&     g7f)r,   Nr^   ).0r_   s     r*   	<genexpr>prepare.<locals>.<genexpr>  s#     LOqwq)$22Os   ,.zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)rz   )r2   _C_log_api_usage_oncer   r0   copydeepcopyr   r   anymoduleswarningswarnrv   )modelinplace
allow_listobserver_non_leaf_module_listr=   rz   rx   s          r*   r   r   T  s    D 
HH  !DE!)%C%E""<"@"@/# e$  *#G#I u40 LEMMOLLLK	
  %$?	 Lr)   c                    ^  [        T S5      (       a&  [        T R                  5      (       a  [        T S5        SU 4S jjnU" SS9  U" SS9  g )NrF   Fc                   > U (       a  TR                   OTR                  nU (       a  [        O[        n[	        5       nUR                  5        H  u  pEXRL d  M  UR                  U5        M     U H  nUR                  U5        M     g rV   )_forward_pre_hooks_forward_hooksrL   rJ   setitemsaddpop)rR   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr9   s         r*   remove_hooks5_remove_activation_post_process.<locals>.remove_hooks  sp    086,,f>S>S*2&8N 	  #u"*.."2I'$((3 #3 .ILL# .r)   Trc   F)rO   r   rF   delattr)r9   r   s   ` r*   _remove_activation_post_processr     sM     v0116Q&&7 7 	12
$ $% r)   c                     U R                  5        H  n[        U5        M     [        U S5      (       a  U ?[	        U 5        g)zzClean up the qconfig left in the module so that new qconfig can be
propagated.

Args:
    module: module to be cleaned up
r,   N)children_remove_qconfigrO   r,   r   )r9   rA   s     r*   r   r     s9     " # vy!!N#F+r)   c                     [         R                  R                  S5        Uc
  [        5       nU(       d  [        R
                  " U 5      n U R                  5         [        U SS9  U" U /UQ76   [        XSS9  U $ )aS  Quantize the input float model with post training static quantization.

First it will prepare the model for calibration, then it calls
`run_fn` which will run the calibration step, after that we will
convert the model to a quantized model.

Args:
    model: input float model
    run_fn: a calibration function for calibrating the prepared model
    run_args: positional arguments for `run_fn`
    inplace: carry out model transformations in-place, the original module is mutated
    mapping: correspondence between original module types and quantized counterparts

Return:
    Quantized model.
z"quantization_api.quantize.quantizeTr   )	r2   r   r   r   r   r   evalr   r"   )r   run_fnrun_argsmappingr   s        r*   r   r     sd    $ 
HH  !EF:<e$	JJLE4 
58ED)Lr)   c                    [         R                  R                  S5        UGc  U[         R                  :X  a|  [        R
                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        0nGOU[         R                  :X  a|  [        R
                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        [        R                  [        0nGO4U[         R                  :X  a+  [        R                  [         [        R"                  [         0nOU[         R$                  :X  a  [        R                  [&        0nO[)        SU S35      e[+        U[,        5      (       a  U[         R                  L a  [        nOcU[         R                  L a  [        nOIU[         R                  L a  [         nO/U[         R$                  L a  [&        nO[/        S[1        U5      5      e[3        [5        U[6        R8                  " U5      5      5      nUc
  [;        5       nU(       d  [<        R>                  " U 5      n U RA                  5         [C        X5        [E        XSS9  U $ )a*  Converts a float model to dynamic (i.e. weights-only) quantized model.

Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
by default is performed for layers with large weights size - i.e. Linear and RNN variants.

Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
If `qconfig` is provided, the `dtype` argument is ignored.

Args:
    model: input model
    qconfig_spec: Either:

        - A dictionary that maps from name or type of submodule to quantization
          configuration, qconfig applies to all submodules of a given
          module unless qconfig for the submodules are specified (when the
          submodule already has qconfig attribute). Entries in the dictionary
          need to be QConfig instances.

        - A set of types and/or submodule names to apply dynamic quantization to,
          in which case the `dtype` argument is used to specify the bit-width

    inplace: carry out model transformations in-place, the original module is mutated
    mapping: maps type of a submodule to a type of corresponding dynamically quantized version
        with which the submodule needs to be replaced

z*quantization_api.quantize.quantize_dynamicz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r2   r   r   qint8rm   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr	   	Embeddingquint4x2r
   
ValueErrorrd   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r"   )r   qconfig_specdtyper   r   default_qconfigs         r*   r   r     s   @ 
HH  !MNEKK		20/4

3

3L emm#		20/4

3

3L ell"!B?L enn$!GL GwNkl  
L#	&	&EKK5Oemm#5Oell"?Oenn$DO@#e*  Ci.>.>.OPQ;=e$	JJLu+ED)Lr)   c                 6   [         R                  R                  S5        U R                  (       d   S5       eUc
  [	        5       nU(       d  [
        R                  " U 5      n [        U SS9  [        XSSS9  [        U [        UR                  5       5      SS9  U $ )	a  
Prepares a copy of the model for quantization calibration or
quantization-aware training and converts it to quantized version.

Quantization configuration should be assigned preemptively
to individual submodules in `.qconfig` attribute.

Args:
    model: input model to be modified in-place
    mapping: dictionary that maps float modules to quantized modules to be
             replaced.
    inplace: carry out model transformations in-place, the original module
             is mutated
z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r2   r   r   trainingr   r   r   r   r"   r   r   values)r   r   r   s      r*   r    r    :  s{      
HH  !HI>>NNN>13e$u40EDGEW^^5E1FPTULr)   c                     [         R                  R                  S5        U(       d  [        R                  " U 5      n U R                  5         [        U SS9  U" U /UQ76   [        U SS9  U $ )aC  Do quantization aware training and output a quantized model

Args:
    model: input model
    run_fn: a function for evaluating the prepared model, can be a
            function that simply runs the prepared model or a training
            loop
    run_args: positional arguments for `run_fn`

Return:
    Quantized model.
z&quantization_api.quantize.quantize_qatTr   )r2   r   r   r   r   trainr    r"   )r   r   r   r   s       r*   r!   r!   X  sW     
HH  !IJe$	KKMt$
58E4 Lr)   c           	          [         R                  R                  S5        U(       d  [        R                  " U 5      n [        U USUUUS9  U(       a  [        U 5        U $ )ad  Converts submodules in input module to a different module according to `mapping`
by calling `from_float` method on the target module class. And remove qconfig at the
end if remove_qconfig is set to True.

Args:
    `module`: prepared and calibrated module
    `mapping`: a dictionary that maps from source module type to target
               module type, can be overwritten to allow swapping user defined
               Modules
    `inplace`: carry out model transformations in-place, the original module
               is mutated
    `convert_custom_config_dict`: custom configuration dictionary for convert function
    `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant

.. code-block:: python

   # Example of convert_custom_config_dict:
   convert_custom_config_dict = {
       # user will manually define the corresponding quantized
       # module class which has a from_observed class method that converts
       # observed custom module to quantized custom module
       "observed_to_quantized_custom_module_class": {
           ObservedCustomModule: QuantizedCustomModule
       }
   }

z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r2   r   r   r   r   _convertr   )r9   r   r   r   r   r   r   s          r*   r"   r"   p  sU    J 
HH  !DEv&!#=#= Mr)   c           
         Uc  U(       a
  [        5       O	[        5       nUc
  [        5       nUR                  S0 5      nU(       d  [        R
                  " U 5      n 0 nU R                  5        HE  u  p[        U	[        5      (       d  [        U	5      U;  a  [        U	USUUUS9  [        XXe5      Xx'   MG     UR                  5        H  u  pXR                  U
'   M     U $ )aC  Converts submodules in input module to a different module according to `mapping`
by calling `from_float` method on the target module class

Args:
    module: input module
    mapping: a dictionary that maps from source module type to target
             module type, can be overwritten to allow swapping user defined
             Modules
    inplace: carry out model transformations in-place, the original module
             is mutated
    is_reference: a flag to enable quantized reference module
    use_precomputed_fake_quant: a flag to enable use of precomputed fake quant

r%   Tr   )r   r   r   r0   r   r   r6   rd   r   r   r   r#   r   r   )r9   r   r   r   r   r   rz   reassignr@   modkeyvalues               r*   r   r     s    ,   ?@9; 	
 ")%C%E""<"@"@3R# v&H**,	 3--,S19TT*+E %5
 -& nn&
$ ' Mr)   c                    U n[        U S5      (       Ga)  U R                  Gb  Sn[        U 5      U;   a   U[        U 5         R                  U 5      nSnO[        U 5      U;   a  U[        U 5         n[        US5      (       ai  UR                  (       aX  U R                  c   eU R                  R                  5       nU" U R
                  5        [        U5      nUR                  X5      nOQ[        R                  " UR                  5      n	SU	R                  ;   a  UR                  XS9nOUR                  U 5      nSnU(       a  U R                  R                  5        H  n
UR                  U
5        M     U R                  R                  5        H  nU[        Ld  M  UR!                  U5        M!     [#        U 5      n[%        U5      S::  d3  [%        U5      S	:X  a  [&        R(                  " S
5      U;   d
   SU 35       e[%        U5      S:  a  [+        [-        U5      5      OSnU(       a  UR/                  U5        U$ )zSwaps the module if it has a quantized counterpart and it has an
`observer` attached.

Args:
    mod: input module
    mapping: a dictionary that maps from nn module to nnq module

Return:
    The corresponding quantized module of `mod`
r,   NFT_IS_REFERENCEr   r   r      r   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rO   r,   r   from_observedr   weightr   rs   inspect	signaturer   r   r   rP   r   rJ   rQ   ri   rj   r2   rY   rk   rl   rX   )r   r   rz   r   new_modswappedqmodweight_post_processweight_qparamssigpre_hook_fnr   r{   rY   s                 r*   r#   r#     s    GsI3;;#:',0KK1,S1mC   G)#.'97<=Dt_--$2D2D{{...&)kk&8&8&:##CJJ/!01D!E//#>''8/3>>A"oo . G #ooc2GG"55<<>11+>  ? --446"8811': 7
 +3/Gw<1$G!ell6&:g&E bbiajk 
 -0L1,<T$w-($F

6"Nr)   c                     S n[        U S5      (       a  U R                  X" U5      S-   '   U R                  5        H%  u  pEU(       a  U" U5      U-   OUn[        XQU5        M'     g)a  Traverse the modules and save all observers into dict.
This is mainly used for quantization accuracy debug
Args:
    mod: the top module we want to save all observers
    prefix: the prefix for the current module
    target_dict: the dictionary used to save all the observers
c                     U S:X  a  U $ U S-   $ )N r-   r(   )r<   s    r*   
get_prefix&_get_observer_dict.<locals>.get_prefix*  s    2v76C<7r)   rF   N)rO   rF   r6   _get_observer_dict)r   target_dictr<   r   r@   rA   rB   s          r*   r   r   !  si    8 s-..'' 	Jv&)BBC ))+5;
6*T15}= ,r)   )Nr   N)NNr   )NNNN)FNNN)NF)NFTFNF)NFFNF)r   )Er   r   r   typing_extensionsr   r2   torch.ao.nn.quantizedr3   rm   	quantizedrp   torch.nntorch.ao.nn.intrinsicr   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   r   r   r	   r
   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r   r   r   torch.ao.quantization.stubsr   r   torch.nn.utils.parametrizer   utilsr   r   r   __all__is_activation_post_processr   quantizableMultiheadAttentionr'   r   r8   r   rJ   rL   rS   rv   ri   r   
deprecatedr   r   r   r   r   r   r    r!   r"   r   r#   r   r(   r)   r*   <module>r      sB         # #  . F 	 	 	 B C  9 
 	$$
r~~@@.
 	R\\..
))2<<+J+J2	 ' #.b20
2
K " $F/R6 12 "&#? 3?D!4,  12 3: 12EKKuW 3Wt 12 3: 12 3. 12 #$1 31l #$;~ KP;|>r)   