
    @Ki,                     
   S SK Jr  S SKJr  SSKJr  SSKJrJrJ	r	  SSK
Jr  SSKJr  \(       a  SS	KJr  \" 5       (       a  S S
Kr\" 5       (       a  S SKJr  \S 5       r\\l        \	R*                  " \5      r " S S\5      rg
)    )defaultdict)TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 V    [         R                  " SU R                  U R                  S9$ )Nr   )dtypedevice)torchemptycompute_dtyper   selfs    g/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_hqq.pyweightr   %   s    {{1D$6$6t{{KK    c            	          ^  \ rS rSrSrSrSrSrS/rU 4S jr	SSS	\
\   S
\S\
\   4S jrSSS\
\   S\
\   S\
\   4S jrSSS\S\4S jrSSSSS\SS4S jrS r  SS jrSS jrSS jr\S\4S j5       rSrU =r$ )HqqHfQuantizer.   z~
HQQ quantizer base HF class.
nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
FThqqc                   > [        5       (       d  [        S5      e[        TU ]  " U40 UD6  S U l        SU l        [        S S 5      R                  5       S1-
  U l        UR                  SS5      (       d  UR                  SS5      (       a  [        S5      eU R                  c;  SU;   a  US   U l        O*[        R                  U l        [        R                  S5        UR                  S	5      n[        U[         5      (       a^  S
UR#                  5       ;   d  SUR#                  5       ;   a  [        S5      e[%        ['        UR#                  5       5      5      S:  U l        g g )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbiasfrom_tf	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keysget
ValueErrorr   float32loggerinfo
isinstancedictvalueslenset)r   quantization_configkwargsr"   	__class__s       r   r'   HqqHfQuantizer.__init__9   sA   !! T  	,77
$!$-==?6(J::i''6::k5+I+I; 
 ::& #G_
"]]
mnZZ-
j$''
))++v9J9J9L/L h 
 (+3z/@/@/B+C'Dq'H$ (r   modelr   missing_keysprefixreturnc                 h    U R                   (       a  U Vs/ s H  nSU;  d  M  UPM     sn$ U$ s  snf )Nr   )pre_quantized)r   r9   r:   r;   r6   keys         r   update_missing_keys"HqqHfQuantizer.update_missing_keys[   s5     #/I<CHC4GC<II Js   
//expected_keysloaded_keysc                 *  ^^ U R                   (       d  U$ U4S jm[        U5      nUR                  5        H  u  pVXVl        M     [        5       nT" X5        [        5       nU H;  n	UR                  R
                  S    H  n
X;   d  M
  UR                  U	5        M     M=     Xx-  n[        S S [        R                  SSS9R                  5       S1-
  n[        5       nU H0  m[        U4S jU 5       5      (       d  M  UR                  T5        M2     XL-  nU Hg  n	U	S-   U;   a  UR                  U	S-   5        O'UR                  U Vs1 s H
  oS	-   U-   iM     sn5        U	S
-   U;   d  MS  UR                  U	S
-   5        Mi     [        U5      $ s  snf )Nc                    > U R                  5        HQ  u  p#[        U[        R                  R                  5      (       a  UR                  UR                  5        T" X15        MS     g N)named_childrenr0   r   nnLinearaddname)r9   layersrK   module_find_hqq_quantizable_layerss       r   rN   IHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersk   sE     % 4 4 6fuxx88JJv{{+,V< !7r   skip_modulesr#   Flinear_layerquant_configr   r   del_origr   c              3   ,   >#    U  H	  oT;   v   M     g 7frF    ).0_moduler?   s     r   	<genexpr>6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s     @gc>   z.weight.z.bias)r>   r4   named_modulesrK   configr5   rJ   r   r   float16r)   anyupdatelist)r   r9   rB   rC   new_keysrK   rM   _valid_modules_skipped_modulesrX   _skip_module	_ref_keys_rm_keys_ref_keyrN   r?   s                 @@r   update_expected_keys#HqqHfQuantizer.update_expected_keysd   s    !!  	= }% "//1LDK 2 $U; 5%G % @ @ P*$((1 !Q & 	* --
 /
vh'	 5C@@@@S!  	 &G"k1Wy01) T)h3!9) TU K/Ww./ & H~	 !Us   
F

param_namec                 d    [        X5      u  pE[        U[        R                  R                  5      $ rF   )r   r0   r   rH   rI   )r   r9   rl   r6   rM   _s         r   param_needs_quantization'HqqHfQuantizer.param_needs_quantization   s%    (;	 &%((//22r   param_valueztorch.Tensortarget_deviceztorch.devicec                   ^^ [        X5      u  mnUR                  SS5      S   n[        X5      u  pUR                  R                  S   n
UR                  R                  S   n[	        U4S jU 5       5      (       a*  TR                  XbR                  X@R                  S90SS	S
9  g U R                  (       GaS  [        U S5      (       d  [        [        5      U l        U R                  U   R                  Xb05        U R                  U   m[        U4S jU R                   5       5      (       a  ST;   d  TR                   c  [#        S S U R                  USS9nUR                  T5        UR                   bW  [%        UR                   [&        R(                  5      (       a.  [&        R*                  R-                  UR                   5      Ul        U R.                  (       a  U R1                  U5      n[3        XU5        U R                  U	 @g TR                  Xb0SS	S
9  TR4                  R6                  R8                  S:g  =(       a8    TR                   S L =(       d#    TR                   R6                  R8                  S:g  nU(       a  SR;                  TR<                  R?                  S5      SS  5      nSU
;   a  U
nO	X;   a  X   n[#        TWU R                  US	S9nUR                   bW  [%        UR                   [&        R(                  5      (       a.  [&        R*                  R-                  UR                   5      Ul        U R.                  (       a  U R1                  U5      n[3        XU5        g g )Nr\   r
   r   rS   rP   c              3   @   >#    U  H  oTR                   ;   v   M     g 7frF   )rK   )rW   skip_modulerM   s     r   rY   8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>   s     J\kfkk)\s   )r   r   FT)strictassign
hqq_paramsc              3   ,   >#    U  H	  oT;   v   M     g 7frF   rV   )rW   kry   s     r   rY   rv      s     :Mq
?Mr[   r   rQ   metaweight_quant_params)rS   r   r   rT   ) r   rsplitr^   r5   r`   load_state_dicttor   r>   hasattrr   r1   ry   ra   allr*   r   r   r0   r   TensorrH   	Parameterr(   _patch_layer_for_multigpusetattrr   r   typejoinrK   split)r   r9   rq   rl   rr   r6   tensor_namemodule_nameparent_modulenoderS   rP   	hqq_layermodule_is_ready
module_tagmodule_quant_configry   rM   s                   @@r   create_quantized_param%HqqHfQuantizer.create_quantized_param   s    35E ''Q/225F||77G||77G J\JJJ""nnMnTU^clp #   4.."-d"3OOK(//0JK5J :DMM:::*@TX^XcXcXk%!%!%"&**("	 ))*5>>-*Y^^U\\2Z2Z%*XX%7%7	%GIN'' $ > >y IIY7OOK0& 	9%PTU !--..33v= 
KK4D6;;#5#5#:#:f#D 	 &++"3"3C"8"=>J$4&2#+&2&>#!0"jj$I ~~)j.V.V!&!3!3INN!C	## ::9E	M3+ r   c                 (   ^^ S mUU4S jTl         T$ )Nc                     [         R                  " UR                  U R                  5      U R	                  5       R                  5       5      nU R                  b  X R                  -  nU$ rF   )r   matmulr   r   
dequantizetr   )r   xouts      r   forward_with_deviceEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device   sJ    ,,qttDKK0$//2C2E2E2GHCyy$yy Jr   c                    > T" TU 5      $ rF   rV   )r   r   r   s    r   <lambda>:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>   s    &9)Q&Gr   )forward)r   r   r   s    `@r   r   (HqqHfQuantizer._patch_layer_for_multigpu   s    	 H	r   c                 *    [        XR                  S9ng )N)r5   )r   r5   r   r9   r6   s      r   $_process_model_before_weight_loading3HqqHfQuantizer._process_model_before_weight_loading  s     'uBZBZ[r   c                 >    SUl         U R                  5       Ul        U$ NT)is_hqq_quantizedis_serializableis_hqq_serializabler   s      r   #_process_model_after_weight_loading2HqqHfQuantizer._process_model_after_weight_loading
  s     !%$($8$8$:!r   c                     gr   rV   )r   safe_serializations     r   r   HqqHfQuantizer.is_serializable  s    r   c                     gr   rV   r   s    r   is_trainableHqqHfQuantizer.is_trainable  s    r   )r   r*   ry   r(   )r9   r   rF   )__name__
__module____qualname____firstlineno____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr'   rb   strr@   rj   boolro   r   r   r   r   r   propertyr   __static_attributes____classcell__)r7   s   @r   r   r   .   s   
  %'+$  ID & 6:3i IL 	c 9&97;Cy9OSTWy9	c9v3.? 3S 3_c 3P4 P4 $P4 	P4
 &P4d\ \
 d  r   r   )collectionsr   typingr   integrationsr   utilsr   r   r	   baser   quantizers_utilsr   modeling_utilsr   r   hqq.core.quantizer   r   r   
get_loggerr   r.   r   rV   r   r   <module>r      s|    $   1 A A  2 0 +
 L L I			H	%f[ fr   