
    @KiU                          S SK JrJr  SSKJr  SSKJr  SSKJr  \(       a  SSK	J
r
  SSKJrJrJrJrJr  SS	KJr  \" 5       (       a  S S
Kr\R(                  " \5      r " S S\5      rg
)    )TYPE_CHECKINGOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                      ^  \ rS rSrSrSrSrSS/rS\4U 4S jjr	S	 r
SS jrSSSSS\SS4S jr S SSS\\\      4S jjrS!S jrS\\   S\S
\\   4S jr\S
\4S j5       rS S jrSSS\S
\4S jrS rSrU =r$ )"HiggsHfQuantizer"   z
Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTzflute-kernelfast_hadamard_transformquantization_configc                 4   > [         TU ]  " U40 UD6  Xl        g N)super__init__r   )selfr   kwargs	__class__s      i/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_higgs.pyr   HiggsHfQuantizer.__init__+   s    ,77#6     c                    [         R                  R                  5       (       d  [        S5      e[	        5       (       d  [        S5      e[        5       (       d  [        S5      e[        5       (       d  [        S5      eUc  [        S5      e[        U[        5      (       a4  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environment%HiggsHfQuantizer.validate_environment/   s    zz&&((%&vww&((hii!##lmm$&&t  F  
D))u
8I8I8K/KvYcYjYjYlOld  Pm)r   returnc                     Uc'  [         R                  S5        [        R                  nU$ U[        R                  :w  a#  U[        R                  :w  a  [        SU S35      eU$ )NzG`dtype` is None. Setting `dtype=torch.float16` for FLUTE compatibility.zInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)loggerinfor#   float16bfloat16r(   )r   dtypes     r   update_dtypeHiggsHfQuantizer.update_dtypeI   s^    =KKabMME  emm#(?"5')|}  r   modelr
   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                    SSK Jn  U" UR                  U5      U R                  R                  U R                  R
                  U R                  R                  U R                  R                  5      nA[        X5      u  pSR                  UR                  S5      S S 5      n
UR                  5        H  u  pXR                  ;   a-  [        R                  R                  USS9UR                  U'   MA  XR                   ;   a.  [        R                  R#                  U5      UR                   U'   M~  US:X  a/  Xl        UR'                  5       U R                  R$                  U
'   M  [)        SU S	U 35      e   g )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsr=   tor   bitsp
group_sizehadamard_sizer	   joinsplititems_parametersr#   nn	Parameter_buffersBufferrA   to_dictr(   )r   r8   r9   r:   r;   r   r=   
flute_dictmodule_module_namekeyvalues                r   create_quantized_param'HiggsHfQuantizer.create_quantized_paramT   s7    	7(NN=)$$))$$&&$$//$$22

 (;	hhz//4Sb9:$**,JC(((*/((*<*<URW*<*X""3''',xxu'=$'',$FKmmo((66{C ?3%{6(!KLL -r   keep_in_fp32_modulesc                     SSK Jn  U R                  XR                  R                  U5      U l        U" UU R                  U R                  S9  U R                  UR
                  l        g )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)rB   r[   get_modules_to_not_convertr   r\   config)r   r8   rY   r   r[   s        r   $_process_model_before_weight_loading5HiggsHfQuantizer._process_model_before_weight_loadingt   s_     	=&*&E&E++BBDX'
# 	" $ 8 8#'#>#>	

 ,0+C+C(r   c                    SSK JnJn  SSKJn  SSKJn  0 nUR                  5        VV	s0 s H  u  p[        X5      (       d  M  X_M     n
nn	[        U
R                  5       SSS9 GH  u  pU	R                  R                  U;  a0  U" U	R                  R                  S	9XyR                  R                  '   XyR                  R                     U	l        UR                  U R                  R                   U   5      U	l        U" U	R                  R"                  U	R$                  R"                  U	R                   S
9u  U	R                  l        U	l        U	R                   R'                  5       U R                  R                   U'   GM     g s  sn	nf )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearzRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tunerb   rc   flute.utilsrd   rB   rf   named_modulesr)   r   rJ   rj   ri   	workspace	from_dictr   rA   datark   rP   )r   r8   r   rb   rc   rd   rf   flute_workspacesnamerR   flute_moduless              r   #_process_model_after_weight_loading4HiggsHfQuantizer._process_model_after_weight_loading   s@   B6.:?:M:M:Os:O,$S]^dSr:Os !4!4!6=V^cdLD }}##+;;9OW]WdWdWkWk9l !5!56/0D0DEF $0#9#9$:R:R:`:`ae:f#gF 7L}}))}}))--84FMM 4
 <B;O;O;W;W;YD$$2248 e ts   FFmissing_keysprefixc                   ^^	 SSK Jn  UR                  5        VVs1 s H  u  pV[        Xd5      (       d  M  UiM     snnm	S[        S[
        4U	U4S jjnU Vs/ s H  o" U5      (       a  M  UPM     sn$ s  snnf s  snf )Nr   re   rU   r/   c                    >^ ^ T R                  S5      (       d  T R                  S5      (       a  gT ST  3m[        UU 4S jT 5       5      $ )Nz.weightz.biasFr>   c              3   D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr    ).0rt   full_keyrU   s     r   	<genexpr>NHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>   s      O;4s{6dh&66;s    )endswithany)rU   r   higgs_namesry   s   `@r   should_update;HiggsHfQuantizer.update_missing_keys.<locals>.should_update   sD    ||I&&#,,w*?*? 3%(HO;OOOr   )rB   rf   ro   r)   strbool)
r   r8   rx   ry   rf   rt   rR   r   rU   r   s
      `     @r   update_missing_keys$HiggsHfQuantizer.update_missing_keys   su    .050C0C0Ei0ETZIht0Ei	Ps 	Pt 	P 	P  ,F|=3E|FF j Gs   A6A6A<-A<c                     g)NFr}   )r   s    r   is_trainableHiggsHfQuantizer.is_trainable   s    r   c                     g)NTr}   )r   safe_serializations     r   is_serializable HiggsHfQuantizer.is_serializable   s    r   c                 X    SSK Jn  [        X5      u  pV[        XT5      (       a  US:X  a  gg)Nr   re   rj   TF)rB   rf   r	   r)   )r   r8   r:   r   rf   rR   tensor_names          r   param_needs_quantization)HiggsHfQuantizer.param_needs_quantization   s*    .25Ef**{h/Fr   c                 "    SSK Jn  U" U5      nU$ )Nr   )dequantize_higgs)rB   r   )r   r8   r   s      r   _dequantizeHiggsHfQuantizer._dequantize   s    3 'r   )r\   r   )r5   torch.dtyper/   r   r   )r8   r
   )__name__
__module____qualname____firstlineno____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r-   r6   r   rW   r   listr_   rv   r   propertyr   r   r   r   r   __static_attributes____classcell__)r   s   @r   r   r   "   s    !'+$')BC7,C 74	M M $M 	M
 &MF 59D D 'tCy1D&Z2GtCy G# GRVWZR[ G d  .? S _c  r   r   )typingr   r   utils.loggingr   baser   quantizers_utilsr	   modeling_utilsr
   utilsr   r   r   r   r   utils.quantization_configr   r#   
get_loggerr   r1   r   r}   r   r   <module>r      sP    +    2 0 s s ? 			H	%`{ `r   