
    iX                     H   S SK JrJr  S SKrS SKJs  Jr  S SKJ	r	  S SK
JrJr  S SKJrJrJrJrJrJr  S SKJrJr  S SKJr  S SKJr  S	S
KJrJrJr  S	SKJ r   S	SK!J"r"   " S S\R
                  RF                  5      r$ S)S\R
                  RJ                  S\&4S jjr'S\R
                  RJ                  4S jr( " S S\5      r) " S S\)5      r* " S S\$5      r+S\R
                  RJ                  4S jr,S\R
                  RJ                  4S jr-S\R\                  S\4S jr/S\0S\R\                  S\4S jr1 " S  S!\)5      r2 " S" S#\$5      r3S\R
                  RJ                  4S$ jr4S\R
                  RJ                  4S% jr5S\0S\R\                  S\4S& jr6 " S' S(\)5      r7g)*    )AnyOptionalN)	is_device)PerGroupPerRow)Int8DynActInt4WeightLinearWeightOnlyInt4Linear_check_linear_int4_k_replace_linear_8da4w_replace_linear_int4 groupwise_affine_quantize_tensor)TorchAODTypeZeroPointDomain)TwoStepQuantizer)get_group_qparams_symmetric   )FakeQuantizeConfigBaseFloat8FakeQuantizeConfigIntxFakeQuantizeConfig)FakeQuantizerBase)_get_qmin_qmaxc                   *  ^  \ rS rSrSr   SS\S\S\S\\   S\\   S	S4U 4S
 jjjr	S\
R                  S	\
R                  4S jrS	\
R                  R                  4S jr\  SS\
R                  R                  S\\   S\\   4S jj5       rSrU =r$ )FakeQuantizedLinear*   aY  
General linear layer with fake quantized weights and activations.

Specific target dtypes, granularity, schemes etc. are specified
through separate configs for weights and activations.

Example usage::

    activation_config = IntxFakeQuantizeConfig(
        dtype=torch.int8,
        granularity="per_token",
        is_symmetric=False,
    )
    weight_config = IntxFakeQuantizeConfig(
        dtype=torch.int4,
        group_size=8,
        is_symmetric=True,
    )
    fq_linear = FakeQuantizedLinear(
        16, 32, False, activation_config, weight_config,
    )
    fq_linear(torch.randn(16))
Nin_featuresout_featuresbiasactivation_configweight_configreturnc                   > [         T	U ]  " UUU/UQ70 UD6  [        R                  R	                  S5        Ub  [
        R                  " U5      U l        OS U l        Ub{  [        U[        5      (       aJ  [        UR                  [        5      (       a+  UR                  nUb  X-  S:w  a  [        SU< SU< S35      e[
        R                  " U5      U l        g S U l        g )Nz,torchao.quantization.qat.FakeQuantizedLinearr   zin_features (z) % group_size (z) must be == 0)super__init__torch_C_log_api_usage_oncer   from_configactivation_fake_quantizer
isinstancer   granularityr   
group_size
ValueErrorweight_fake_quantizer)
selfr   r   r   r   r   argskwargsr+   	__class__s
            ^/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/quantization/qat/linear.pyr#   FakeQuantizedLinear.__init__C   s     		
 		

 	
 	$$%ST(->-J-J!.D* .2D* $-)?@@Z))8F F +55
)k.F!.K$&
4  *;)F)F})UD&)-D&    xc                     U R                   b  U R                  U5      nU R                  b  U R                  U R                  5      nOU R                  n[        R                  " XU R
                  5      $ N)r(   r-   weightFlinearr   )r.   r5   ws      r2   forwardFakeQuantizedLinear.forwardl   s\    ))5..q1A%%1**4;;7AAxxdii((r4   c                 x   [         R                  R                  U R                  U R                  U R
                  S LU R                  R                  U R                  R                  S9nU R                  R                  [         R                  " S5      :w  a"  U R                  Ul        U R
                  Ul        U$ )Ndevicedtypemeta)	r$   nnLinearr   r   r   r8   r@   rA   )r.   
new_linears     r2   	to_linearFakeQuantizedLinear.to_linearu   s    XX__IIT!;;%%++## % 

 ;;f!55 $J"iiJOr4   modc           
      T   [        UR                  UR                  UR                  S LUUUR                  R
                  UR                  R                  S9nUR                  R
                  [        R
                  " S5      :w  a"  UR                  Ul        UR                  Ul        U$ )Nr   r   r@   rA   rB   )r   r   r   r   r8   r@   rA   r$   )clsrH   r   r   rE   s        r2   from_linearFakeQuantizedLinear.from_linear   s     )OOHHD /'::$$**""

 ::V 44 #

J!hhJOr4   )r(   r-   )FNN)NN)__name__
__module____qualname____firstlineno____doc__intboolr   r   r#   r$   Tensorr<   rC   rD   rF   classmethodrL   __static_attributes____classcell__r1   s   @r2   r   r   *   s    8 >B:>'.'. '. 	'.
 $$:;'.   67'. 
'. '.R) )%,, )588??    ?C:>	XX__ $$:;   67	 r4   r   rH   enabledc                     [        U [        5      (       a<  U R                  b  XR                  l        U R                  b  XR                  l        ggg)zG
Helper function to enable fake quantization in `FakeQuantizedLinear`.
N)r)   r   r(   rZ   r-   )rH   rZ   s     r2   enable_linear_fake_quantr\      sL     #*++((44;))1$$007%%- 1 ,r4   c                     [        U SS9  g)zH
Helper function to disable fake quantization in `FakeQuantizedLinear`.
F)rZ   N)r\   rH   s    r2   disable_linear_fake_quantr_      s     S%0r4   c                   @    \ rS rSrSrS\\   4S jrS\\   4S jrSr	g)_LegacyQATQuantizer   zE
Base class for sharing common methods across legacy QAT quantizers.
r    c                     g r7    r.   s    r2   #get_activation_fake_quantize_config7_LegacyQATQuantizer.get_activation_fake_quantize_config       r4   c                     g r7   rd   re   s    r2   get_weight_fake_quantize_config3_LegacyQATQuantizer.get_weight_fake_quantize_config   rh   r4   rd   N)
rN   rO   rP   rQ   rR   r   r   rf   rj   rW   rd   r4   r2   ra   ra      s+    X>T5U :P1Q r4   ra   c                     ^  \ rS rSrSrSS\R                  \R                  4S\S\S\R                  S\R                  S	S
4
U 4S jjjr
S\R                  R                  S\S\S	\R                  R                  4S jrS\R                  R                  S\S\S	\R                  R                  4S jrS\R                  R                  4S jrS	\\   4S jrS	\\   4S jrSrU =r$ ) Int8DynActInt4WeightQATQuantizer   z
Quantizer for performing QAT on a model, where linear layers have int8
dynamic per token fake quantized activations and int4 fake quantized
grouped per channel weights.
   F	groupsizepadding_allowed	precisionscales_precisionr    Nc                    > [         TU ]  5         [        R                  R	                  S5        Xl        X l        X0l        X@l        [        R                  U l
        g )Nz9torchao.quantization.qat.Int8DynActInt4WeightQATQuantizer)r"   r#   r$   r%   r&   rp   rq   rr   rs   float32activation_scales_precision)r.   rp   rq   rr   rs   r1   s        r2   r#   )Int8DynActInt4WeightQATQuantizer.__init__   sI     	$$G	
 (%4&/-=+0==(r4   modelr/   r0   c           
      |    [        UU R                  U R                  U R                  U R                  [
        SS9  U$ )NT)copy_weights)r   rp   rq   rr   rs   Int8DynActInt4WeightQATLinearr.   rx   r/   r0   s       r2   prepare(Int8DynActInt4WeightQATQuantizer.prepare   s<     	NN  NN!!)	
 r4   c                 (    U R                  U5        U$ r7   )_convert_qat_linear_8da4wr|   s       r2   convert(Int8DynActInt4WeightQATQuantizer.convert   s     	&&u-r4   modulec           
         UR                  5        GHg  u  p#[        U[        5      (       Ga:  UR                  R                  n[        UR                  UR                  UR                  SLUR                  UR                  R                  UR                  S9n[        XU5        Sn[        U5      u  px[        UR                  UUR                  UR                  S9u  pU
R!                  UR"                  5      n
SSKJn  U" UR                  U	U
UU[(        R*                  UR                  5      nXl
        Xl        Xl        UR                  b  UR                  Ul        GMS  GMV  U R1                  U5        GMj     g)zP
Replace all `Int8DynActInt4WeightQATLinear` with `Int8DynActInt4WeightLinear`.
N)rp   rr   rs      )rr   r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)named_childrenr)   r{   r-   configr   r   r   r   r+   r8   rA   scale_precisionsetattrr   r   tozero_point_precisiontorchao._executorch_opsr   r$   int8scaleszerosr   )r.   r   namechildr   quantized_linearn_bitqminqmaxszpr   q_weights                r2   r   :Int8DynActInt4WeightQATQuantizer._convert_qat_linear_8da4w   sT    "002KD%!>??44;;#=%%&&JJd*$//#ll00%+%;%;$  &67 -e45LL%%$44	 UU6667 TLLJJ%% +3'*+')+&::),1JJ$) * ..u5U 3r4   c                 ,    [        U R                  5      $ r7   )_get_8da4w_activation_configrv   re   s    r2   rf   DInt8DynActInt4WeightQATQuantizer.get_activation_fake_quantize_config&  s    +D,L,LMMr4   c                 B    [        U R                  U R                  5      $ r7   )_get_8da4w_weight_configrp   rs   re   s    r2   rj   @Int8DynActInt4WeightQATQuantizer.get_weight_fake_quantize_config)  s    '8M8MNNr4   )rv   rp   rq   rr   rs   )rN   rO   rP   rQ   rR   r$   ru   rS   rT   rA   r#   rC   Moduler   r}   r   r   r   r   rf   rj   rW   rX   rY   s   @r2   rm   rm      s     %!&(-99 9 ;;	9
  ++9 
9 9$XX__-0<?	XX__-0<?	.6 .6`NX>T5U NO:P1Q O Or4   rm   c                      ^  \ rS rSrSrSSS\R                  \R                  4S\S\S\S	\R                  S
\S\R                  S\R                  SS4U 4S jjjrSS\4S jjrS rSrU =r$ )r{   i-  a,  
This module implements a linear layer with int8 dynamic per token fake
quantized activations with int4 fake quantized grouped per channel weights.

args:
    groupsize: the number of elements in each quantized group for weights
    precision: precision of weights
    scales_precision: precision of per group scales and zero points

Note: we hardcode activation scales to use torch.fp32, but allow users to specify the weight scales (defaults to torch.fp32).
Here scales_precision refers specifically to the weight scales only, not the activation scales.
FNro   r   r   r   r@   rp   rr   rs   r    c           
      t   > [        [        R                  5      n[        XW5      n	[        T
U ]  UUUUU	UUS9  g )Nr?   )r   r$   ru   r   r"   r#   )r.   r   r   r   r@   rp   rr   rs   r   r   r1   s             r2   r#   &Int8DynActInt4WeightQATLinear.__init__;  sE     9G0M 	 	
r4   rZ   c                 D    XR                   l        XR                  l        g r7   r(   rZ   r-   r.   rZ   s     r2   enable_fake_quant/Int8DynActInt4WeightQATLinear.enable_fake_quantT      18&&.-4""*r4   c                 &    U R                  S5        g NFr   re   s    r2   disable_fake_quant0Int8DynActInt4WeightQATLinear.disable_fake_quantX      u%r4   rd   T)rN   rO   rP   rQ   rR   r$   ru   rS   rT   r@   rA   r#   r   r   rW   rX   rY   s   @r2   r{   r{   -  s    " #!&(-

 
 	

 
 
 ;;
  ++
 

 
25 5& &r4   r{   c                 P    [        U [        5      (       a  U R                  5         gg)zL
(deprecated) Enable fake quantization for `Int8DynActInt4WeightQATLinear`.
N)r)   r{   r   r^   s    r2   enable_8da4w_fake_quantr   ]  s#     #455 6r4   c                 P    [        U [        5      (       a  U R                  5         gg)zM
(deprecated) Disable fake quantization for `Int8DynActInt4WeightQATLinear`.
N)r)   r{   r   r^   s    r2   disable_8da4w_fake_quantr   f  s#     #455  6r4   qparams_precisionr    c                     U [         R                  :X  d   e[        [         R                  SSSU U [         R                  " U 5      R
                  S9$ )zX
Return the activation `IntxFakeQuantizeConfig` for `Int8DynActInt4WeightQATQuantizer`.
	per_tokenFT)rA   r*   is_symmetric
is_dynamicr   r   eps)r$   ru   r   r   finfor   )r   s    r2   r   r   n  sL     ---!jj).KK)*.. r4   r+   c           	      :    [        [        R                  U SSUUS9$ )zT
Return the weight `IntxFakeQuantizeConfig` for `Int8DynActInt4WeightQATQuantizer`.
T)rA   r+   r   r   r   r   )r   r   INT4r+   r   s     r2   r   r     s)     "). r4   c                     ^  \ rS rSrSrSS\R                  \R                  4S\S\\   S\R                  S\R                  S	S
4
U 4S jjjr
S\R                  R                  S\S\S	\R                  R                  4S jrS\R                  R                  S\S\S	\R                  R                  4S jrS\R                  R                  4S jrS	\\   4S jrSrU =r$ )Int4WeightOnlyQATQuantizeri  zt
Quantizer for performing QAT on a model, where linear layers have
int4 fake quantized grouped per channel weights.
ro      rp   inner_k_tilesrr   rs   r    Nc                    > [         TU ]  5         [        R                  R	                  S5        US;   d   eUS;   d   eX l        Xl        X0l        X@l        g )Nz3torchao.quantization.qat.Int4WeightOnlyQATQuantizer)   r   r   )    @      ro   )	r"   r#   r$   r%   r&   r   rp   rr   rs   )r.   rp   r   rr   rs   r1   s        r2   r#   #Int4WeightOnlyQATQuantizer.__init__  sZ     	$$A	
 	)))....*"" 0r4   rx   r/   r0   c                 ~    [        UU R                  U R                  SU R                  U R                  [
        SS9  U$ )NT)rq   rr   rs   linear_classrz   )r   rp   r   rr   rs   Int4WeightOnlyQATLinearr|   s       r2   r}   "Int4WeightOnlyQATQuantizer.prepare  s?     	NN nn!220		
 r4   c                 (    U R                  U5        U$ r7   )_convert_qat_linear_4wr|   s       r2   r   "Int4WeightOnlyQATQuantizer.convert  s     	##E*r4   r   c                    UR                  5        GH  u  p#[        U[        5      (       Ga  UR                  nUR                  nUR
                  nUR                  R                  n[        UUSUR                  UUR                  R                  UR                  [        UR                  5       5      R                  S9n[!        XU5        Sn	[#        UR                  U	UR                  5      u  p[%        U
R                  R&                  S5      (       aX  [(        R*                  R,                  R/                  U
R1                  UR                  R                  5      UR
                  5      n
OW[(        R*                  R,                  R3                  U
R1                  UR                  R                  5      UR
                  5      n
Xl
        Xl        GM  U R7                  U5        GM     g)zD
Replace all `Int4WeightOnlyQATLinear` with `WeightOnlyInt4Linear`.
F)r   rp   r   rr   rs   r@   r   cpuN)r   r)   r   r   r   r   r-   r   r	   r+   r8   rA   r   next
parametersr@   r   r   r   typer$   opsaten#_convert_weight_to_int4pack_for_cpur   _convert_weight_to_int4packscales_and_zerosr   )r.   r   r   r   r   r   r   r   r   r   r   r   s               r2   r   1Int4WeightOnlyQATQuantizer._convert_qat_linear_4w  s    "002KD%!899#//$11 % 3 344;;#7 $//"/#ll00%+%;%; 0 0 23::	$  &67 /OLL%%0,
 X__11599$yy~~QQ ELL$7$78++ H
  %yy~~II ELL$7$78++ H +3'4D1++E2M 3r4   c                 B    [        U R                  U R                  5      $ r7   )_get_4w_weight_configrp   rs   re   s    r2   rj   :Int4WeightOnlyQATQuantizer.get_weight_fake_quantize_config  s    $T^^T5J5JKKr4   )rp   r   rr   rs   )rN   rO   rP   rQ   rR   r$   bfloat16rS   r   rA   r#   rC   r   r   r}   r   r   r   rj   rW   rX   rY   s   @r2   r   r     s     '(!&(-11  }1 ;;	1
  ++1 
1 1$XX__-0<?	XX__-0<?	*3UXX__ *3XL:P1Q L Lr4   r   c                      ^  \ rS rSrSrSSSS\R                  \R                  4S\S\S	\S
\R                  S\S\S\R                  S\R                  SS4U 4S jjjrSS\4S jjrS rSrU =r$ )r   i  a|  
This module implements a linear layer with int4 fake quantized grouped
per channel weights, with forward numerics matching `WeightOnlyInt4Linear`,
which uses the efficient int4 tinygemm kernel.

args:
    groupsize: the number of elements in each quantized group for weights
    precision: precision of weights
    scales_precision: precision of per group scales and zero points
FNro   r   r   r   r   r@   rp   r   rr   rs   r    c	           
         > U[         R                  :X  d   S5       e[        XU5      (       d  [        S5      eX`l        [        XX5      n	[        T
U ]  UUUS U	UUS9  g )Nz!only bf16 is supported for scalesz'Padding for QAT 4w is not supported yetrJ   )r$   r   r
   r,   r   r   r"   r#   )r.   r   r   r   r@   rp   r   rr   rs   r   r1   s             r2   r#    Int4WeightOnlyQATLinear.__init__   sm      5>>1V3VV1#KMJJFGG*-iJ"' 	 	
r4   rZ   c                 D    XR                   l        XR                  l        g r7   r   r   s     r2   r   )Int4WeightOnlyQATLinear.enable_fake_quant  r   r4   c                 &    U R                  S5        g r   r   re   s    r2   r   *Int4WeightOnlyQATLinear.disable_fake_quant  r   r4   )r   r   )rN   rO   rP   rQ   rR   r$   r   rS   rT   r@   rA   r#   r   r   rW   rX   rY   s   @r2   r   r     s    	 #!&(-

 
 	

 
 
 
 ;;
  ++
 

 
45 5& &r4   r   c                 P    [        U [        5      (       a  U R                  5         gg)zF
(deprecated) Enable fake quantization for `Int4WeightOnlyQATLinear`.
N)r)   r   r   r^   s    r2   enable_4w_fake_quantr   #  s#     #.// 0r4   c                 P    [        U [        5      (       a  U R                  5         gg)zG
(deprecated) Disable fake quantization for `Int4WeightOnlyQATLinear`.
N)r)   r   r   r^   s    r2   disable_4w_fake_quantr   ,  s#     #.//  0r4   c           
      X    [        [        R                  U SSUU[        R                  S9$ )zN
Return the weight `IntxFakeQuantizeConfig` for `Int4WeightOnlyQATQuantizer`.
FT)rA   r+   r   r   r   r   zero_point_domain)r   r$   uint4r   FLOATr   s     r2   r   r   4  s0     "kk).)// r4   c                   \   \ rS rSrSrS\R                  4S\\   S\R                  4S jjr
S\R                  R                  S\S	\S
\R                  R                  4S jrS\R                  R                  S\S	\S
\R                  R                  4S jrS
\\   4S jrS
\\   4S jrSrg)Float8ActInt4WeightQATQuantizeriK  a  
QAT quantizer for applying dynamic rowwise float8 activation + int4
per group/channel symmetric weight fake quantization to linear layers
in the model. Currently only supports rowwise granularity for float8
activations.

args:
    group_size (Optional[int]): the number of elements in each quantized
        group for weights, defaults to 64. Use None for per channel.
    scale_precision: precision of weight scales, defaults to torch.bfloat16.
r   r+   r   c           	          [         R                  R                  S5        Ub  SnOSn[        [         R                  [        5       S9U l        [        [         R                  UUSSUS9U l	        g )Nz8torchao.quantization.qat.Float8ActInt4WeightQATQuantizer	per_groupper_channel)rA   r*   T)rA   r*   r+   r   r   r   )
r$   r%   r&   r   float8_e4m3fnr   _activation_configr   int4_weight_config)r.   r+   r   weight_granularitys       r2   r#   (Float8ActInt4WeightQATQuantizer.__init__X  sm    
 	$$F	
 !!,!.":%%#
 5***!+
r4   rx   r/   r0   r    c                    UR                  5        Hv  u  pE[        U[        R                  R                  5      (       a7  [
        R                  UU R                  U R                  S9n[        XU5        Me  U R                  U5        Mx     U$ )z
Swap all `nn.Linear` with `FakeQuantizedLinear` with float8
fake quantizer for activations and int4 fake quantizer for weights.
)r   r   )r   r)   r$   rC   rD   r   rL   r   r   r   r}   )r.   rx   r/   r0   r   r   rE   s          r2   r}   'Float8ActInt4WeightQATQuantizer.prepareq  sw     !//1KD%110<<&*&=&="&"5"5 = 

 Z0U# 2 r4   c                     [         er7   NotImplementedErrorr|   s       r2   r   'Float8ActInt4WeightQATQuantizer.convert  s
     "!r4   c                     [        S5      e)Nz,Float8 FakeQuantizeConfig does not exist yetr   re   s    r2   rf   CFloat8ActInt4WeightQATQuantizer.get_activation_fake_quantize_config  s    !"PQQr4   c                     U R                   $ r7   )r   re   s    r2   rj   ?Float8ActInt4WeightQATQuantizer.get_weight_fake_quantize_config  s    !!!r4   )r   r   N)rN   rO   rP   rQ   rR   r$   r   r   rS   rA   r#   rC   r   r   r}   r   r   rf   rj   rW   rd   r4   r2   r   r   K  s    
 %'',~~
SM
 
2XX__-0<?	("XX__"-0"<?"	"
RX>T5U R":P1Q "r4   r   r   )8typingr   r   r$   torch.nn.functionalrC   
functionalr9   torchao.dtypes.utilsr    torchao.quantization.granularityr   r   )torchao.quantization.linear_quant_modulesr   r	   r
   r   r   r   %torchao.quantization.quant_primitivesr   r   torchao.quantization.unifiedr   torchao.quantization.utilsr   fake_quantize_configr   r   r   fake_quantizerr   utilsr   rD   r   r   rT   r\   r_   ra   rm   r{   r   r   rA   r   rS   r   r   r   r   r   r   r   rd   r4   r2   <module>r     s   !    * =  : B 

q%((// ql 8	881588?? 1	* 	"aO': aOH,&$7 ,&`   !%((// !{{&{{ ,ZL!4 ZLz+&1 +&^ ehhoo  !uxx !{{ .C"&9 C"r4   