
    i-              	          S SK r S SKrS SKJr  S SKJr  S SKJrJr  S SK	r	S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJr  \ R(                  " \5      r\R/                  \ R0                  5        \ R2                  " \R4                  5      r\ R8                  " S	5      r\R=                  \5        \R?                  \5         " S
 S\ \5      r! SS\	RD                  S\#S\#S\$4S jjr% " S S\RL                  5      r'0 4S\RL                  4S jjr( " S S5      r)\ " S S\5      5       r*\" \*5      S\	R                  RL                  S\*S\	R                  RL                  4S j5       r+g)    N)	dataclass)Enum)OptionalUnion)quantize_per_channel_group)AOBaseConfig)'_choose_qparams_and_quantize_affine_hqq) register_quantize_module_handlerz4%(asctime)s - %(name)s - %(levelname)s - %(message)sc                   $    \ rS rSrSr Sr SrSrg)UIntxChooseQParamsAlgorithm'   z[Variant of quantization algorithm to calculate scale and zero_point for UIntx quantization.min_maxhqq N)__name__
__module____qualname____firstlineno____doc__MIN_MAXHQQ__static_attributes__r       ]/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/experimental/quant_api.pyr   r   '   s    e G Cr   r   vals
group_sizenbithas_weight_zerosc           
      v   US:  a  US::  d   eU(       a  SUS-
  -  * nSUS-
  -  S-
  nO
SnSU-  S-
  nU R                   u  pxU R                  SU5      n [        R                  " U SS9u  p[        R                  " U SS9u  pX-
  Xe-
  -  nU(       d  [        R
                  " U5      nOU[        R                  " X-  5      -
  nU R                  Xx5      n UR                  US5      nUR                  US5      n[        U UUUUU(       a  [        R                  O[        R                  US9nU(       d  S nXU4$ )N      r   )axis)inputscaleszero_points	quant_min	quant_maxdtyper   )
shapereshapetorchminmax
zeros_likeroundr   int8uint8)r   r   r   r   signedqminqmaxnkvmins_vmaxsgroup_scalesgroup_zerosgroup_qvalss                  r   	_quantizer>   9   s6    19""tax!dQh1$T	Q::DA<<J'DyyA&HEyyA&HEMdk2L&&|4U[[)=>><<D''2.L%%a,K,"ejjK k11r   c                   ~   ^  \ rS rSr SS\\R                     4U 4S jjjr\R                  4S\4S jjr
S rSrU =r$ )	UIntxWeightOnlyQuantizedLinearc   biasc                    > [         TU ]  5         Xl        X l        Ub  [        R
                  " USS9U l        g U R                  SS 5        g )NFrequires_gradrB   )super__init___pack_weights_op
_linear_opnn	ParameterrB   register_parameter)selfpack_weight_op	linear_oprB   	__class__s       r   rG   'UIntxWeightOnlyQuantizedLinear.__init__d   sC     	 .#T?DI##FD1r   uintx_choose_qparams_algorithmc                 ~   X l         X0l        U[        R                  :X  aM  [	        UUUSUR
                  UR                  SSS9u  pVpxUR                  [        R                  5      nU* U-  nOLU[        R                  :X  a*  [        XR                  U R                   SSS9u  pVnU* U-  nO[        SU 35      e[        R                  " USS9U l        [        R                  " USS9U l        U R#                  UR%                  5       5      R                  UR                  S9n	[        R                  " U	SS9U l        g )	Nr    FT)nbitsr   r#   compute_dtypedeviceverbose
raw_output)r   r3   z,Unsupported uintx_choose_qparams_algorithm: rD   )rV   )r   r   r   r   r	   r)   rV   tor,   r2   r   r>   
ValueErrorrJ   rK   weight_scalesweight_zerosrH   cpupacked_weights)
rM   weightsr   r   rR   weight_qvalsr[   r\   r9   r^   s
             r   quantize_and_pack_weights8UIntxWeightOnlyQuantizedLinear.quantize_and_pack_weightsr   s7    	$)-H-L-LL7)")--">>!#	 9L (??5;;7L )==8L+/J/R/RR8A$))dSX95L )==8L>?]>^_   \\-uMLLUK..|/?/?/ABEE&& F 
 !ll>Or   c                 p   UR                  5       S:  d   eUR                  5       S:X  aZ  U R                  UU R                  U R                  U R                  U R
                  5      nU R                  b  X R                  -   nU$ UR                  SS nUR                  S   nU R                  R                  S   nU R                  UR                  SU5      U R                  U R                  U R                  U R
                  5      R                  " / UQUP76 nU R                  b  X R                  -   nU$ )N   r   r"   )	dimrI   r^   r   r[   r\   rB   r*   r+   )rM   xoutput
lead_shaper7   r6   s         r   forward&UIntxWeightOnlyQuantizedLinear.forward   s#   uuw!||557a<__##""!!F yy$))+MWWQr]
GGBK$$Q'IIb!OO
 ' " "  !" 99 ii'Fr   )rI   rH   rB   r   r   r^   r[   r\   )N)r   r   r   r   r   r,   TensorrG   r   r   ra   ri   r   __classcell__)rP   s   @r   r@   r@   c   sO    
 (,	2 u||$	2 2& GbFiFi*P
 )D*PX r   r@   modulec           
      n   US   nUS   n[        U [        R                  5      (       a   eUS:  a  US::  d   eU R                  5        H  u  pE[        U[        R                  5      (       d  [	        XQ5        M1  UR
                  R                  5       (       d  [        SU S35      e[        [        [        R                  R                  SU S35      [        [        R                  R                  S	U S
35      UR                  S9n[        XU5        UR                  UR
                  X25        M     g )Nr   r   r       zFUIntxWeightOnlyQuantizedLinear requires contiguous weights for layer 'zF'. Please call .contiguous() on the weight tensor before quantization._pack_weight_bit_linear_fp_act_
bit_weightrN   rO   rB   )
isinstancerJ   Linearnamed_children)_replace_linear_with_quantized_linear_mpsweightis_contiguousrZ   r@   getattrr,   opstorchaorB   setattrra   )rm   kwargsr   r   namechildqlinears          r   rx   rx      s   %J&>D&")),,,,19"",,.%++5eD<<--// \]a\b cZ Z  5&uyy'8'8M$s:ST!II%%j'I ZZG F'*--ellDM# /r   c            
           \ rS rSrSSSSS.S\\   S\\R                     S\\   S\\   4S jjr	S	\
R                  S
\
R                  4S jrSrg)UIntxWeightOnlyLinearQuantizer   N)rV   	precisionbitwidth	groupsizerV   r   r   r   c                   U(       a  US:w  a  [        S5      eXl        U(       a>  U[        R                  [        R                  [        R
                  4;  a  [        S5      eX l        Uc  Sn[        R                  SU S35        U[        SS5      ;  a  [        S	5      eX0l        Uc  S
n[        R                  SU S35        US;  a  [        S5      eX@l        g )NmpszHOnly device=mps is currently supported in UIntxWeightOnlyLinearQuantizerz[Only precisions float32, float16 & bfloat16 are supported in UIntxWeightOnlyLinearQuantizer   z&bitwidth not specified, defaulting to .r    r!   zDOnly bitwidts 1 to 7 are supported in UIntxWeightOnlyLinearQuantizer   z'groupsize not specified, defaulting to     @   r      zQOnly groupsizes 32, 64, 128 & 256 are supported in UIntxWeightOnlyLinearQuantizer)NotImplementedErrorrV   r,   float32float16bfloat16rZ   r   loggerwarningranger   r   )rM   rV   r   r   r   s        r   rG   'UIntxWeightOnlyLinearQuantizer.__init__   s     fo%Z  !KMMMMNN+
 

 m  'NHNNCH:QOP5A;&V  %MINNDYKqQR..c  'Nr   modelreturnc                     U R                   (       a  UR                  U R                   5      nU R                  (       a  UR                  U R                  5      n[        UU R                  U R
                  S.S9  U$ )N)r   r   )r   )rV   rY   r   rx   r   r   )rM   r   s     r   quantize'UIntxWeightOnlyLinearQuantizer.quantize  sY    ;;HHT[[)E>>HHT^^,E1"nn	
 r   )r   rV   r   r   )r   r   r   r   r   strr,   r)   intrG   rJ   Moduler   r   r   r   r   r   r      sn     !%+/"&#',' ,' EKK(	,'
 3-,' C=,'\bii BII r   r   c                   h    \ rS rSr% SrSr\\S'   Sr\\S'   \	R                  r\\	\4   \S'   S rS	rg
)UIntxWeightOnlyConfigi  a[  
Configuration for applying uintx weight-only asymmetric per-group quantization
to linear layers for MPS devices.

Args:
    bitwidth (int): Number of bits for quantization, must be between 1 and 7 inclusive.
        Default is 4.
    group_size (int): Group size for quantization. Must be one of [32, 64, 128, 256].
        Default is 128.
    uintx_choose_qparams_algorithm (Union[UIntxChooseQParamsAlgorithm, str]): Algorithm for
        choosing quantization parameters. Options:
        - "min_max" (default): Simple min-max scaling
        - "hqq": Half-Quadratic Quantization for better accuracy
r   r   r   r   rR   c                 ,   U R                   [        SS5      ;  a  [        SU R                    35      eU R                  S;  a  [        SU R                   35      e[	        U R
                  [        5      (       a  [        U R
                  5      U l        g g )Nr    r!   z0bitwidth must be between 1 and 7 inclusive, got r   z2group_size must be one of [32, 64, 128, 256], got )r   r   rZ   r   ru   rR   r   r   )rM   s    r   __post_init__#UIntxWeightOnlyConfig.__post_init__,  s    ==a+B4==/R  ??"44DT__DUV  d993??2M333D/ @r   )rR   N)r   r   r   r   r   r   r   __annotations__r   r   r   rR   r   r   r   r   r   r   r   r   r     sB     HcJ#++ #E*Es*J$K r   r   configr   c           	         UR                   nUR                  nUR                  nU R                  R	                  5       (       d  [        S5      e[        [        [        R                  R                  SU S35      [        [        R                  R                  SU S35      U R                  S9nUR                  U R                  X#U5        U$ )NzUIntxWeightOnlyQuantizedLinear requires contiguous weights. Please call .contiguous() on the weight tensor before quantization.rp   rq   rr   rs   rt   )r   r   rR   ry   rz   rZ   r@   r{   r,   r|   r}   rB   ra   )rm   r   r   r   rR   r   s         r    _uintx_weight_only_mps_transformr   <  s     ??D""J%+%J%J"==&&((R
 	

 -uyy00M$s2KL%))++tfJ-OP[[G
 %%t)G Nr   )T),loggingsysdataclassesr   enumr   typingr   r   r,   torch.nnrJ   $torch.ao.quantization.fx._decomposedr   torchao.core.configr   %torchao.quantization.quant_primitivesr	   %torchao.quantization.transform_moduler
   	getLoggerr   r   setLevelWARNINGStreamHandlerstdouthandler	Formatter	formattersetFormatter
addHandlerr   r   rk   r   boolr>   r   r@   rx   r   r   r   r   r   r   <module>r      si    
 !  "   - 
		8	$    




+TU	   Y    ' 
#t & TX'2
,,'2$''2/2'2FJ'2TURYY Ur IK Nbii N6; ;| #L # #L ""78HHOO%:
XX__ 9r   