
    i                         S SK r S SKrS SKJr  S SKJr  S SKJr  S SKJrJ	r	  SSK
JrJrJrJr  \R                  R                   r\R                  R"                  r\R                  R$                  rS SKJr  \" SS	9S
 5       r\" SS	9S 5       r " S S\5      r\R1                  \R2                  R4                  5      S 5       r\R1                  \R8                  R4                  5      S 5       r\R1                  \R:                  R<                  5      S 5       r\R1                  \R>                  R4                  5      S 5       r\R@                  R4                  \R@                  R4                  \RB                  R4                  \RB                  R4                  \RD                  R4                  /r#\	" S5      (       a%  \#RI                  \RJ                  R4                  5        \R1                  \#5      S 5       r\R1                  \RL                  R4                  5      S 5       r\R1                  \RN                  R                  5      S 5       r\" \/5        g)    N)Tensor)add_safe_globals)return_and_correct_aliasing)TorchAOBaseTensortorch_version_at_least   )create_dynamic_mapdequant_with_qmapquantize_8bit_with_qmapscale_tensor)	lru_cache)maxsizec                      [        SS9$ )NTsignedr	        Z/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/optim/subclass_8bit.pyget_qmap_signedr      s    T**r   c                      [        SS9$ )NFr   r   r   r   r   get_qmap_unsignedr   #   s    U++r   c                   @   \ rS rSr/ SQr\ SS\S\S\S\S\R                  S-  4
S	 jj5       r
 SS\S\S\S\S\R                  S-  4
S
 jjrS r\ SS j5       rSS jr\    SS\S\S\R"                  R$                  S\R                  S-  4S jj5       rS rSrg)OptimState8bit(   )codesscaleqmapNr   r   r   r   dtypec                 V    [         R                  " XR                  UR                  US9$ )N)devicer   )r   _make_wrapper_subclassshaper!   )clsr   r   r   r   r   s         r   __new__OptimState8bit.__new__,   s&     ,,U\\
 	
r   c                     UR                   [        R                  L d   eUR                  S:X  d   eUR                   [        R                  L d   eXl        X l        X0l        X@l        UR                  5       UR                  5       -  U l
        g)a  Create quantized 8-bit optimizer state as proposed in https://arxiv.org/abs/2110.02861

Args
    codes: quantized 8-bit data stored as uint8. Has the same shape as the original float tensor.
    scale: scale data for block-wise quantization.
    qmap: lookup table that maps between quantized value (code) and float value.
    signed: whether the tensor is signed or unsigned.

NOTE: To get block-wise scale, the original float tensor is first reshape to (-1, block_size).
Thus, the last dimension of the original float tensor is not necessarily divisible by block size.
Given `codes` and `scale`, `block_size` is calculated as `codes.numel() // scale.numel()`.
r   N)r   torchuint8ndimfloat32r   r   r   r   numel
block_size)selfr   r   r   r   r   s         r   __init__OptimState8bit.__init__9   sl    ( {{ekk)))zzQzzU]]***

	++-5;;=8r   c                 J    U R                   U R                  U R                  /4$ N)tensor_attrsr   r   r.   s    r   __tensor_flatten__!OptimState8bit.__tensor_flatten__V   s       4;;

";;;r   c                 Z    U " / U R                    Vs/ s H  oQU   PM	     snQUQ76 $ s  snf r2   )r3   )r$   tensor_data_dicttensor_attributes
outer_sizeouter_stridenames         r   __tensor_unflatten__#OptimState8bit.__tensor_unflatten__Y   s>      
141A1AB1At$1AB
EV
 	
Bs   (
c                     [        U R                  U R                  U R                  5      nUb  UR	                  U5      nU$ r2   )r
   r   r   r   to)r.   output_dtype
float_datas      r   
dequantizeOptimState8bit.dequantizea   s6    &tzz499djjI
##|4Jr   r-   r!   c                 $   [         R                  " U[         R                  US9n[         R                  " UR                  5       U-  US9nU(       a
  [	        5       O	[        5       n[         R                  " U[         R                  US9n	U " XgXUS9$ )N)r   r!   r!   r   )r(   zerosr)   r,   r   r   tensorr+   )
r$   r#   r   r-   r!   r   r   r   	qmap_listr   s
             r   rH   OptimState8bit.zerosg   sj     EVDEKKMZ7G)/O%5F5H	||IU]]6J5U;;r   c                     U R                   R                   SU R                   SU R                   S[	        U R
                  5       SU R                   SU R                   SU R                   S3$ )Nz(signed=z, block_size=z, shape=z, dtype=z	, device=z, requires_grad=))		__class____name__r   r-   tupler#   r   r!   requires_gradr4   s    r   __repr__OptimState8bit.__repr__v   sn    ~~&&'x}M$//IZ [4::&'x

|9T[[M R!//03	
r   )r-   r   r   r   r   r2   )NN)T   NN)rO   
__module____qualname____firstlineno__r3   staticmethodr   boolr(   r   r%   r/   r5   classmethodr=   rC   inttypesDevicerH   rR   __static_attributes__r   r   r   r   r   (   s   -L  %)



 

 	


 

 {{T!

 

$ %)99 9 	9
 9 {{T!9:< PT
 
  %)$(< < 	<
 ""< {{T!< <
r   r   c                    US   nUS   n[        U[        5      (       a  [        U[        5      (       a  UR                  UR                  :X  a  UR                  UR                  :X  d   eUR                  R                  UR                  5        UR                  R                  UR                  5        U$ [        U[        5      (       ad  [        XTR                  5      u  pg[        XdR                  5      nUR                  R                  U5        UR                  R                  U5        U$ UR                  UR                  5       5        U$ )Nr   r   )
isinstancer   r   r-   r   copy_r   r   r   r   rC   )	funcr\   argskwargsdstsrc
scaled_srcr   r   s	            r   _rh   ~   s    
q'C
q'C#~&&:c>+J+JzzSZZ'CNNcnn,LLL				"				" J 
C	(	((nn=
'
HH=				
 J 			#.."#Jr   c                 L   UR                  SUS   R                  5      nUR                  SS 5      n[        US   R                  R	                  US9US   R
                  R	                  US9US   R                  R	                  US9US   R                  US9n[        XX65      $ )Nr   r   r!   rF   rG   )	getr   r   r   r@   r   r   r   r   )rb   r\   rc   rd   r   r!   outs          r   rh   rh      s     JJwQ.EZZ$'F
Q'Q'Qv&QC 't6??r   c                     U Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     nnU " U0 UD6$ s  snf r2   )r`   r   rC   rb   r\   rc   rd   xs        r   rh   rh      sC    LPQDqjN;;ALLNBDDQ    Rs   1A c                     Uu  pE[        UR                  R                  U5      UR                  UR                  UR
                  5      $ r2   )r   r   viewr   r   r   )rb   r\   rc   rd   rn   r#   s         r   rh   rh      s3    HA!'',,u-qwwIIr   z
2.11.0.devc           	          US   n[        U[        5      (       d  [        S[        U5       35      e[        U " UR                  /USS  Q70 UD6U " UR
                  /USS  Q70 UD6UR                  R                  5       UR                  5      $ )Nr   z%expecting a OptimState8bit but found r   )	r`   r   
ValueErrortyper   r   r   cloner   rm   s        r   rh   rh      s    QAa((@a	JKK QWW*tABx*6*QWW*tABx*6*			 r   c                     US   R                   R                  5       =(       aA    US   R                  R                  5       =(       a    US   R                  R                  5       $ )Nr   )r   	is_pinnedr   r   )rb   r\   rc   rd   s       r   rh   rh      sO     	Q! 	%GMM##%	%GLL""$r   c                    US S u  pEpg[        U5      S:  a  US   OSnUS:w  a  [        S5      eUS:w  a  [        S5      eUR                  n	[        R                  " UR
                  SS  5      n
Xj-  U	-  S:w  d  Xz-  U	-  S:w  a"  [        SUR
                   SU	 SU S	U S
3	5      e[        UR                  Xg UR                  Xj-  U	-  Xz-  U	-   UR                  R                  5       UR                  5      $ )N   r   r   z+Only support aten.slice along the first dimz#Only support aten.slice with step=1zInvalid start or end for shape=z and block_size=zD. Make sure start and end align with block boundary. Received start=z, end=.)lenrr   r-   mathprodr#   r   r   r   r   rt   r   )rb   r\   rc   rd   rn   dimstartendstepr-   strides              r   rh   rh      s   bqAE$i!m47D axFGGqy>??JYYqwwqr{#F 	*$)clj-HA-M-aggY6Fzl S#WF3%q2
 	
 		*,s|z/IJ			 r   )(r{   r(   r   torch.serializationr   torch.utils._python_dispatchr   torchao.utilsr   r   quant_utilsr	   r
   r   r   opsatenc10d_functional_c10d_functional	functoolsr   r   r   r   
implementsra   defaultrh   _to_copylerpScalarrp   all_gather_into_tensorwait_tensordetach_optim_state_8bit_c10d_opsappend_wrap_tensor_autogradrv   slicer   r   r   <module>r      s:      0 D C  yy~~))++99--    1+ + 1, ,S
& S
l 4::--. /, 4==001@ 2@ 499++,! -! 499,,-J .J **22++33''  ((KK  ,''%%&6&L&L&T&TU 56 7" 4>>112 3 4::,,- .< .! "r   