
    ia                     T   S SK r S SKrS SKrS SKJr  S SKrS SKrS SKJr  S SKJ	r	J
r
Jr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJr  \R.                  c  S	\l        S S
KJr  S SKJr  S SKJrJrJr  S SK J!r!  S SK"J#r#  S SK$J%r%  S SK&J'r'  S SK(J)r)J*r*   S SK+r, S SK.r.\R^                  R`                  b  \Rb                  " SSS9  \)" 5       r2 " S S\5      r3 " S S\5      r4Sr5 " S S\5      r6\" \35        \" \45        \7S:X  a  \" 5         gg! \- a    Sr, Nf = f! \- a    Sr. Nf = f)    N)Path)nn)CPUOffloadPolicyOffloadPolicyfully_shard)common_utils)skip_if_lt_x_gpu)FSDPTest)TestCaseinstantiate_parametrized_testsparametrize	run_testsi  )Version)optim)_fp32_to_bf16_srquantize_4bit_with_qmapquantize_8bit_with_qmap)OptimState4bit)OptimState8bit)OptimStateFp8)skip_if_rocm)get_available_devicestorch_version_at_leastzSkipping the test in ROCmT)allow_module_levelc                       \ rS rSr\" S\5      S 5       r\" S\5      S 5       r\" S\5      S 5       r\" S\5      S 5       r	\" S\5      \" SSS	/5      S
 5       5       r
\" S\5      \" SSS	/5      S 5       5       rSrg)TestQuantize@   devicec                 z   [         R                  " SSUS9n[         R                  " SUS9R                  5       R                  nUR	                  S5      U-
  R                  5       R                  S5      R                  [         R                  5      n[        X#5      n[         R                  R                  XE5        g )N       r      )torchrandsortvalues	unsqueezeabsargmintouint8r   testingassert_closeselfr   xqmapactualexpecteds         V/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/test/test_low_bit_optim.py(test_quantize_8bit_with_qmap_correctness5TestQuantize.test_quantize_8bit_with_qmap_correctnessA   s    JJr4/zz#f-224;;++b/D(--/66r:==ekkJ*13""64    c                     [         R                  " SSUS9n[         R                  " SUS9R                  5       R                  n[         R                  " [
        SS9nU" X#5      n[        X#5      n[         R                  R                  XV5        g )Nr    r!   r"   r#   T	fullgraph)r%   r&   r'   r(   compiler   r.   r/   r1   r   r2   r3   
compiled_fr4   r5   s          r6   $test_quantize_8bit_with_qmap_compile1TestQuantize.test_quantize_8bit_with_qmap_compileK   sj    JJr4/zz#f-224;;]]#:dK
A$*13""64r9   c                 z   [         R                  " SSUS9n[         R                  " SUS9R                  5       R                  nUR	                  S5      U-
  R                  5       R                  S5      R                  [         R                  5      n[        X#5      n[         R                  R                  XE5        g )Nr    r!   r"      r$   )r%   r&   r'   r(   r)   r*   r+   r,   r-   r   r.   r/   r0   s         r6   (test_quantize_4bit_with_qmap_correctness5TestQuantize.test_quantize_4bit_with_qmap_correctnessV   s    JJr4/zz"V,113::++b/D(--/66r:==ekkJ*13""64r9   c                     [         R                  " SSUS9n[         R                  " SUS9R                  5       R                  n[         R                  " [
        SS9nU" X#5      n[        X#5      n[         R                  R                  XV5        g )Nr    r!   r"   rC   Tr;   )r%   r&   r'   r(   r=   r   r.   r/   r>   s          r6   $test_quantize_4bit_with_qmap_compile1TestQuantize.test_quantize_4bit_with_qmap_compile`   sj    JJr4/zz"V,113::]]#:dK
A$*13""64r9   r=   FTc                    [         R                  " SUS9S-  nUR                  SS5      R                  SS5      n[         R                  " [
        SSU(       + S	9nU" U5      nUR                  [         R                  L d   e[         R                  R                  UR                  5       R                  S5      US
S
S9  g )Nr    r"   d   r$      順 TFr<   dynamicdisablegiUMu>)atolrtol)r%   r&   viewrepeatr=   r   dtypebfloat16r.   r/   floatmean)r1   r   r=   r2   x_repfunc
x_rep_bf16s          r6   test_bf16_stochastic_round'TestQuantize.test_bf16_stochastic_roundk   s     JJr&)C/r1$$Q0}}e[
 %[
5>>111 	"":#3#3#5#:#:1#=qtRV"Wr9   c                 j   [         R                  " S5        SS KJn  SSKJn  SSKJnJn  SnUR                  5       (       aN  UR                  5       (       d9  UR                  [        R                  " 5       S5      nUR                  SUSSS9  S	n [        R                   " ["        R$                  5        [        R&                  " S
US9S-  n	U	R)                  SS5      R+                  SS5      n
[        R,                  " [.        S	SU(       + S9nU" U
5      nU" US5      nUR1                  XU" 5       /SS9nU" U5      n[3        X5      (       d   e[        R4                  R7                  UR9                  5       U5        U(       a  UR;                  5         g g ! U(       a  UR;                  5         f f = f)Nztorch.distributedr   )init_device_mesh)DTensor	ReplicateFrK   gloo)backendstorerank
world_sizeTr    r"   rJ   r$   rL   rM   )rK   )	run_check)pytestimportorskiptorch.distributeddistributedtorch.distributed.device_meshr^   torch.distributed.tensorr_   r`   is_availableis_initialized	FileStoretempfilemktempinit_process_groupr%   manual_seedr   SEEDr&   rR   rS   r=   r   
from_local
isinstancer.   r/   to_localdestroy_process_group)r1   r   r=   distr^   r_   r`   
created_pgrc   r2   rX   rY   	out_plainmeshx_dtout_dts                   r6   "test_bf16_stochastic_round_dtensor/TestQuantize.test_bf16_stochastic_round_dtensorz   sn    	/0(B?
t':':'<'<NN8??#4a8E##	 $  J	-l//0

2f-3AFF2qM((G4E== D%WD UI#FD1D%%eIK=E%RD$ZFf....MM&&v'8)D**, z**, s   C.F F2 N)__name__
__module____qualname____firstlineno__r   _DEVICESr7   r@   rD   rG   r[   r   __static_attributes__r   r9   r6   r   r   @   s    8$5 %5 8$5 %5 8$5 %5 8$5 %5 8$UDM*X + %X 8$UDM*#- + %#-r9   r   c            	       z   \ rS rSr\" S/ SQ5      \" S\R                  \R                  /5      \" S\5      \	" S5      S 5       5       5       5       r
\" S/ SQ5      \" S\5      S	 5       5       r\" S/ SQ5      \" S\5      S
 5       5       r\" S\\\/5      \" SSS/5      \" S\5      S 5       5       5       r\" S\\\/5      \" S\5      S 5       5       r\R&                  R)                  \SL SS9\R&                  R)                  \R,                  R/                  5       (       + SS9\	" S5      \R&                  R)                  \" S5      SS9\" SSS/5      S 5       5       5       5       5       r\R&                  R)                  \SL SS9\R&                  R)                  \R,                  R/                  5       (       + SS9\" SSS/5      S 5       5       5       r\R&                  R)                  \R,                  R/                  5       (       + =(       a    \R8                  R/                  5       (       + SS9\" S / S!Q5      S" 5       5       r\R&                  R)                  \R,                  R/                  5       (       + =(       a    \R8                  R/                  5       (       + SS9S# 5       r\" S\5      S$ 5       rS%r g)&	TestOptim   
optim_name)Adam8bit	AdamW8bitAdam4bit	AdamW4bitAdamFp8AdamWFp8rT   r   ROCm enablement in progressc                    UR                  S5      (       a>  US:X  a8  [        R                  R                  5       S:  a  [        R
                  " S5        [        R                  " [        R                  " SS5      [        R                  " 5       [        R                  " SS5      5      nUR                  X2S9  [        [        U5      " UR                  5       5      n[        R                  " SSX2S9nU" U5      R                  5       nUR!                  5         UR#                  5         UR%                  5         [&        R(                  " 5        n[        R*                  " UR-                  5       UR.                  5        [        R0                  " UR.                  S	S
9n	S S S 5        [2        R4                  " U5      n
[        [        U5      " U
R                  5       5      nUR7                  W	5        [9        S5       H  n[        R                  " SSX2S9nU" U5      R                  5       R!                  5         UR#                  5         UR%                  5         U
" U5      R                  5       R!                  5         UR#                  5         UR%                  5         M     [;        UR                  5       U
R                  5       5       H$  u  p[        R<                  R?                  X5        M&     g ! , (       d  f       GN[= f)NFp8cuda   	   +FP8 CUDA requires compute capability >= 8.9r    r#   r   rT      cpumap_location   ) endswithr%   r   get_device_capabilityrg   skipr   
SequentialLinearReLUr,   getattrr   
parametersrandnsumbackwardstep	zero_gradrp   NamedTemporaryFilesave
state_dictnameloadcopydeepcopyload_state_dictrangezipr.   r/   )r1   r   rT   r   model	optimizerr2   lossfr   model2optim2_p1p2s                  r6   test_optim_smokeTestOptim.test_optim_smoke   s
    u%%&F*:zz//1F:IJbiiC0"'')RYYsB=OP,E:.u/?/?/AB	KK2f:Qx||~ ((*aJJy++-qvv6AFF?J + u%
+F,=,=,?@z*qAAr&>A!HLLN##%NN!1IMMO$$&KKM  %**,f.?.?.ABFBMM&&r. C' +*s   AK11
L )r   r   r   c                 J   UR                  S5      (       a>  US:X  a8  [        R                  R                  5       S:  a  [        R
                  " S5        [        R                  " 5       n[        R                  " [        R                  5         [        R                  " [        R                  " SS5      [        R                  " 5       [        R                  " SS5      5      nUR                  US9  [        [        U5      " UR!                  5       5      n[        R"                  " SSUS9nU" U5      R%                  5       nUR'                  5         UR)                  5         UR+                  5         [        R                  " U5        g ! [        R                  " U5        f = f)	Nr   r   r   r   r    r#   r"   r   )r   r%   r   r   rg   r   get_default_dtypeset_default_dtyperU   r   r   r   r   r,   r   r   r   r   r   r   r   r   )r1   r   r   	old_dtyper   r   r2   r   s           r6   test_optim_default_dtype_bf16'TestOptim.test_optim_default_dtype_bf16   s    u%%&F*:zz//1F:IJ++-	/	/MM"))B"4bggi3PRASTEHHFH#z253C3C3EFIAr&1A8<<>DMMONN! ##I.E##I.s   C$F
 
F"c                    UR                  S5      (       a>  US:X  a8  [        R                  R                  5       S:  a  [        R
                  " S5        [        R                  " [        R                  " SS5      [        R                  " 5       [        R                  " SS5      5      nUR                  US9  [        [        US   R                  5       5      S	S
9[        [        US   R                  5       5      SS
9/n[        [        U5      " U5      n[        R                   " SSUS9nU" U5      R#                  5       nUR%                  5         UR'                  5         UR)                  5         g )Nr   r   r   r   r    r#   r"   r   g-C6?)paramslrr   h㈵>r   )r   r%   r   r   rg   r   r   r   r   r   r,   dictlistr   r   r   r   r   r   r   r   )r1   r   r   r   param_groupsr   r2   r   s           r6   test_param_groupsTestOptim.test_param_groups   s    u%%&F*:zz//1F:IJbiiC0"'')RYYsB=OPU1X0023=U1X0023=
 E:.|<	KK2f-Qx||~r9   subclassshape)i   )r#   r#   c                    U[         :X  a>  US:X  a8  [        R                  R                  5       S:  a  [        R
                  " S5        UR                  X#S9nUS   S-  n[        R                  R                  UR                  5       S U US U R                  5       5        [        R                  R                  UR                  5       XUS-   XEUS-   R                  5       5        g )Nr   r   r   r"   r   r   )
r   r%   r   r   rg   r   zerosr.   r/   
dequantize)r1   r   r   r   tensoroffsets         r6   test_subclass_sliceTestOptim.test_subclass_slice  s     }$EJJ$D$D$F$OIJ5qQ""(&&/*D*D*F	
 	""!4FQJ'224	
r9   c                    SnUR                  X2S9nU R                  UR                  [        R                  5        UR                  [        R                  5      nU R                  UR                  [        R                  5        UR                  (       d:  UR                  5       nU R                  UR                  [        R                  5        UR                  [        R                  5      nU R                  UR                  [        R                  5        UR                  X2[        R                  S9nU R                  UR                  [        R                  5        g )N)r!   r"   r   )	r   assertEqualrT   r%   float32r,   rU   is_cpur   )r1   r   r   r   r   tensor_bf16tensor_bf16_cputensor_fp32_recasts           r6   test_subclass_appearance_dtype(TestOptim.test_subclass_appearance_dtype  s      5u}}5 ii/**ENN; !!)oo/O_22ENNC )^^EMM:+115==A nnUnP**ENN;r9   Nzbitsandbytes is not availablereasonz+bitsandbytes 8-bit Adam only works for CUDAz2.7.0Failing in CIr   r   c                    Sn[         R                  " [         R                  " SS5      [         R                  " 5       [         R                  " SS5      5      nUR	                  U5        [
        R                  " U5      n[        [        R                  5      [        S5      :  a  SOSn[        [        R                  U5      " UR                  5       5      n[        [        U5      " UR                  5       US9n[        S	5       H  n[        R                  " S
SUS9n	U" U	5      R!                  5       n
U
R#                  5         UR%                  5         UR'                  5         U" U	5      R!                  5       nUR#                  5         UR%                  5         UR'                  5         M     [)        UR                  5       UR                  5       5       H$  u  p[        R*                  R-                  XSSS9  M&     g )Nr   r    r!      z0.44.0r#   i   )
block_sizer   r   r"   r   rQ   rP   )r   r   r   r   r,   r   r   r   bnb__version__r   r   r   r   r%   r   r   r   r   r   r   r.   r/   )r1   r   r   model1r   r   optim1r   r   r2   loss1loss2r   r   s                 r6   test_optim_8bit_correctness%TestOptim.test_optim_8bit_correctness/  sf    ryyT2BGGIryys?ST		&v& $COO48IISt
J/0A0A0CD
+F,=,=,?JWqAAr&1A1IMMOENNKKM1IMMOENNKKM  &++-v/@/@/BCFBMM&&rDt&D Dr9   zlpmm is not availablez#lpmm 4-bit Adam only works for CUDAr   r   c                 <   Sn[         R                  " [         R                  " SS5      [         R                  " 5       [         R                  " SS5      5      nUR	                  U5        [
        R                  " U5      nUS:X  a-  [        R                  R                  UR                  5       SS9nOCUS:X  a.  [        R                  R                  UR                  5       5      nO[        S	U S
35      e[        [        U5      " UR                  5       5      n[        S5       H  n[        R                  " SSUS9nU" U5      R!                  5       n	U	R#                  5         UR%                  5         UR'                  5         U" U5      R!                  5       n
U
R#                  5         UR%                  5         UR'                  5         M     [)        UR                  5       UR                  5       5       H$  u  p[        R*                  R-                  XSSS9  M&     g )Nr   r    r!   r   r   r   )weight_decayr   zUnsupported z optimizer for lpmmr   r   r"   r   r   )r   r   r   r   r,   r   r   lpmmr   AdamWr   
ValueErrorr   r   r%   r   r   r   r   r   r   r.   r/   )r1   r   r   r   r   r   r   r   r2   r   r   r   r   s                r6   test_optim_4bit_correctness%TestOptim.test_optim_4bit_correctnessV  s    ryyT2BGGIryys?ST		&v& #ZZ%%f&7&7&9%JF;&ZZ%%f&7&7&9:F|J<7JKLL
+F,=,=,?@qAAr&1A1IMMOENNKKM1IMMOENNKKM  &++-v/@/@/BCFBMM&&rDt&D Dr9   z&optim CPU offload requires CUDA or XPUzoffload_grad,grad_accum))FrK   )Fr   )TrK   c                    [         S   n[        R                  " [        R                  " SS5      [        R                  " 5       [        R                  " SSSS9[        R                  " 5       [        R                  " SSSS9[        R                  " 5       [        R                  " SSSS95      nUR                  U5        US   R                  S	5        [        R                  " U5      n[        R                  R                  UR                  5       5      n[        R                  " UR                  5       [        R                  R                  US
9n[        R                  R                  R                  US5      n[        R                  R                  R                  US5      n	[        R                   " US9n
U
R#                  S5        [%        S5       H  n[%        U5       H=  n[        R&                  " SSX:S9nU" U5      R)                  5       R+                  5         M?     UR-                  5         UR/                  5         UR-                  5         M     U
R#                  S5        [%        S5       H  n[%        U5       H=  n[        R&                  " SSX:S9nU" U5      R)                  5       R+                  5         M?     UR-                  5         UR/                  5         U	R-                  5         M     [1        UR                  5       UR                  5       5       H$  u  p[        R2                  R5                  X5        M&     g )Nr$   r    i   r   Tbiasr   r   F)offload_gradientsrJ   r"   *   r   )r   	generator)r   r   r   r   r   r,   requires_grad_r   r   r%   r   r   r   CPUOffloadOptimizerlr_schedulerCosineAnnealingLR	Generatorrs   r   r   r   r   r   r   r   r.   r/   )r1   offload_grad
grad_accumr   r   r   r   r   
scheduler1
scheduler2rngr   r2   r   r   s                  r6   "test_optim_cpu_offload_correctness,TestOptim.test_optim_cpu_offload_correctnessz  sY    " IIb&!GGIIIfbt,GGIIIb"4(GGIIIb#D)
 			& 	q	  'v&""6#4#4#67**KK*
 [[--??L
[[--??L
ooV, qA:&KK2fDq	((* ' KKMOO  	qA:&KK2fDq	((* ' KKMOO  &++-v/@/@/BCFBMM&&r. Dr9   c           
         [         S   n[        R                  " [        R                  " SSSS9[        R                  " 5       [        R                  " SSSS95      nUR                  U5        [        R                  " UR                  5       [        R                  R                  5      n[        S5       H]  n[        R                  " SSUS	9nU" U5      R                  5       R                  5         UR                  5         UR!                  5         M_     ["        R$                  " 5        n[        R&                  " UR)                  5       UR*                  5        [        R,                  " UR*                  S
S9nS S S 5        [.        R0                  " U5      n[        R                  " UR                  5       [        R                  R                  5      n	U	R3                  W5        [        S5       H  n[        R                  " SSUS	9nU" U5      R                  5       R                  5         UR                  5         UR!                  5         U" U5      R                  5       R                  5         U	R                  5         U	R!                  5         M     [5        UR                  5       UR                  5       5       H$  u  p[        R6                  R9                  X5        M&     g ! , (       d  f       GNt= f)Nr$   r    r!   Tr   r   r   r   r"   r   r   )r   r   r   r   r   r,   r   r   r   r%   r   r   r   r   r   r   r   rp   r   r   r   r   r   r   r   r   r   r.   r/   )r1   r   r   r   r   r2   filer   r   r   r   r   s               r6    test_optim_cpu_offload_save_load*TestOptim.test_optim_cpu_offload_save_load  s   
 " IIb$T*BGGIryysQU7V
 			&**6+<+<+>@Q@QRqAAr&1A1IMMO$$&KKM	  ((*dJJv((*DII6DIIEBJ +
 v&**6+<+<+>@Q@QRz*qAAr&1A1IMMO$$&KKM1IMMO$$&KKM  &++-v/@/@/BCFBMM&&r. D) +*s   ,AK((
K7c                 ,  ^	 [         R                  " S5        [        R                  " [        R                  " SS5      [        R
                  " 5       [        R                  " SS5      5      nUR                  U5        [        R                  " U5      R                  5       n[         R                  R                  UR                  5       SS9n[        R                  " UR                  5       SSS9n[         R                  " S	SUS
9n[        S5       H  m	[         R                   " U[         R                  S9   U" U5      nS S S 5        WR#                  5       nUR%                  5         UR'                  5         UR)                  5         U" UR                  5       5      R#                  5       nUR%                  5         UR'                  5         UR)                  5         [         R*                  R-                  XxU	4S jS9  M     g ! , (       d  f       N= f)Ni  r    r!   r   r   r   T)r   bf16_stochastic_roundr   r"      )rT   c                    > ST SU  3$ )Nz
Iteration z. r   )msgidxs    r6   <lambda>HTestOptim.test_optim_bf16_stochastic_round_correctness.<locals>.<lambda>
  s    
3%r#.Gr9   )r  )r%   rs   r   r   r   r   r,   r   r   rU   r   r   r   _AdamWr   r   autocastr   r   r   r   r.   r/   )
r1   r   r   r   r   r   r2   r   r   r  s
            @r6   ,test_optim_bf16_stochastic_round_correctness6TestOptim.test_optim_bf16_stochastic_round_correctness  sp   $ryyT2BGGIryys?ST		&v&//1 ""6#4#4#64"@"&
 KK2f-8Cenn=q	 >IIKENNKKM 1::<(,,.ENNKKMMM&&"G '  ==s   9	H
H	r   )!r   r   r   r   r   r%   r   rU   r   r   r   r   r   r   r   r   r   r   rg   markskipifr   r   rm   r   r   r   r   xpur	  r  r  r   r   r9   r6   r   r      s(   Q 5==%..9:8$/0$/ 1 % ;	$/L BC8$/ % D/, BC8$ % D0 nnmLM7J/08$
 % 1 N
  nnmLM8$< % N<0 [[t,KL[[JJ##%%<   /0[[w'   
K89E : 1	 ME: [[-DE[[JJ##%%.S   
K89E : F
E> [[JJ##%%Feii.D.D.F*F7   *,OP:/ Q	
:/x [[JJ##%%Feii.D.D.F*F7  &/	&/P 8$# %#r9   r   r   c                       \ rS rSr\S\4S j5       r\" \5      \	" S5      \
R                  R                  \" S5      SS9S 5       5       5       rS	 r\" \5      \	" S5      S
 5       5       rSrg)	TestFSDP2i  returnc                     [         $ )N)_FSDP_WORLD_SIZE)r1   s    r6   re   TestFSDP2.world_size  s    r9   r   z2.11.0r   r   c                 P   [         R                  [        4[         R                  [        4[         R                  [        4/n[
        R                  R                  5       S:  a%  UR                  [         R                  [        45        U R                  SU0U R                  5        g )Nr   args)r   r   r   r   r   r%   r   r   appendr   run_subtests_test_fsdp2)r1   	args_lists     r6   
test_fsdp2TestFSDP2.test_fsdp2  s}     __m,__m,__./
	
 ::++-7ennm<=Y	
r9   c           	      R
   SS K Jn  SS KJs  Jn  SS KJs  Jn  SSKJn  SSK	J
nJnJn  Uu  pSnSnSnU" SSSUUSS	9n[        R                  " S
5        [        R                  " S5         U" U5      nS S S 5        U	" WR!                  5       SS9n["        R$                  " U5      nUR'                  5        H   n[)        UU5      (       d  M  [+        UU
S9  M"     [+        UU
S9  U	" UR!                  5       SS9n[        R                  " S
U R,                  -   S-   5        [/        S5       GH  n[        R0                  " SXU4SS9nUR3                  US-  S:H  S9  U" U5      R5                  5       nUR7                  5         UR9                  5         UR3                  US-  S:H  S9  U" U5      R5                  5       nUR7                  5         UR!                  5        H@  nUR:                  c  M  UR=                  UR:                  UR>                  R@                  S9  MB     UR9                  5         U RC                  UU5        GM     URD                  S   S   S   nURF                  U   S   nURD                  S   S   S   nURF                  U   S   nURI                  5       nU RC                  URK                  5       URK                  5       5        SU	RL                   3n[O        U5      RQ                  5       (       a  [R        RT                  " U5        URW                  URY                  5       US9  U	" UR!                  5       SS9nUR!                  5        H  n [        RZ                  " U 5      U l        M      UR9                  5         UR]                  URY                  5       US9  UR_                  5       S:X  a  [R        RT                  " U5        [`        [b        [d        4n![g        URi                  URY                  5       5      URi                  URY                  5       5      5       H  u  n"n#U"Rj                  U#Rj                  :X  d   U"Rj                  U#Rj                  45       e[)        U"U5      (       aW  U"Rm                  5       n"U#Rm                  5       n#U"Rj                  U#Rj                  :X  d   U"Rj                  U#Rj                  45       e[)        U"U!5      (       a   U"RK                  5       n"U#RK                  5       n#U RC                  U"U#5        M     g ! , (       d  f       GN= f)Nr   )r_   )	ModelArgsTransformerTransformerBlock   r!   r   r      )n_layersn_headsdim
vocab_sizemax_seq_len	dropout_pr   r   g{Gz?r  )offload_policyrK   r  r"   r   )set_to_none)opr   exp_avg_fsdp_low_bit_optim_)checkpoint_id)7ri   rj   torch.distributed.checkpoint
checkpointtorch.utils._pytreeutils_pytreerl   r_   :torch.testing._internal.distributed._tensor.common_dtensorr.  r/  r0  r%   rs   r   r   r   r   modulesrv   r   rd   r   randintr   rW   r   r   grad
all_reduceReduceOpAVGr   r   statefull_tensorr   r   r   existsshutilrmtreer   r   
zeros_liker   get_rankr   r   r   r   	tree_iter	__class__rw   )$r1   r&  ry   dcppytreer_   r.  r/  r0  	optim_clsr9  
batch_sizer6  seq_len
model_args
base_model
base_optim
fsdp_modelm
fsdp_optimiter_idxinp	fsdp_loss	base_lossparam
base_parambase_exp_avg
fsdp_paramfsdp_exp_avgfull_fsdp_exp_avgr>  resumed_fsdp_optimp
subclassesv1v2s$                                       r6   r)  TestFSDP2._test_fsdp2(  s]   (22,,4	
 	
 %)!	

!

 	"\\&!$Z0J "z4464@
]]:.
##%A!-..An= & 	J~>z4464@
"tyy.1,-aH--:G/DVTC  hla.? A"3,,.I OO  hla.? A"3,,.I #..0::)OOEJJ4==3D3DOE 1 OOY	2 !   ,,Q/9!<
!''
3I>,,Q/9!<
!''
3I>(4460024E4P4P4RS /y/A/A.BC%%''MM-(&&(F
 'z'<'<'>4H&&(A%%a(AF ) 	!#..0N==?aMM-($nmD
/::<=Z2245
FB <<2<</M",,1MM/"g&&[[][[]||r||3QbllBLL5QQ3"j))]]_]]_R$
w "!s   *	T
T&c                    Sn[         S-  S-   n[        R                  " XSS9nUR                  R                  S   [         -  S:w  d   e[        U5        [        R                  " UR                  5       5      n[        S5       H]  n[        R                  " SUSS9nU" U5      R                  5       R                  5         UR                  5         UR                  5         M_     g )Nr2  rC   rK   r   r"   r   r   )r#  r   r   weightr   r   r   r   r   r   r%   r   r   r   r   r   )r1   in_dimout_dimr   r   r   inputss          r6   test_uneven_shardTestFSDP2.test_uneven_shard  s     "R'!+ 		&&9||!!!$'771<<<E OOE$4$4$67	qA[[F6:F&M((*NN!	 r9   r   N)r   r   r   r   propertyintre   r	   r#  r   rg   r  r  r   r+  r)  rt  r   r   r9   r6   r   r     s     C     &'/0[[.x8Q
 R 1 (
`%D &'/0" 1 ("r9   r   __main__)8r   rN  rp   pathlibr   rg   r%   r   "torch.distributed._composable.fsdpr   r   r   torch.testing._internalr   *torch.testing._internal.common_distributedr	   #torch.testing._internal.common_fsdpr
   $torch.testing._internal.common_utilsr   r   r   r   rt   packaging.versionr   torchaor   torchao.optim.quant_utilsr   r   r   torchao.optim.subclass_4bitr   torchao.optim.subclass_8bitr   torchao.optim.subclass_fp8r   torchao.testing.utilsr   torchao.utilsr   r   bitsandbytesr   ImportErrorr   versionhipr   r   r   r   r#  r   r   r   r9   r6   <module>r     s4          
 1 G 8  L %  
 7 6 4 .
 	== 
KK+E "_-8 _-Di iX  L" L"^ | , y ) zK e  
C
  Ds$   D D DDD'&D'