
    9iUJ                     "   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	J
r
Jr  SSKJr  SrS rS rS rSS	 jrS
 r " S S5      rS r  SS jrS rS rS r     SS jrS rS r  SS jrSS jrSS jrSS jr SS jr!SS jr"S r#g) z'Utilities for logging and serialization    N)mpuprint_rank_0)FP16_Optimizerrunsc                 L    [         R                  R                  U[        U 5      $ N)ospathjoinSUMMARY_WRITER_DIR_NAME)namebases     `/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/mglm/utils.pyget_log_dirr      s    77<<5t<<    c                  z    S/n [         R                  " U SS9nUR                  S5      R                  5       S   nU$ )Nzhostname -ITshellutf-8r   )
subprocesscheck_outputdecodesplit)hostname_cmdresultmaster_addrs      r   get_hostnamer   "   s=    !?L$$\>F--(..03Kr   c                    [         R                  R                  5       S:X  a  [        R                  " S/SS9n[        UR                  5       5      nXR                  :X  a/  [        R                  " S/SS9n[        UR                  5       5      n[         R                  R                  U/5      nO [         R                  R                  S/5      n[         R                  R                  US5        UR                  5       nU$ )Nr   zshuf -n 1 -i 10000-65535Tr   )torchdistributedget_rankr   r   intstripmaster_portcuda
LongTensor	broadcastitem)argsports     r   get_spare_portr+   )   s    !!#q(&&(B'C-134::< ###**,F+G157Dtzz|$Dzz$$dV,zz$$aS)	a(99;DKr   c                    U(       aP  [        SSS9  [        U 5       H7  nSS[        U5      -
  -  n[        SR                  X4[	        X5      5      SS9  M9     Ub  [
        R                  R                  US5      n[        US	5       n[        R                  " [        U 5      USS
9  SSS5        U R                  (       a  U R                  b  [        U R                  SS9 n[        R                  " U5      nSSS5        [
        R                  R                  US5      n	[        U	S	5       n[        R                  " WU5        SSS5        gggg! , (       d  f       N= f! , (       d  f       No= f! , (       d  f       g= f)zPrint arguments.z
arguments:Tflush.   z
  {} {} {}Nzconfig.jsonw)	sort_keysr   encodingzconfig_gpt_large.json)printvarslenformatgetattrr	   r
   r   openjsondump	deepspeeddeepspeed_configload)
r)   verboselog_dirargdots	json_fileoutputfiler>   deepspeed_json_files
             r   print_and_save_argsrH   :   s*   l$':C"s3x-(D##Cwt/AB$P  GGLL-8	)S!VIId4j&D9 ">>d33?d++g>$#'99T?  ?"$'',,w/F#H)3/6		*F3 0/ @>	 !! ?> 0/s$    E!E!,E2
E!
E/2
F c                    Sn[         R                  R                  5       nSnU n[        U [        5      (       a  U R
                  nUR                   H  nUS    H  nUS-  nUR                  R                  5       nUR                  R                  5       n	UR                  R                  5       n
USR                  XU[        UR                  5      5      -  nUSR                  XU
5      -  nM     M     [        USS9  g	)
z+Print min, max, and norm of all parameters.r   z6iteration, rank, index, model-parallel,min, max, norm
params   z{:7d}, {:4d}, {:4d}, {:2d}, z{:.6E}, {:.6E}, {:.6E}
Tr-   N)r   r    r!   
isinstancer   	optimizerparam_groupsdataminmaxnormr8   r"   model_parallelr5   )rM   	iterationindexrankstring
optimizer_param_groupparammin_max_rR   s              r   print_params_min_max_normr]   O   s    E%%'DFFJ)^,,((
!.. *EQJE::>>#D::>>#D::??$D4;;E,@,@(AC CF077DIIF + / 
&r   c                   B    \ rS rSrSr " S S5      rS rS rS
S jrSr	g	)Timersc   zGroup of timers.c                   :    \ rS rSrSrS rS rS rS rS
S jr	Sr
g	)Timers.Timerf   zTimer.c                 `    Xl         SU l        SU l        [        R                  " 5       U l        g )N        F)name_elapsed_started_time
start_timeselfr   s     r   __init__Timers.Timer.__init__i   s"    JDM!DM"iikDOr   c                     U R                   (       a   S5       e[        R                  R                  5         [        R                  " 5       U l        SU l         g)zStart the timer.ztimer has already been startedTN)rh   r   r%   synchronizeri   rj   rl   s    r   startTimers.Timer.starto   s9    }}F&FF$JJ""$"iikDO DMr   c                     U R                   (       d   S5       e[        R                  R                  5         U =R                  [
        R
                  " 5       U R                  -
  -  sl        SU l         g)zStop the timer.ztimer is not startedFN)rh   r   r%   rp   rg   ri   rj   rq   s    r   stopTimers.Timer.stopv   sH    ==8"88=JJ""$MMdiikDOO;<M!DMr   c                      SU l         SU l        g)zReset timer.re   FN)rg   rh   rq   s    r   resetTimers.Timer.reset}   s    DM!DMr   c                     U R                   nU R                   (       a  U R                  5         U R                  nU(       a  U R                  5         U(       a  U R	                  5         U$ )zCalculate the elapsed time.)rh   ru   rg   rx   rr   )rl   rx   rh   rg   s       r   elapsedTimers.Timer.elapsed   sA    }}H}}		}}H



Or   )rg   rf   rj   rh   N)T)__name__
__module____qualname____firstlineno____doc__rm   rr   ru   rx   r{   __static_attributes__ r   r   Timerrb   f   s    	*	!	"	"
	r   r   c                     0 U l         g r   timersrq   s    r   rm   Timers.__init__   s	    r   c                 z    XR                   ;  a  U R                  U5      U R                   U'   U R                   U   $ r   )r   r   rk   s     r   __call__Timers.__call__   s2    {{" $

4 0DKK{{4  r   c                     US:  d   eSnU H9  nU R                   U   R                  US9S-  U-  nUSR                  XV5      -  nM;     [        U5        g)zLog a group of timers.re   z	time (ms))rx   g     @@z | {}: {:.2f}N)r   r{   r8   r   )rl   names
normalizerrx   rW   r   elapsed_times          r   log
Timers.log   so    CD;;t,44 5 %&(23Lo,,T@@F  	Vr   r   N)g      ?T)
r}   r~   r   r   r   r   rm   r   r   r   r   r   r   r_   r_   c   s    * *X!
r   r_   c                    SnU S-   nUSR                  [        R                  R                  5       U-  5      -  nUSR                  [        R                  R	                  5       U-  5      -  nUSR                  [        R                  R                  5       U-  5      -  nUSR                  [        R                  R                  5       U-  5      -  n[        U5        g)zSimple GPU memory report.g      0Az memory (MB)z | allocated: {}z | max allocated: {}z | cached: {}z | max cached: {}N)r8   r   r%   memory_allocatedmax_memory_allocatedmemory_cachedmemory_reservedr   )r   
mega_bytesrW   s      r   report_memoryr      s     !JN"F
 ''

(C(C(E*4)5 6 6F
$++EJJ,K,K,M.8-9 : :F
o$$UZZ%=%=%?*%LMMF
!(()C)C)E+5*6 7 7Fr   c                    U(       a  SnOSR                  U5      nU(       a)  [        R                  " 5       nUSR                  U5      -  n[        R                  R                  XSR                  [        R                  " 5       5      5      $ )Nreleasez{}z_zero_dp_rank_{}zmp_rank_{:02d}_model_states.pt)r8   r   get_data_parallel_rankr	   r
   r   get_model_parallel_rank)checkpoints_pathrT   r   zeroddp_ranks         r   get_checkpoint_namer      sr     KK	",,.	&&w//77<<(//0K0K0MNP Pr   c                     [         R                  R                  U 5      n[         R                  R                  U5      (       d  [         R                  " USS9  g g )NT)exist_ok)r	   r
   dirnameexistsmakedirs)filenamer   s     r   ensure_directory_existsr      s:    ggooh'G77>>'""
Gd+ #r   c                 B    [         R                  R                  U S5      $ )Nz!latest_checkpointed_iteration.txt)r	   r
   r   )r   s    r   get_checkpoint_tracker_filenamer      s    77<<(*MNNr   c                     UUR                  5       S.n[        U R                  USS9n[        U5        [        R                  " X45        [        SR                  U5      5        g )N)rT   optimizer_state_dictT)r     successfully saved {})
state_dictr   saver   r   r5   r8   )r)   rT   rM   zero_sdzero_checkpoint_names        r   save_zero_checkpointr      sW     ) 4 4 6G /tyy)$O01	JJw-	
#
*
*+?
@Ar   c
                    Uc  [        U 5      nUR                  (       a  U(       d  [        XX4US9  GO'[        R                  " 5       S:X  Ga  [        UR                  U5      n
[        SR                  [        R                  R                  5       U U
5      5        SU 0nUR                  (       a  UR                  nUR                  5       nU(       a[  0 nUR                  5        H  u  pUR                  X'   M     UR!                  5        VVs0 s H  u  nnUU   (       d  M  UU_M     nnnXS'   UR"                  (       d3  U	(       d,  Ub  UR                  5       US'   Ub  UR                  5       US'   UR$                  (       d  [&        R(                  " 5       US	'   [*        R&                  R-                  5       US
'   [        R.                  " 5       US'   [        R0                  R/                  5       US'   [        R2                  " 5       R5                  5       US'   [7        U
5        [        R                  " X5        [        SR                  U
5      5        U(       a  [        R                  R9                  5         [        R                  R                  5       S:X  a<  [;        UR                  5      n[=        US5       nUR?                  U5        SSS5        ggs  snnf ! , (       d  f       g= f)Save a model checkpoint.N)tagr   z<global rank {} is saving checkpoint at iteration {:7d} to {}rT   modulerM   lr_schedulerrandom_rng_statenp_rng_statetorch_rng_statecuda_rng_staterng_tracker_statesr   r1   ) strr=   save_ds_checkpointr   r   r   r   r5   r8   r   r    r!   r   r   named_parametersrequires_graditemsno_save_optimno_save_rngrandomgetstatenp	get_stateget_rng_stater%   get_cuda_rng_tracker
get_statesr   barrierr   r:   write)rT   modelrM   r   r)   r   r   only_changed_parametersno_deepspeedr   checkpoint_namesdr   requires_grad_dictr   	parameterkeyvaluetracker_filenamefs                       r   save_checkpointr      sl    {)n~~l9\SI %%'1,1$))SAONu((113Y&() y)B~~))+J&%'"','='='?OD/8/F/F&, (@ '1&6&6&8&8
U)#. CJ&8  
 &xL %%m(&/&:&:&<B{O+)5)@)@)BB~& ##)/):%&%'YY%8%8%:>"(-(;(;(=$%',zz'?'?'A#$+.+C+C ,*, '( $O4JJr++22?CD !!#!!#q(:499E"C(AGGCL )( )?B )(s   K%KK%%
K3c                    0 nXS'   Ub  UR                  5       US'   UR                  (       d  [        R                  " 5       US'   [        R                  R                  5       US'   [        R                  " 5       US'   [        R                  R                  5       US'   [        R                  " 5       R                  5       US'   UR                  UR                  XES	9  g)
r   rT   Nclient_lr_schedulerr   r   r   r   r   )client_state)r   r   r   r   r   r   r   r   r%   r   r   r   r   r   )rT   r   r   r)   r   r   s         r   r   r     s     
B{O$0$;$;$= !!'!2YY002> % 3 3 5$zz779#&#;#;#=#H#H#J 	$))S:r   c                 :   [        U 5      n[        R                  R                  U5      (       d  [	        SR                  U5      5        [        R                  R                  U 5      (       aP  [        R                  R                  U 5      n[        R                  R                  U5      u  p4[	        S5        X4SS4$ [	        S5        U SSS4$ [        USSS	9 nUR                  5       R                  5       nUS
:H  nS S S 5        U WWS4$ ! , (       d  f       N= f)Nz-WARNING: could not find the metadata file {} z6Try to directly load the checkpoint from the directoryFTz<    will not load any checkpoints and will start from randomr   rr   r3   r   )r   r	   r
   isfiler   r8   isdirnormpathr   r:   readr#   )	load_pathr   r
   load_dirr   r   
metastringr   s           r   get_checkpoint_iterationr   /  s    6yA77>>*++DKK 	77==##77##I.DGGMM$/MHHJ%--  	!UE))	g	6!VVX^^%
	) 
7 j'4// 
7	6s   $D
Dc                 d   [        UR                  5      u  pgpU	(       d  gUR                  (       a  U(       d  U R                  UUUR                  (       + =(       a    U(       + UR
                  (       + S9u  pUR
                  (       d%  SU;   a  UR                  US   5        [        S5        U
c&  [        R                  " 5       S:X  a  [        S5        U$ GO![        XgU5      n
[        R                  " 5       S:X  a7  [        SR                  [        R                  R                  5       U
5      5        [        R                  " U
SS9nUR                  (       a  U R                   n U R                  US	   S
S9u  pU(       d  U(       a  [        SU SU 35        U(       dX  UR"                  (       dG  UR                  (       d6  U(       d/   Ub  UR                  US   5        Ub  UR                  US   5        UR"                  (       d  U(       a  SnO US   nU(       d  UR"                  (       d  UR&                  (       d   [(        R*                  " US   5        [,        R(                  R/                  US   5        [        R0                  " US   5        [        R2                  R1                  US   5        [        R4                  " 5       R7                  US   5        [        R                  " 5       S:X  a  [        SR                  U
5      5        U$ ! [$         a    [        SR                  U
5      5         GNDf = f! [$         a8     US   n GN:! [$         a!    [        SR                  U
5      5        Sn  GNdf = ff = f! [$         a    [        SR                  U
5      5         Nf = f)zLoad a model checkpoint.r   )load_optimizer_statesload_lr_scheduler_statesr   zLoad lr scheduler statezUnable to load checkpoint.z'global rank {} is loading checkpoint {}cpu)map_locationr   F)strictzMissing keys z, unexpected keys rM   r   zUnable to load optimizer from checkpoint {}, exiting. Specify --no-load-optim or --finetune to prevent attempting to load the optimizer state.rT   total_iterszbA metadata file exists but Unable to load iteration  from checkpoint {}, starting from 0 iterationr   r   r   r   r   zUnable to load random state from checkpoint {}, exiting. Specify --no-load-rng or --finetune to prevent attempting to load the random state.z  successfully loaded {})r   r?   r=   load_checkpointno_load_optimno_load_lr_schedulerload_state_dictr   r   r   r5   r   r8   r   r    r!   r   finetuneKeyErrorno_load_rngr   setstater   	set_stateset_rng_stater%   r   
set_states)r   rM   r   r)   r   r   r   r   r   successr   r   missing_keysunexpected_keysrT   s                  r   r   r   P  s3    '?tyy&I#H7~~l#33&*&8&8"8"N=N)-)B)B%B	 4 D
 ((-Bb-H((,A)BC23"))+q023J # .hWE%%'1,;BB!!**,o? @ ZZe< >>LLE(-(=(=xL )> )(%?~-??PQ
 t}}T5G5GP]
6(--bo>+ 00N1CD }}	
	;I 4==1A1A	2OOB123II> 23#4 56JJ$$R(8%9:$$&11"5I2JK !!#q((//@AS  6 $VO4	66  	}-	 EEKV'F)* 		$  	2  0	2	2sO   .L M BN $M ?M 
NM&N<N NN$N/.N/c                    S[        [        U 5      5      ;   nU R                  5        H  u  pEU(       a&  UR                  U   R                  nUR                  nO%UR                  nUR                  U   R                  nU(       a$  SU;   a  UR                  5       R                  5       nUR                  U5        M     g)z
Loads weights from src to dst via in place copy.
src is a huggingface gpt2model, while dst is one of our models.
dst2src=True loads parameters from our models into huggingface's.
^dst2src is still untested
Conv1DweightN)r   typer   _parametersrO   t
contiguouscopy_)srcdstdst2src
conv_layernprO   r?   s           r   load_weightsr	    s     Sc^+J$$&??1%**D66D66D??1%**D(a-668&&(D

4 'r   c                     [        UR                  U R                  U5        [        UR                  U R                  U5        g r   )r	  c_fcdense_h_to_4hc_projdense_4h_to_houroair  s      r   load_mlpr    s.    3,,g6S..8r   c                     [        UR                  U R                  U5        [        UR                  U R                  U5        g r   )r	  c_attnquery_key_valuer  denser  s      r   load_attentionr    s,    S00':SYY0r   c                    [        UR                  U R                  U5        [        UR                  U R                  U5        [        U R                  UR                  U5        [        U R                  UR                  U5        g r   )
r	  ln_1input_layernormln_2post_attention_layernormr  mlpr  	attentionattnr  s      r   load_transformer_layerr     sV    3..8377ASWWcggw'3==#((G4r   c                    UR                   n[        UR                  U R                   R                  U5        [        UR                  U R
                  U5        [        UR                  U R                  U5        [        U R                   R                  UR                   R                  5       H  u  pE[        XEU5        M     g)z
Loads weights from `oai` to `our` via in place copy.
`oai` is a huggingface gpt2model, while `our` is one of our models.
dst2src=True loads parameters from our models into huggingface's.
^dst2src=True is still untested
N)transformerr	  ln_ffinal_layernormwteword_embeddingswpeposition_embeddingsziplayershr   )r  r  r  transformer_model	our_layer	oai_layers         r   move_weightsr/    s     "'')H)H"&&(;(;WE"&&(?(?I #COO$:$:COO<M<M N	yW= !Or   c                 `   U S   U S   pCU S   U S   U S   pvn/ nXQ   R                  5       n	[        X1   S U	 R                  5       5       HH  u  pUR                  U5      nUS:X  a  SXq   SU
4   R                  5        S	3nUR	                  U5        MJ     [        S
R                  U5      5        / n[        XR                  S5      5       H"  n
Xa   U
   (       d  M  UR	                  U
5        M$     [        U5        [        UR                  X1   U   R                  5       5      5        [        UR                  5      S:  a.  [        UR                  XA   U   R                  5       5      5        O*[        UR                  XA   R                  5       5      5        [        Xq   S S 2U4   5        g )Ntokens
target_idsattention_mask
logit_maskposition_idsz[MASK][r   ]    )r(   	enumeratetolist	IdToTokenappendr5   r   rangesize	DecodeIdsr7   shape)
local_varsbatch_id	tokenizerr1  r2  r3  r4  r5  output_tokenssepitokentarget_positionss                r   debug_finetune_datarK    s   #H-z,/GJ/90%l3Z5O !-NM

"
'
'
)Cf.t4;;=>##E*H.q!t499;<A>EU#	 ?
 
#((=
!"3B(""##A& ) 

	)

f./?@GGI
JK
:q $%56==?A	B 	i!!*"6"="="?@A	,
 $4!4
56r   )TN)FF)NTFFF)F)$r   r	   r   r   ri   r;   numpyr   r   megatron_utilr   r   megatron_util.fp16r   r   r   r   r+   rH   r]   r_   r   r   r   r   r   r   r   r   r   r	  r  r  r   r/  rK  r   r   r   <module>rO     s    . 	       + -  ="4*(? ?D" !&"P ,OB   ,1!&"'BJ;"0J "'"'bJ.9
1
5>&7r   