
    9iL                     |   S SK r S SKrS SKrS SKJr  S SKrS SKrS SKrS SK	J
r
  S SKJrJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJrJrJr  S SKJr  S SKJ r    " S S5      r!\RD                  " \RF                  S9 " S S\5      5       r#\RD                  " \RH                  S9 " S S\#5      5       r$g)    N)Optional)check_model_is_id)UploadStrategypush_to_hub_in_queuewait_for_done)Hooks)HOOKS)CheckpointProcessor)Hook)Priority)DEFAULT_REPOSITORY_REVISIONLogKeys	ModelFile)
get_logger)	is_masterc                        \ rS rSrSrSrSrSrg)CheckpointStrategy   by_epochby_stepno N)__name__
__module____qualname____firstlineno__r   r   r   __static_attributes__r       t/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/hooks/checkpoint/checkpoint_hook.pyr   r      s    HG	Br   r   )module_namec                   V   \ rS rSrSr\R                  rSrSr	\
R                  SSSSSSSSS\\R                  S4S	\\   S
\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\4S jjrS rS rS rS rS rS rS rS%S jrS rS  rS! rS" rS# r S$r!g)&CheckpointHook    a  Save checkpoints periodically.

Args:
    save_strategy(str): The strategy to save checkpoint, can be `by_epoch`, `by_step` or `no`
    interval (int): The frequency to save model. If `by_epoch=True`,
        it means the number of epochs, else means the number of iterations
    save_dir (str): The directory to save checkpoints. If is None, use `trainer.work_dir`
    output_dir (str): The absolute path to save the output files for inference. If it's not specified,
        the default dir is `{sub_dir}/output`.
    save_last (bool): Whether to save the last checkpoint. Default: True.
    max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
        If the number exceeding the limit, earlier checkpoints will be deleted first.
    push_to_hub (bool): Whether push the checkpoint to modelhub.
    hub_repo_id (str): The hub repo id.
    hub_token (str): The token of the modelhub. You can also set the environment variable `MODELSCOPE_API_TOKEN`.
    private_hub (bool): Whether push to a private hub, default True.
    hub_revision (str): Which branch to push the model to, default is `master`.
    upload_strategy (str): The action adopted when the previous uploading is not done
    and the next one is coming, can be `cancel` or `wait`.
    save_trainer_state (bool): Save the trainer state for continue training, default True.
    kwargs:
        by_epoch (bool): Same with `save_strategy`, but has a higher priority, legacy argument.
        output_sub_dir (str): The folder under the `save_dir` to save the output checkpoint for inference.
            This argument is kept to fit the existing configs.
zeval_result.txtztrain.checkpointr   NTFsave_strategyintervalsave_dir
output_dir	save_lastmax_checkpoint_numpush_to_hubhub_repo_id	hub_tokenprivate_hubhub_revisionupload_strategysave_trainer_statec                    X l         X0l        SU;   a0  US   (       a  [        R                  O[        R                  U l        OXl        SU;   a  US   U l        S U l        OS U l        X@l        XPl        S U l	        Xpl
        Xl        Xl        Xl        Xl        Xl        Xl        SU l        S U l        S U l        Ub  [)        [+        U5      S5      U l        / U l        [/        5       U l        g )Nr   output_sub_dir   )r%   r&   r   r   r   r$   r2   r'   r(   	rng_stater*   r+   r,   r-   r.   r/   r0   tagis_model_idr)   maxinthistory_checkpointsr
   	processor)selfr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   kwargss                  r   __init__CheckpointHook.__init__B   s     ! @FA!3!<!<!3!;!;  "/v%"()9":D"DO"&D(O"&&"&(."4"&)&)#.@*A1&ED##% ,.r   c                     Xl         g)zV
The checkpoint hook accepts a processor to finish the actual saving/deleting action.
N)r;   )r<   r;   s     r   set_processorCheckpointHook.set_processoro   s	     #r   c                 D   SU l         U R                  (       d  UR                  U l        U R                  (       d  U R                  (       a:  [
        R                  R                  U R                  U R                  5      U l        O=[
        R                  R                  U R                  [        R                  5      U l        [
        R                  R                  U R                  5      (       d  [
        R                  " U R                  SS9  [        US5      (       d  [        5       U l        OUR                  U l        [        5       (       aP  U R                  nU R                   R#                  X5        U R                  R%                  SU R                   35        g g )Nr3   T)exist_okloggerzCheckpoints will be saved to )r6   r&   work_dirr'   r2   ospathjoinr   TRAIN_OUTPUT_DIRexistsmakedirshasattrr   rE   r   r;   prepare_outputinfo)r<   trainerr'   s      r   
before_runCheckpointHook.before_runu   s   }}#,,DM"""$'',,t}}/3/B/B#D #%'',,t}}/8/I/I#K ww~~dmm,,KK5w))$,DK!..DK;;JNN))'>KK<T]]OLM	 r   c                     U[         R                  :X  a"  [        R                   SUR                  S-    3$ [        R
                   SUR                  S-    3$ )N_r4   )r   r   r   EPOCHepochITERiterr<   rP   r$   s      r   generate_prefixCheckpointHook.generate_prefix   sO    .777mm_Agmma&7%899ll^1W\\A%5$677r   c                    U R                  X5      nU R                  R                  U5      (       a  [        5       (       am  U[        R
                  :X  a-  U R                  R                  SUR                  S-    S35        O,U R                  R                  SUR                  S-    S35        U R                  X5        [        5       (       a  U R                  (       at  U R                  [        R                  :X  a  U R                  nSnO4U R                  S-   U-   n[         R"                  " U R                  USS9  SnU R%                  XXE5        g g g )	NzSaving checkpoint at r4   z epochz iterF_upload_T)dirs_exist_ok)rZ   r;   should_save_on_rankr   r   r   rE   rO   rV   rX   _save_checkpointr*   r/   r   cancelr'   shutilcopytree_push_to_hub)r<   rP   r$   prefixr'   
delete_dirs         r   _do_saveCheckpointHook._do_save   s   %%g=>>--g66{{ $6$?$??KK$$/0A/B&IK KK$$/q0@/AGI!!'2;;4++##~'<'<<!__
"
!__z9FB
OOZtE!
gzF ,;r   c                     U R                   [        R                  :w  a  g U R                  U5      (       a!  U R	                  U[        R                  5        g g N)r$   r   r   _should_saverg   r<   rP   s     r   after_train_epoch CheckpointHook.after_train_epoch   sE    !3!<!<<W%%MM'#5#>#>? &r   c                     U R                   [        R                  :w  a  g U R                  U5      (       a!  U R	                  U[        R                  5        g g rj   )r$   r   r   rk   rg   rl   s     r   after_train_iterCheckpointHook.after_train_iter   sE    !3!;!;;W%%MM'#5#=#=> &r   c                     U R                   R                  S5        [        U R                  U R                  SS9  [        U R                  5        U R                  (       a  U R                   R                  S5        g g )Nz,Train finished. Uploading models, waiting...T)strategydonezUploading models done.)rE   rO   r   PUSH_TO_HUB_QUEUE_NAMEr/   r   r*   rl   s     r   	after_runCheckpointHook.after_run   sc    GH''))	 	d112KK56 r   c                    U R                   c%  [        UR                  U R                  5      U l         U =R                  S-  sl        [        U R                  U R                  U R                  UU R                  U R                  USU R                   3U R                  U R                   (       a  UR                  US9$ SUS9$ )Nr4   zv1. )
rs   	repo_namer'   tokenprivatecommit_messager6   revisionsource_reporf   )r7   r   input_model_idr,   r6   r   ru   r/   r+   r-   r.   )r<   rP   re   r'   rf   s        r   rd   CheckpointHook._push_to_hub   s    #01G1G15 ADA#''))&&!..$$!dhhZ &&262B2B..!# 	# IK!# 	#r   c                    [        [        R                  R                  U R                  U R
                  5      S5       nUR                  [        R                  " UR                  5      5        S S S 5        g ! , (       d  f       g = f)Nw)
openrG   rH   rI   r'   EVAL_RESULT_FILEwritejsondumpsmetric_values)r<   rP   fs      r   save_evaluate_results$CheckpointHook.save_evaluate_results   sV    "'',,t0E0EFGGDJJw4456  s   0A88
Bc                 Z   [         R                  R                  U R                  U5      nU R	                  U5      nU R
                  R                  XU R                  UU R                  5        U R                  U5        U R                  R                  U5        U R                  U5        U$ )z7Save checkpoint files and remove obsolete ones
        )rG   rH   rI   r&   _create_training_stater;   save_checkpointsr'   r0   r   r:   append_remove_obsolete_checkpointsr<   rP   re   checkpoint_path_prefixmetas        r   r`   CheckpointHook._save_checkpoint   s     "$dmmV!D**73''(,(,(?(?	A 	""7+  ''(>?))'2r   c                    U R                   b  [        U R                  5      U R                   :  a  U R                   Vs/ s H  o"PM     nnU R                  R                  5         [	        U5       Hu  u  pEU[        U5      U R                   -
  :  a9  U R
                  R                  SU 35        U R                  R                  XS9  MZ  U R                  R                  U5        Mw     g g g s  snf )Ndeleting checkpoint: r   )
r)   lenr:   clear	enumeraterE   rO   r;   remove_checkpointsr   )r<   rP   ckptr:   ir   s         r   r   +CheckpointHook._remove_obsolete_checkpoints   s    "".D,,-0G0GG484L4L"M4LD44L"M$$**,-67J-K)s./$2I2IIIKK$$/0F/GHJNN55 6 P ,,334JK .L H /"Ms   C-c                 P   U R                   [        R                  :X  a  U R                  nU R                  nO8U R                   [        R
                  :X  a  U R                  nU R                  nOgU" UU R                  5      (       d  U R                  (       a  U" U5      (       a  gg)NFT)
r$   r   r   is_last_epochevery_n_epochsr   is_last_iterevery_n_itersr%   r(   )r<   rP   
check_lastcheck_frequencys       r   rk   CheckpointHook._should_save   s    !3!<!<<++J"11O#5#=#==**J"00O7==* *.2nn2<W2E2Er   c                    [         R                  " 5       [        R                   R                  5       [        R                   R                  5       [        R                  R                  5       S.U l        UR                  UR                  S-   UR                  S-   U R                  S.nSnUR                   HP  n[        US5      (       d  M  [        USS5      (       d  M*  UR                  5       X$R                    SU 3'   US-  nMR     U$ )	N)randomnumpycpucudar4   )rV   rX   
inner_iterr5   r   
state_dictrk   T-)r   getstatenp	get_statetorchget_rng_stater   get_rng_state_allr5   rV   rX   r   hooksrM   getattrr   	__class__)r<   rP   r   r   hooks        r   r   %CheckpointHook._create_training_state  s    oo'YY((*<<--/JJ002	
 ]]LL1$!,,q0	
 MMDt\**wt^7;0= 0=040A'q,-Q	 " r   )r:   r+   r.   r,   r%   r7   rE   r)   r'   r2   r-   r;   r*   r5   r&   r(   r$   r0   r6   r/   )F)"r   r   r   r   __doc__r   LOWPRIORITYr   ru   r   r   r   r   ra   r   strr9   boolr>   rA   rQ   rZ   rg   rm   rp   rv   rd   r   r`   r   rk   r   r   r   r   r   r"   r"       sM   4 ||H(/ 1C0K0K+,+/-1-159/4.2,0/3/J2@2G2G,0+/ (+/#C=+/ $C=+/ &c]	+/
 %TN+/ &.c]+/ 'tn+/ 'sm+/ %SM+/ 'tn+/  (}+/ #+3-+/ &*+/Z#N48G.@?7#$7
L r   r"   c                      ^  \ rS rSrSr\R                  rS S S.r      SS\	S\
\   S\
\	   S	\
\	   S
\
\   S\
\   S\4U 4S jjjrS rS rS rS rS rS rS rS rS rS rSrU =r$ )BestCkptSaverHooki!  a  
Save best checkpoints hook.

Args:
    metric_key (str): Metric key to compare rule for best score.
    save_best(bool): Save the best checkpoint, if set to False, this hook will have no effect.
    rule (str): Comparison rule for best score. Support "max" and "min". If rule is "max", the checkpoint
        at the maximum `metric_key` will be saved, If rule is "min", the checkpoint at the minimum `metric_key`
        will be saved.
    save_file_name: The manual specified saving file name.
    restore_best (bool): Whether to restore the best checkpoint after training.
    max_checkpoint_num (int): The max number of checkpoint files, default None which means never delete anything.
        If the number exceeding the limit, checkpoints with worse metric will be deleted, which is judged by the
        `rule` and `metric_key` arguments.
    save_trainer_state (bool): Save the trainer state for continue training, default True.

The `BestCkptSaverHook` class accepts `output_sub_dir` and `output_dir` argument as its super class do.
If neither of them are passed, the default value is `{save_dir}/output_best`.

This class will not accept the `interval` or `save_strategy` or `by_epoch` argument, because the saving interval
will follow the `EvaluationHook`.
c                 
    X:  $ rj   r   xys     r   <lambda>BestCkptSaverHook.<lambda>;  s    AEr   c                 
    X:  $ rj   r   r   s     r   r   r   ;  s    qur   r8   min
metric_key	save_bestrulesave_file_namerestore_bestr)   r0   c                 J  > US;   d   S5       e0 n	SU;  a  SU;  a  [         R                  U	S'   UR                  SS 5        UR                  SS 5        [        T
U ]  " SUUS.UDU	D6  X l        Xl        X0l        S U l        S U l	        X@l
        XPl        [        5       U l        g )	Nr   z%Only support "max" or "min" rule now.r2   r'   r%   r$   )r)   r0   r   )r   TRAIN_BEST_OUTPUT_DIRpopsuperr>   r   r   r   _best_metric_best_ckpt_filer   r   setr:   )r<   r   r   r   r   r   r)   r0   r=   output_kwargsr   s             r   r>   BestCkptSaverHook.__init__=  s     ~%N'NN%6)l&.H.7.M.MM*+

:t$

?D) 	
11	
 	
 		
 #$	 #,(#&5 r   c                    SSK Jn  UR                  U5      n[        U5      S:X  a  U R                  R                  S5        US   R                  SUR                  4:X  a*  U R                  U5      (       a  U R                  US5        g g g )Nr   EvaluationHookITrying to save the best checkpoint, but there is no evaluation, skipping.rV   r   )
modelscope.trainers.hooksr   get_hookr   rE   errorlast_eval_tagrV   rk   rg   r<   rP   r   	eval_hooks       r   rm   #BestCkptSaverHook.after_train_epoch[  s    <$$^4	y>QKK[ Q<%%*( (,0,=,=g,F,FMM':. -G(r   c                    SSK Jn  UR                  U5      n[        U5      S:X  a  U R                  R                  S5        US   R                  SUR                  4:X  a*  U R                  U5      (       a  U R                  US5        g g g )Nr   r   r   rX   r   )
r   r   r   r   rE   r   r   rX   rk   rg   r   s       r   rp   "BestCkptSaverHook.after_train_iterg  s    <$$^4	y>QKK[ Q<%%*& &*.*;*;G*D*DMM'9- +E&r   c                 ^    U R                   =(       a    U R                  UR                  5      $ rj   )r   _is_best_metricr   rl   s     r   rk   BestCkptSaverHook._should_saves  s"    ~~M$"6"6w7L7L"MMr   c                 F   Uc  gU R                   U;  a  [        SU R                    SU 35      eU R                  c  XR                      U l        gU R                  U R                     nU" XR                      U R                  5      (       a  XR                      U l        gg)NFzNot find metric_key: z in T)r   
ValueErrorr   rule_mapr   )r<   r   
compare_fns      r   r   !BestCkptSaverHook._is_best_metricv  s     ??-/''8]OLN N $ -oo >Dtyy1J-8$:K:KLL$1//$B!r   c                    U[         R                  :X  a;  S[        R                   UR                  S-    SU R
                   U R                   3$ S[        R                   UR                  S-    SU R
                   U R                   3$ )Nbest_r4   rT   )	r   r   r   rU   rV   r   r   rW   rX   rY   s      r   rZ   !BestCkptSaverHook.generate_prefix  s{    .7777==/'--!*;)<Adoo=NtO`O`Nabb7<<.)9(:!DOO;LTM^M^L_``r   c                    U R                   nUc+  [        R                  R                  U R                  U5      nO*[        R                  R                  U R                  U5      nX0l        U R                  U5      nU R                  R                  XU R                  UU R                  5        U R                  U5        U R                  R                  U5        U R                  U5        U$ rj   )r   rG   rH   rI   r&   r   r   r;   r   r'   r0   r   r:   addr   r   s        r   r`   "BestCkptSaverHook._save_checkpoint  s    !%!4!4!)%'WW\\$--%H"%'WW\\$--2H&J"  6**73''(,(,(?(?	A 	""7+  $$%;<))'2r   c                   ^  U 4S jnT R                   b  [        T R                  5      T R                   :  a  [        T R                  US9nT R                  R	                  5         [        U5       Hi  u  pEUT R                   :  a  T R                  R                  U5        M2  T R                  R                  SU 35        T R                  R                  XS9  Mk     g g g )Nc                 |   > [        U R                  TR                  5      S   5      nTR                  S:X  a  U* $ U$ )Nr4   r8   )floatsplitr   r   )name1metric1r<   s     r   extract_metric_from_filenameTBestCkptSaverHook._remove_obsolete_checkpoints.<locals>.extract_metric_from_filename  s7    EKK8;<GyyE!xr   )keyr   r   )r)   r   r:   sortedr   r   r   rE   rO   r;   r   )r<   rP   r   r:   r   r   s   `     r   r   .BestCkptSaverHook._remove_obsolete_checkpoints  s    	 "".D,,-0G0GG"(((.J#L$$**,-67J-K)t...,,001GHKK$$/0F/GHJNN55 6 P .L	 H /r   c                     SU R                   0$ )Nbest_metric)r   )r<   s    r   r   BestCkptSaverHook.state_dict  s    4,,
 	
r   c                     Ub&  [        U5      S:  a  UR                  S5      U l        g U R                  R	                  S5        g )Nr   r   zHThe state_dict is not available, the best metric value will be affected.)r   getr   rE   warning)r<   r   s     r   load_state_dict!BestCkptSaverHook.load_state_dict  s9    !c*o&9 *} =DKKZr   c                 l    U R                   (       a#  SSKJn  UR                  U R                  U5        g g )Nr   )LoadCheckpointHook)r   9modelscope.trainers.hooks.checkpoint.load_checkpoint_hookr  load_checkpointr   )r<   rP   r  s      r   rv   BestCkptSaverHook.after_run  s,     e..t/C/CWM	 r   )r   r   r:   r   r   r   r   r   )Tr8   NFr4   T)r   r   r   r   r   r   r   r   r   r   r   r   r9   r>   rm   rp   rk   r   rZ   r`   r   r   r  rv   r   __classcell__)r   s   @r   r   r   !  s    . ||H)2DEH .2',150556,0) )$TN)  }) "*#	)
  (~) &.c]) &*) )<
/
.N$a$P.

N Nr   r   )%rG   r   rb   typingr   r   r   r   r   modelscope.hub.check_modelr   modelscope.hub.push_to_hubr   r   r   modelscope.metainfor   !modelscope.trainers.hooks.builderr	   9modelscope.trainers.hooks.checkpoint.checkpoint_processorr
   modelscope.trainers.hooks.hookr   "modelscope.trainers.hooks.priorityr   modelscope.utils.constantr   r   r   modelscope.utils.loggerr   modelscope.utils.torch_utilsr   r   register_moduler"   r   r   r   r   <module>r     s    	       87 7 % 3 / 72 2 . 2  5#7#78}T } 9}@ 5#:#:;gN gN <gNr   