
    9iO:                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	J
r
JrJr  S SKrS SKrS SKJr  S SKJr  S SKJr  S\4S jrS\S\4S jrS	 rS/S
\S\SS4S jjrS\SS4S jrS\SS4S jrS0S\S\\   SS4S jjr S0S\\\4   4S jjr!S r"S r#S r$S r%S r&S0S jr'S0S jr(S r)S r*S r+\ RX                  " 5       S 5       r-S r.S r/S0S jr0S\Rb                  Rd                  S\4S  jr3S!\	S"\Rh                  4   S#\S$\S\Rh                  4S% jr5S&\6\   S'\S(\S)\7\   S\8\7\   \Rh                  4   4
S* jr9 S1S+\Rb                  Rt                  S,\Rv                  S-\S\Rb                  Rt                  4S. jjr<g)2    N)CallableListOptionalTuple)version)distributedreturnc                      [         R                   " [         R                  [         R                  5      n U R                  S5        U R	                  5       S   nU R                  5         U$ )N) r      )socketAF_INETSOCK_STREAMbindgetsocknameclose)sockports     \/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/utils/torch_utils.py_find_free_portr      sI    ==););<DIIga DJJLK    r   c                 H  ^ ^ [         R                  " [         R                  " 5       5      S   nUR                  S5        [         R                   " [         R                  [         R
                  5       m[        U U4S jU 5       5      sS S S 5        $ ! , (       d  f       g = f)N	localhostc              3   P   >#    U  H  nTR                  UT45      S :g  v   M     g7f)r   N)
connect_ex).0ipr   ss     r   	<genexpr> _is_free_port.<locals>.<genexpr>#   s#     ?3R1<<T
+q03s   #&)r   gethostbyname_exgethostnameappendr   r   all)r   ipsr   s   ` @r   _is_free_portr'      sa    

!
!&"4"4"6
7
;CJJ{	v~~v'9'9	:a?3?? 
;	:	:s   3B
B!c                 4   [        U S5      (       a  U R                  " S0 UD6n U $ [        R                  " [        R
                  5      [        R                  " S5      :  a  [        R                  " U 40 UD6n U $ [        S[        R
                   S35        U $ )Ncompilez	2.0.0.devzDCompiling model needs torch version > 2.0.0, your torch version is: z , origin model will be returned. )hasattrr)   r   parsetorch__version__print)modelcompile_optionss     r   compile_modelr2   &   s    ui  00 L 
u((	)W]];-G	Ge77 L	 	&&+&7&7%88XZ	
 Lr   launcherbackendc                     [         R                  " SS9c  [         R                  " S5        U S:X  a  [        U40 UD6  g U S:X  a  [	        U40 UD6  g U S:X  a  [        U40 UD6  g [        SU  35      e)NT)
allow_nonespawnpytorchmpislurmzInvalid launcher type: )mpget_start_methodset_start_method_init_dist_pytorch_init_dist_mpi_init_dist_slurm
ValueError)r3   r4   kwargss      r   	init_distrC   4   ss    	d+3
G$97-f-	U	w)&)	W	+F+28*=>>r   c                     [        [        R                  S   5      n[        R                  R                  U5        [        R                  " SSU 0UD6  g )N
LOCAL_RANKr4   r*   )intosenvironr-   cuda
set_devicedistinit_process_groupr4   rB   
local_ranks      r   r>   r>   A   s=    RZZ-.J	JJ*%6G6v6r   c                    [        [        R                  S   5      n[        R                  R                  U5        S[        R                  ;  a  S[        R                  S'   S[        R                  ;  a  [        S5      e[        R                  S   [        R                  S'   [        R                  S   [        R                  S	'   [        R                  " SS
U 0UD6  g )NOMPI_COMM_WORLD_LOCAL_RANKMASTER_PORT29500MASTER_ADDRz/The environment variable MASTER_ADDR is not setOMPI_COMM_WORLD_SIZE
WORLD_SIZEOMPI_COMM_WORLD_RANKRANKr4   r*   )	rF   rG   rH   r-   rI   rJ   KeyErrorrK   rL   rM   s      r   r?   r?   H   s    RZZ <=>J	JJ*%BJJ&$+

=!BJJ&HII!zz*@ABJJ|$:;BJJv6G6v6r   c                 h   [        [        R                  S   5      n[        [        R                  S   5      n[        R                  S   n[        R                  R                  5       n[        R                  R                  X%-  5        [        R                  " SU S35      nUb  [        U5      [        R                  S'   O]S[        R                  ;   a  OH[        S5      (       a  S	[        R                  S'   O$[        [        5       5      [        R                  S'   S
[        R                  ;  a  U[        R                  S
'   [        U5      [        R                  S'   [        X%-  5      [        R                  S'   [        U5      [        R                  S'   [        R                  " U S9  g)a|  Initialize slurm distributed training environment.

If argument ``port`` is not specified, then the master port will be system
environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
environment variable, then a default port ``29500`` will be used.

Args:
    backend (str): Backend of torch.distributed.
    port (int, optional): Master port. Defaults to None.
SLURM_PROCIDSLURM_NTASKSSLURM_NODELISTzscontrol show hostname z | head -n1NrQ   i<s  rR   rS   rU   rE   rW   r4   )rF   rG   rH   r-   rI   device_countrJ   
subprocess	getoutputstrr'   r   rK   rL   )r4   r   proc_idntasks	node_listnum_gpusaddrs          r   r@   r@   U   s8    "**^,-GN+,F

+,Izz&&(H	JJ',-
!)K8:D $'I

=!	"**	$ (/BJJ}%(+O,=(>BJJ}%BJJ&$(

=!"6{BJJ|"7#56BJJ|WBJJvG,r   c                     [        5       (       aZ  SSKJn  U c"  U" 5       (       a  SSKJn  UR                  5       n [        R                  " U 5      n[        R                  " U 5      nX44$ SnSnX44$ )zGet dist info of a specified group

Args:
    group: The parallel group, default None, for the global group

Returns:
    A tuple of the current rank and world_size of the group
r   )is_megatron_initialized)mpur   )	is_distmodelscope.utils.megatron_utilsrh   megatron_utilri   get_data_parallel_grouprK   get_rankget_world_size)grouprh   ri   rank
world_sizes        r   get_dist_infors   |   sk     yyK=466)//1E}}U#((/
  
r   c                  T    [        [        R                  R                  SS5      5      $ )NrE   r   )rF   rG   rH   getr*   r   r   get_local_rankrv      s    rzz~~lA.//r   c                      [         R                  " 5       (       d  g[         R                  " 5       (       d  g[         R                  " 5       $ )Nr   )rK   is_availableis_initializedrn   r*   r   r   rn   rn      s2      ==?r   c                      [         R                  " 5       (       d  g[         R                  " 5       (       d  g[         R                  " 5       $ )Nr   )rK   rx   ry   ro   r*   r   r   ro   ro      s5        r   c                      [         R                  " 5       (       d  g[         R                  " 5       (       d  g[         R                  " 5       n U S:X  a  g[         R                  " 5         g)z^
Helper function to synchronize (barrier)
among all processes when using distributed training
Nr   )rK   rx   ry   ro   barrier)rr   s    r   synchronizer}      sI    
   $$&JQLLNr   c                  d    [         R                  " 5       =(       a    [         R                  " 5       $ N)rK   rx   ry   r*   r   r   rj   rj      s    84#6#6#88r   c                 V    [        5       (       a  [        R                  " U 5      S:H  $ S$ )Nr   T)rj   rK   rn   rp   s    r   	is_masterr      s!    (/		4==1$;t;r   c                 0   ^  S[         S[         4U 4S jjnU$ )Nfuncr	   c                 J   >^  [         R                  " T 5      U U4S j5       nU$ )Nc                  6   > [        T5      (       a  T" U 0 UD6$ g r   )r   )argsrB   r   rp   s     r   wrapper.master_only.<locals>.decorate.<locals>.wrapper   s#    T,V,,  r   )	functoolswraps)r   r   rp   s   ` r   decoratemaster_only.<locals>.decorate   s%    			- 
	- r   )r   )rp   r   s   ` r   master_onlyr      s    x H  Or   c                      [        5       (       d  [        R                  " 5       $ Sn [        5       (       a  [        R                  " 5       n [        R
                  " 5         [        U S5      n U $ )zRMake sure each rank has the same temporary directory on the distributed mode.
    Nr   )rj   tempfilemkdtempr   rK   r|   	broadcast)tmpdirs    r   make_tmp_dirr      sN     99!!F{{!!#LLNvq!FMr   c                    [         R                  " 5       n[        R                  " S/SS9nX!:X  a`  [        R                  " [	        [
        R                  " U 5      5      [        R                  SS9n[        R                  " UR                  SS9n[         R                  " 5         [         R                  " X15        X!:w  a4  [        R                  " UR                  5       4S[        R                  SS9n[         R                  " 5         [         R                  " WU5        [
        R                  " UR                  5       R                  5       R!                  5       5      $ )z
Broadcasts the inputs to all ranks.

Arguments:
    inputs : Any objects that can be serialized by pickle.
    src (int): Source rank.
Returns:
    Each rank returns the same value as src.
r   rI   devicedtyper   )rK   rn   r-   tensor	bytearraypickledumpsuint8shaper|   r   fullitemloadscpunumpytobytes)inputssrcrq   shape_tensorinputs_tensors        r   r   r      s     ==?D<<F3L{fll6*+5;;vO||M$7$7GLLNNN<%{

L$5$5$7#:#$).*02
 	LLNNN=#&<<))+113;;=>>r   c                    U bq  U S:  ak  [         R                  " U 5        [        R                   R                  U 5        [        R                  " U 5        [        R
                  R                  U 5        g [        SU  35      e)Nr   z0Random seed should be positive, current seed is )randomseednpr-   manual_seedrI   manual_seed_allrA   )r   s    r   set_random_seedr      sc    DAID
		t$

""4(>tfEG 	Gr   c                      [         R                  " 5       S:X  a  [         R                  " SS9$ [         R                  R                  $ )z^
Return a process group based on gloo backend, containing all the ranks
The result is cached.
ncclgloor]   )rK   get_backend	new_grouprp   WORLDr*   r   r   _get_global_gloo_groupr     s4     V#~~f--zzr   c                    [         R                  " U5      nUS;   d   e[        R                  " US:X  a  SOS5      n[        R
                  " U 5      n[        U5      S:  a:  [        R                  SR                  [        5       [        U5      S-  U5      5        [        R                  R                  U5      n[        R                  " U5      R                  US9nU$ )N)r   r   r   r   rI   i   @z;Rank {} trying to all-gather {:.2f} GB of data on device {}r   )rK   r   r-   r   r   r   lenloggerwarningformatrn   ByteStoragefrom_buffer
ByteTensorto)datarp   r4   r   bufferstorager   s          r   _serialize_to_tensorr     s    u%G&&&&\\7f#4%&AF\\$F
6{WIF8:v;'*F4	5 ++F3Gg&)))8FMr   c           	         [         R                  " US9nUS:  d   S5       e[        R                  " U R	                  5       /[        R
                  U R                  S9n[        U5       Vs/ s H2  n[        R                  " S/[        R
                  U R                  S9PM4     nn[         R                  " XSUS9  U Vs/ s H  n[        UR                  5       5      PM     nn[        U5      nX7:w  aG  [        R                  " Xs-
  4[        R                  U R                  S9n[        R                  " X4SS9n XP4$ s  snf s  snf )zj
Returns:
    list[int]: size of the tensor, on each rank
    Tensor: padded tensor that has the max size
r   r   zBcomm.gather/all_gather must be called from ranks within the group!r   r   dim)rK   ro   r-   r   numelint64r   rangezeros
all_gatherrF   r   maxr   cat)	r   rp   rr   
local_size_	size_listsizemax_sizepaddings	            r   _pad_to_largest_tensorr   #  s    $$51JaLKLv||~.$)KK%+]]4J
 z""A 	QCu{{6==A"   	OOI7.78idTYY[!iI89~H ++x47$)KK%+]]4 F,!4!
 9s   ,9D? #Ec           	      H   [        5       S:X  a  U /$ Uc
  [        5       n[        R                   " U5      S:X  a  U /$ [        X5      n[	        X!5      u  p2[        U5      nU Vs/ s H2  n[        R                  " U4[        R                  UR                  S9PM4     nn[        R                  " XbUS9  / n[        X65       HY  u  pUR                  5       R                  5       R                  5       SU n	UR                  [         R"                  " U	5      5        M[     U$ s  snf )a  
Run all_gather on arbitrary picklable data (not necessarily tensors).
Args:
    data: any picklable object
    group: a torch process group. By default, will use a group which
        contains all ranks on gloo backend.
Returns:
    list[data]: list of data gathered from each rank
r   Nr   r   )ro   r   rK   r   r   r   r-   emptyr   r   r   zipr   r   r   r$   r   r   )
r   rp   r   r   r   r   tensor_list	data_listr   r   s
             r   r   r   C  s    1v}&(5!Q&v!$.F.v=I9~H
 A 	XLFMMJ   	OOKu5II3##%--/6f-. 4 s   #9Dr0   c                 f    [        S U R                  5        5       5      S1-
  n[        U5      S:*  $ )Nc              3   L   #    U  H  n[        UR                  5      v   M     g 7fr   )ra   r   )r   ps     r   r    $is_on_same_device.<locals>.<genexpr>i  s     ?,>qS]],>s   "$r   r   )set
parametersr   )r0   
device_sets     r   is_on_same_devicer   h  s1    ?E,<,<,>??5'IJz?ar   
forward_fn.
chunk_size	chunk_dimc                   ^ ^^	 [        U5      S:  d
   U S35       e[        [        R                  " T 5      R                  5      nU[        U5      :w  a  [	        SU S[        U5       S35      eUS:  a  US   R
                  T   nU H4  nUR
                  T   U:w  d  M  [	        SU SUR
                  T    35      e   US   R
                  T   U-  S:w  a!  [	        SUS   R
                  T    S	U 35      eUS   R
                  T   U-  m	[        UU	4S
 jU 5       5      n[        U 4S j[        U6  5       5      n[        R                  " UTS9$ T " U6 $ )Nr   z" has to be a tuple/list of tensorszforward_chunk_fn expects z arguments, but only z input tensors are givenz/All input tenors have to be of the same shape: z, found shape zThe dimension to be chunked z( has to be a multiple of the chunk size c              3   D   >#    U  H  nUR                  TTS 9v   M     g7f)r   N)chunk)r   input_tensorr   
num_chunkss     r   r    ,apply_chunking_to_forward.<locals>.<genexpr>  s(      %/ - zy9 -s    c              3   .   >#    U  H
  nT" U6 v   M     g 7fr   r*   )r   input_tensors_chunkr   s     r   r    r     s       C'A# +,'As   r   )
r   inspect	signaturer   rA   r   tupler   r-   r   )
r   r   r   input_tensorsnum_args_in_forward_chunk_fntensor_shaper   input_tensors_chunksoutput_chunksr   s
   ` `      @r   apply_chunking_to_forwardr   m  s    }  K&'IJK  $'*%00$2 #s='99'(D'EEZ[^_l[mZn o   ! 	! A~$Q'--i8)L!!),< El^ T##/#5#5i#@"ACD D * !!),z9Q>.}Q/?/E/Ei/P.Q R"|%& & #1%++I6*D
  % %/ -%/  /  C'*,@'AC C yyI66}%%r   headsn_heads	head_sizealready_pruned_headsc                 Z  ^ [         R                  " X5      n[        U 5      U-
  n U  H   mT[        U4S jU 5       5      -
  mSUT'   M"     UR	                  S5      R                  5       R                  S5      n[         R                  " [        U5      5      U   R                  5       nX4$ )Nc              3   6   >#    U  H  oT:  a  S OSv   M     g7f)r   r   Nr*   )r   hheads     r   r    3find_pruneable_heads_and_indices.<locals>.<genexpr>  s     M8L14x!Q.8Ls   r   r   r   )
r-   onesr   sumview
contiguouseqaranger   long)r   r   r   r   maskindexr   s         @r    find_pruneable_heads_and_indicesr	    s     ::g)DE cM8LMMMT
  99R=##%((+D#ll3t95d;@@BE<r   layerr  r   c                    UR                  U R                  R                  5      nU R                  R                  X!5      R	                  5       R                  5       nU R                  bZ  US:X  a)  U R                  R	                  5       R                  5       nO+U R                  U   R	                  5       R                  5       n[        U R                  R                  5       5      n[        U5      XR'   [        R                  R                  US   US   U R                  S LS9R                  U R                  R                  5      nSUR                  l        UR                  R                  UR                  5       5        SUR                  l        U R                  bK  SUR                  l        UR                  R                  WR                  5       5        SUR                  l        U$ )Nr   r   )biasFT)r   weightr   index_selectdetachcloner  listr   r   r-   nnLinearrequires_gradcopy_r  )r
  r  r   Wbnew_size	new_layers          r   prune_linear_layerr    sq    HHU\\(()E!!#-446<<>Azz!8

!!#))+A

5!((*002AELL%%'(HJHMXa[uzz(   R++,  &+I"1<<>*%)I"zz',	$Q\\^,'+	$r   )r   r   )r   )=r   r   rG   r   r   r   r_   r   typingr   r   r   r   r   r   r-   torch.multiprocessingmultiprocessingr;   	packagingr   r   rK   ra   r   rF   boolr'   r2   rC   r>   r?   r@   rs   rv   rn   ro   r}   rj   r   r   r   r   r   	lru_cacher   r   r   r   r  Moduler   Tensorr   r  r   r   r	  r  
LongTensorr  r*   r   r   <module>r$     s     	      2 2   "  % @ @ @
? 
?c 
? 
?7 7$ 7
7C 
7d 
7$-c $-# $-$ $-NsCx ,0!9< ?BG     @"J UXX__    
,&ell*+,&,& ,&
 \\,&^Cy#&36!#h+0S5<<1G+H& #$ehhoo #..(-r   