
    9i*                         S SK Jr  S SKrS SKJrJr  S SKJr  S SKJ	r
  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  SSKJr  SSKJr  \" 5       r " S S\5      rg)    )DictN)mpuprint_rank_0)FP16_Module)
functional)
TorchModel)Tensor)
get_logger)init_megatron_util)pre_load   )	PlugModel)PlugNLGConfigc                      ^  \ rS rSrSrU 4S jrSS jr\SS\" S5      * 4S j5       r	         SS	 jr
SS
\\\4   4S jjrSrU =r$ )DistributedPlug   a  
The wrapper class of PLUG Model to initialize parallel environment, load model weights, generate sentences.
Parameters:
    model_dir (`str`, *required*):
        Path to model damo/nlp_plug_text-generation_27B.
    The model structure in model_dir should be like this:
    model_dir
        |_ config.json
        |_ configuration.json
        |_ ds_zero-offload_10B_config.json
        |_ vocab.txt
        |_ model <-- an empty directory

    Model binaries shall be downloaded separately to populate the model directory, so that
    the model directory would contain the following binaries:
        |_ model
            |_ mp_rank_00_model_states.pt
            |_ mp_rank_01_model_states.pt
            |_ mp_rank_02_model_states.pt
            |_ mp_rank_03_model_states.pt
            |_ mp_rank_04_model_states.pt
            |_ mp_rank_05_model_states.pt
            |_ mp_rank_06_model_states.pt
            |_ mp_rank_07_model_states.pt
    rank (`int`, *required*):
        Used to identify different GPUs in a tensor parallel environment. eg. The rank of GPU #0 is 0, and the
        model file `mp_rank_00_model_states.pt` will be loaded on this GPU.
    world_size (`int`, *required*, defaults to 8):
        The parallel size in total.
    model_parallel_size (`int`, *required*, defaults to 8):
        The parallel size of model(tensor parallel).
    master_ip (`str`, *required*):
        The master IP, can usually be set to `"127.0.0.1"`, used as part of
        [`~torch.distributed.init_process_group`] method parameter `init_method`.
        `init_method` = `"tcp://{master_ip}:{master_port}"`
    master_port (`str`, *required*):
        The master port, can usually be set to `"29500"`, used as part of
        [`~torch.distributed.init_process_group`] method parameter `init_method`.
        `init_method` = `"tcp://{master_ip}:{master_port}"`
    seed (`int`, *optional*, defaults to 42):
        Random seed to control sampling.
c                    > [         TU ]  " U40 UD6  X l        X0l        [        R
                  " U5      U l        [        XS9  SU l        U R                  SS9U l
        g )N)	model_dirrankr   model)path_load_tag)super__init__r   	model_cfgr   from_pretrainedconfigr   	iterationinitialize_modelr   )selfr   r   kwargs	__class__s       k/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/plug/distributed_plug.pyr   DistributedPlug.__init__@   sU    -f-	#33I>Y:***A
    c                    [        S5        [        U R                  5      n[        R                  " 5       S:X  am  [
        R                  SR                  [        R                  " 5       [        UR                  5        Vs/ s H  o3R                  5       PM     sn5      5      5        U R                  R                  (       a+  U R                  R                  (       a  UR                  5         UR                  [         R                  R#                  5       5        U R                  R                  (       Ga  [%        U5      nU R                  R&                  (       a  UR(                  R*                  R,                  R.                  R0                  R3                  5         UR(                  R*                  R,                  R.                  R4                  R3                  5         UR(                  R*                  R,                  R.                  R6                  R3                  5         U R                  R8                  (       aB  UR(                  R*                  R,                  R.                  R6                  R3                  5         U R                  R:                  (       a1  UR=                  5        H  u  pESU;   d  M  UR3                  5         M     [?        [        R                  " 5       U R@                  US9nUR(                  R*                  RC                  5       nU H4  nXRE                  5       ;  a  [        SU-   5        M&  [        SU-   5        M6     UR(                  R*                  RG                  USS	9  U$ s  snf )
zBuild the model.z3Building Plug model. It will take a few minutes ...r   z5 > number of parameters on model parallel rank {}: {}	LayerNorm)tagz
Skip key: zLoading key: F)strict)$r   r   r   r   get_data_parallel_rankloggerinfoformatget_tensor_model_parallel_ranksum
parametersnelement	deepspeedfp16halfcudatorchcurrent_devicer   fp32_embeddingmoduler   bert
embeddingsword_embeddingsfloatposition_embeddingstoken_type_embeddingsfp32_tokentypesfp32_layernormnamed_modulesr   r   
state_dictkeysload_state_dict)	r   r   r   pname_module
load_model
model_dictkeys	            r"   r    DistributedPlug.initialize_modelK   s\   JK$++&%%'1,KKGNN668u/?/?/AB/A!/ABCEF
 ;;  T[[%5%5JJL 	

5::,,./ ;;&E{{))""''22BBHHJ""''22FFLLN""''22HHNN {{**""''22HHNN {{))%*%8%8%:MD"d* &; ..0NN
 \\''224
C//++\C/0_s23	 
 	**:e*DG Cs   ?M7r           Infc                    US:  a#  U [         R                  " X5      S   S   :  nX0U'   US:  a  U R                  U R                  5       S   5      R	                  5       n [         R
                  " U SS9u  pV[         R                  " [        R                  " USS9SS9nXr:  nUS	S S24   R                  5       US	SS 24'   SUS
'   Xh   nX0U'   U R                  SS5      R	                  5       n U $ )Nr   ).NrL   r   T)
descendingrO   dim.).r   )
r5   topkviewsize
contiguoussortcumsumFsoftmaxclone)	logitstop_ktop_pfilter_valueindices_to_removesorted_logitssorted_indicescumulative_probssorted_indices_to_removes	            r"   top_k_logitsDistributedPlug.top_k_logitsy   s    19 &F)B1)E GK *L !L(4$%3;[[q!12==?F,1JJv$,O)M$||		-R0b : (8'?$0HSbS1%' %S!"W-/0$V, . H(4$%[[B'224Fr$   c                 2    U R                  UUUUUUUUU	U
S9
$ )N)checkpoint_activationsis_infersequence_outputparallel_outputr   )r   input_tokenstoken_type_idsattention_masktarget_tokensposition_idsdecode_attention_maskrh   ri   rj   rk   s              r"   forwardDistributedPlug.forward   s8     zz!#9++  
- 
	-r$   inputc                    [         R                  R                  5       nUS   R                  S   nUS   R	                  SS5      R                  5       R                  U5      nUS   R                  U5      nUS   R                  U5      nU R                  R                  5         [         R                  " 5          / n	/ n
SnS nU R                  R                  nSnX:  Ga.  US-  S:X  a  US:w  a  U
R                  U5        Xn:H  R                  S	S
9S   nU[        U
5      -   S:  a=  [         R                  " US U [         R                  R!                  U
5      /S5      SS  nO0[         R                  R!                  U
5      US   X[        U
5      -   & US:g  nUS   R                  U5      n/ n
S n[         R"                  " US/[        U
5      [         R$                  US9nU R                  US UUUUS	USS9	u  nnnUS S 2SS S 24   nUU R&                  S   -  nU R)                  UU R&                  S   U R&                  S   S9n[*        R,                  " USS9n[         R.                  " USS9nUS   R1                  5       nUU:  a  SnSUS'   US:X  a&  [        U	5      [3        [5        SU5      S-  5      :  a  OTUS:X  a  US-  nGM  [         R                  " UU/SS9nU
R                  U5        U	R                  U5        US-  nX:  a  GM.  / nU	 H,  nU(       a  US   S:X  a  US:X  a  M  UR                  U5        M.     SU0sS S S 5        $ ! , (       d  f       g = f)N	input_idsr   r   rO   dec_input_idsro   f      T)as_tuplei   i )dtypedeviceF)ri   rj   rk   temperaturer]   r^   )r]   r^   rQ   )num_samplesd   g?generate_context)r5   r4   r6   shaperT   rV   tor   evalno_gradr   original_vocab_sizeappendnonzerolencat
LongTensorfulllongr   re   rY   rZ   multinomialitemintmax)r   ru   
out_lengthr    r}   
batch_sizetokensrx   ro   all_generate_tokensgenerate_tokenscounterrj   
vocab_sizesep_token_idxstartrq   _r\   	log_probsprev
prev_tokenr   tokens                           r"   generateDistributedPlug.generate   s   **,;'--a0
{#((B/::<??Go.11&9/033F;

]]_"$ OG"O88JM&S=A%'Q,#**=9#4==!% > '')+Es?33s:!&"6EN!JJ11/B, "  $u"& :?9N9N8G:I q	%O 16 )6 7 '-kN$)/$:$=$=f$EM&(O&*O$zz:q/*-o*>05

17 9 .2ZZ"!" !$3$) .8 	.+*6?  2q)$.."??**..1..1 + 3 IIf"5	((B!!W\\^
+!$J!DG$-@)ACAz*S0E2 *2$qLG %		=$*?Q G&&z2#**:61q &t  ",#(8)")#',|$++E2 - '(89S __s   :IM7M
M)r   r   r   r   r   rl   )	NNNNNFFNT)rz   )__name__
__module____qualname____firstlineno____doc__r   r   staticmethodr<   re   rs   r   strr	   r   __static_attributes____classcell__)r!   s   @r"   r   r      sv    )V	B,\ #$CuU|m  @  $#"!&*', $ $-.P:d3;/ P: P:r$   r   )typingr   r5   megatron_utilr   r   megatron_util.fp16r   torch.nnr   rY   modelscope.modelsr   modelscope.models.baser	   modelscope.utils.loggerr
   modelscope.utils.megatron_utilsr   $modelscope.utils.nlp.load_checkpointr    r   configurationr   r*   r    r$   r"   <module>r      s>      + * $ ( ) . > 9  (	k:j k:r$   