
    9iz                     N   S SK r S SKrS SKJrJrJrJrJrJr  S SK	r	S SK
Js  Jr  S SKr	S SK	Jr  S SKJr  S SKJr  S SKJrJrJr  S SKJr  S SKJr  S S	KJrJr  S S
KJr  S SK J!r!  S SK"J#r#J$r$J%r%J&r&J'r'  S SK(J)r)J*r*  S SK+J,r,J-r-  S SK.J/r/  S SK0J1r1  S SK2J3r3  SSK4J5r5  SSK6J7r7  SSK8J9r9J:r:J;r;J<r<J=r=  \(       a  S SK>J?r?   S SK@JArA   S SKCJDrD  S SK@JArA  SrE S SKGJHrH  \3" 5       rISrJS rKS/rL S S!KMJNrN   " S# S$\	R                  R                  5      rP " S% S&\R                  5      rQ " S' S(\R                  5      rR " S) S*\R                  5      rS " S+ S,\-\5      rT\5R                  " \1R                  \/R                  S-9 " S. S/\T5      5       rX " S0 S1\	R                  R                  5      rYS2 rZS6S3 jr[ " S4 S5\	R                  R                  5      r\g! \B a    SrA GNf = f! \B a    SrE\F" S5         GN"f = f! \B a    SrH\F" S5         GN2f = f! \B a    SrN\F" S"5         GN4f = f)7    N)TYPE_CHECKINGCallableListOptionalTupleUnion)nn)autocast)CrossEntropyLoss)GenerationConfigPreTrainedTokenizerStoppingCriteriaList)LogitsProcessorList)GenerateOutput)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)set_seed)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging)assert_device_mapget_device_map)Model
TorchModel)Models)Tasks)
get_logger   )MODELS   )
QWenConfig)HistoryTypeStopWordsLogitsProcessordecode_tokensget_stop_words_idsmake_context)BaseStreamer	rearrange)apply_rotary_emb_funcTFzWarning: import flash_attn rotary fail, please install FlashAttention rotary to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary)rms_normzWarning: import flash_attn rms_norm fail, please install FlashAttention layer_norm to get better performance https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_normzqwen-7br$   )flash_attn_unpadded_funczkWarning: import flash_attn fail, please install FlashAttention https://github.com/Dao-AILab/flash-attentionc                   8   ^  \ rS rSr   SU 4S jjrS rSrU =r$ )FlashSelfAttentionS   c                 ~   > [         TU ]  5         [        c   S5       e[        c   S5       eXl        X l        X0l        g )NzFPlease install FlashAttention first, e.g., with pip install flash-attnz:Please install einops first, e.g., with pip install einops)super__init__r/   r,   causalsoftmax_scale	dropout_p)selfr6   r7   attention_dropout	__class__s       c/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/qwen/backbone.pyr5   FlashSelfAttention.__init__U   sR     	'3 	10	13 % 	PO	P%**    c                    [        S XU4 5       5      (       d   e[        S XU4 5       5      (       d   eUR                  S   UR                  S   pTUR                  S   nXU4 Vs/ s H  n[        US5      PM     snu  pn[        R                  " SUS-   U-  U[        R
                  UR                  S9nU R                  (       a  Xe:X  d   eU R                  n	Un
OAXV:H  n	[        R                  " SUS-   U-  U[        R
                  UR                  S9n
SU l	        [        UUUUU
UUU R                  U R                  U	S9
n[        USUS	9nU$ s  snf )
Nc              3   z   #    U  H1  oR                   [        R                  [        R                  4;   v   M3     g 7fN)dtypetorchfloat16bfloat16.0is     r<   	<genexpr>-FlashSelfAttention.forward.<locals>.<genexpr>g   s$     KAWW77s   9;c              3   8   #    U  H  oR                   v   M     g 7frA   )is_cudarF   s     r<   rI   rJ   h   s     1y!IIys   r   r#   zb s ... -> (b s) ...)steprB   device)r7   r6   z(b s) ... -> b s ...)b)allshaper,   rC   arangeint32rN   trainingr6   r8   r/   r7   )r9   qkv
batch_sizeseqlen_qseqlen_kxcu_seqlens_q	is_causalcu_seqlens_koutputs               r<   forwardFlashSelfAttention.forwarde   sk   K!KM M 	M M1ay12222 wwqz1771:H771:BCKA9Q 67Ka||!^x'++88
 =='''I'L ,I <<a8+kkxxL DN)NN,,
 6#9ZHM Ls   )E)r6   r8   r7   )FN        __name__
__module____qualname____firstlineno__r5   r`   __static_attributes____classcell__r;   s   @r<   r1   r1   S   s     	+ , ,r>   r1   c                   @  ^  \ rS rSrSU 4S jjrSS jr  SS jrS rS r       SS\	\
\R                        S\	\
\R                        S	\	\R                     S
\	\R                     S\	\R                     S\	\R                     S\	\   S\	\   4S jjrSrU =r$ )QWenAttention   c           
        > [         TU ]  5         UR                  nU R                  S[        R
                  " [        R                  " X34[        R                  S95      R                  SSUU5      SS9  U R                  S[        R                  " S5      SS9  [        SU5      U l        UR                  U l        UR                  U l        UR                  U l        UR                  U l        UR                   U l        U R                  U R"                  -  U l        UR&                  U l        SU l        S U l        UR,                  UR                   -  U l        U R.                  UR                   -  S	:X  d   eU R.                  UR                   -  U l        [2        R4                  " UR                  S
U R.                  -  5      U l        [2        R4                  " UR                  U R.                  UR8                  (       + S9U l        UR<                  =(       d    UR>                  (       + U l         U R&                  (       a1  [B        b*  U R@                  (       d  [E        SURF                  S9U l$        UR<                  U l        URJ                  S:X  a  S U l&        O9URJ                  S:  d   e[O        U R0                  URJ                  -  5      U l&        U RL                  b  U RL                  OU R0                  n[Q        XARR                  S9U l*        URV                  U l+        URX                  U l,        [[        SS5       Vs/ s H4  nXPR                  :  a   [\        R^                  " XPR                  5      OSPM6     nn[        R`                  " U5      S S S 2S S 4   U l1        SU l2        [2        Rf                  " URF                  5      U l4        g s  snf )NbiasrB   r#   F)
persistentmasked_biasg     Tr   r!   ro   )r6   r:         ?)basei   )5r4   r5   max_position_embeddingsregister_bufferrC   trilonesboolviewtensormaxlayer_numberparams_dtype
seq_lengthhidden_size
split_sizenum_attention_heads	num_headshead_dimuse_flash_attnscale_attn_weights	layer_idxkv_channelsprojection_sizehidden_size_per_attention_headr	   Linearc_attnno_biasc_projbf16fp16is_fp32r/   r1   
attn_pdropcore_attention_flash
rotary_pctrotary_ndimsintRotaryEmbeddingrotary_emb_base
rotary_embuse_dynamic_ntkuse_logn_attnrangemathlogTensorlogn_tensor_ntk_cachedDropoutattn_dropout)r9   configr~   max_positionsdimrH   	logn_listr;   s          r<   r5   QWenAttention.__init__   s9   66JJ

M9!&-..2d1a3@/B 	 	
 	5<<-% 	 	A<0"// ++!-- ,,33((DNN:$33"&%11F4N4NN##f&@&@@AEEE  F$>$>> 	+ ii 2 2A8L8L4LMii 4 4v~~;MO #KK66;;7#;#GPTP\P\(:v/@/@)BD% KK	# $D$$q((( #D$G$G&,&7&7%8 !9D "&!2!2!>D// 	 *#4J4JK%55#11 1e_
$ -.,?DHHQ(QF$ 	 
 !<<	24D$3FGJJv'8'89
s   ;Oc                    [         R                  " XR                  SS5      5      nU R                  (       a@  U[         R                  " / UR                  S5      S-  UR                  UR                  S9-  nUR                  S5      UR                  S5      pU R                  S S 2S S 2UU-
  U2S U24   n	[         R                  " UR                  5      R                  n
[         R                  " / XR                  S9R                  UR                  5      n
[         R                  " U	UR                  UR                  5      U
5      n[        R                  R                  USS9nUR!                  UR                  5      nU R#                  U5      nUb  Xe-  n[         R                  " Xc5      nUR                  SS5      nX4$ )	N      ?rB   rN   rp   r   r#      )rC   matmul	transposer   fullsizerB   rN   ro   finfomintowherer	   
functionalsoftmaxtyper   )r9   querykeyvalueattention_mask	head_maskattn_weightsquery_length
key_lengthcausal_mask
mask_valueattn_outputs               r<   _attnQWenAttention._attn   s   ||E==R+@A""'%**

2#"((#**	+ L $)::b>388B<jii1j".'//9':;FJ;!G H[[!3!3488
ZZJ6H6HILL!
{{;#/??<3E3E#F#-/ }},,\r,B#((5((6 '3Lll<7!++Aq1((r>   c           	          UR                  5       u  pgpUR                  5       u    pn
[        R                  " Xg-  UU[        R                  UR                  S9nSnU R
                  (       a   U[        UR                  S5      5      S-  -  n[        SS9   UR                  SUU	5      UR                  SS5      R                  SX5      p[        R                  " XR                  5       UR                  5       SUS	9nUR                  XgUU5      nS S S 5        UR                  S5      UR                  S5      nnU R                  S S 2S S 2UU-
  U2S U24   n[        R                  " UR                  5      R                  n[        R                  " UUR                  S
9R!                  UR                  5      n[        R"                  " UUU5      nUb  X-   n[$        R&                  R)                  USS9nUR                  [        R                  :w  a  [+        S5      eUR-                  UR                  5      nU R/                  U5      nUb  X-  n[        R0                  " X5      nUU4$ ! , (       d  f       GNd= f)Nr   rt   r   r   F)enabledr   r   )betaalpharp   r   zDError with upcasting, attn_weights does not have dtype torch.float32)r   rC   emptyfloat32rN   r   floatr
   reshaper   baddbmmro   r   rB   r   r|   r   r   r	   r   r   RuntimeErrorr   r   r   )r9   r   r   r   r   r   bszr   	q_seq_lendk_	k_seq_lenr   scale_factorrU   rV   r   r   r   r   r   s                        r<   _upcast_and_reordered_attn(QWenAttention._upcast_and_reordered_attn   s>    ).

%	 XXZ1{{O--<<
 ""E%**R.1366Le$==Y!#%&)mmB&;&C&C%''8  !==ggi,PL'//	09;L % $)::b>388B<jii1j".'//9':;FJ;!G H[[!3!3488
\\l00224"\5H5H2I 	{{;jI%'8L}},,\r,B.V  $((5((6 '3Lll<7L((E %$s   A;I..
I=c                 V    UR                  5       S S X#4-   nUR                  U5      nU$ )Nr   )r   r{   r9   r|   r   attn_head_size	new_shapes        r<   _split_headsQWenAttention._split_heads6  s/    KKM#2&))DD	Y'r>   c                 v    UR                  5       nUR                  5       S S X#-  4-   nUR                  U5      $ )Nr   )
contiguousr   r{   r   s        r<   _merge_headsQWenAttention._merge_heads;  s<    ""$KKM#2&)*D)GG	{{9%%r>   hidden_states
layer_pastr   r   encoder_hidden_statesencoder_attention_maskoutput_attentions	use_cachec	                    U R                  U5      n	U	R                  U R                  SS9u  pnU R                  XR                  U R
                  5      n
U R                  XR                  U R
                  5      nU R                  XR                  U R
                  5      nUR                  5       S   nU(       a  XS   R                  S   -  nU R                  (       a|  XR                  5       S   :X  af  U R                  (       dU  [        R                  " XR                  -  S5      S-   nS[        R                  " U5      -  S-
  n[        US5      nXl        OU R                  nU R!                  XS9R#                  UR$                  5      nUb  ['        U[(        5      (       a  UnOU4S-  nUbP  Uu  nnU
R                  S   nUS S 2U* S 2S S 2S S 24   nUS S 2U* S 2S S 2S S 24   n[+        U
U5      n
[+        UU5      nUb8  US   US   nn[,        R.                  " UU4SS9n[,        R.                  " UU4SS9nU(       a  X4nOS nU R0                  (       a  U R                  (       d  U R2                  R$                  U
R$                  :w  a9  U R2                  R#                  U
R$                  5      R5                  U
5      U l        UR                  S5      U
R                  S5      -
  nUR                  S5      nU R2                  S S 2UU2S S 2S S 24   nU
UR7                  U
5      -  n
U R8                  (       a\  [:        bU  U R<                  (       dD  U
R>                  (       a3  XUnnnU RA                  UUU5      n[C        US5      RE                  5       nOyU
RG                  SSSS5      n
URG                  SSSS5      nURG                  SSSS5      nU RI                  XUX45      u  nnU RK                  UU R                  U R
                  5      nU RM                  U5      nUU4n U(       a:  U R8                  (       a#  [:        b  U R<                  (       d  [O        S5      eU W4-  n U $ )	Nr   r   r#   r   )	ntk_alphazb s h d -> b s (h d)r!   z/Cannot output attentions while using flash-attn)(r   splitr   r   r   r   r   rQ   r   rT   r   r   r   ceilr}   r   r   r   rN   
isinstancetupleapply_rotary_pos_embrC   catr   r   type_as	expand_asr   r/   r   rL   r   r,   r   permuter   r   r   
ValueError)!r9   r   r   r   r   r   r   r   r   mixed_x_layerr   r   r   
kv_seq_lencontext_valuer   rotary_pos_emb	q_pos_emb	k_pos_embcur_lenpast_key
past_valuepresent	seq_startseq_endr   rU   rV   rW   context_layerr   attn_weightoutputss!                                    r<   r`   QWenAttention.forward@  s    M2)//Q/GE!!%G^^T]]C!!%G"'')!,
Q---a00J  Z3E3E3G3J%J HHZ//%A1EIM499]33a7IIq)I(((I ) --/R0D0D-E 	 %.%00!/"0!3a!7%#1 Iykk!nG!!gXY1"45I!!gXY1"45I(	:E&sI6C!#-a=*Q-jH))XsO3CIIz51q9ElGGdmm&&%,,6#'#3#3#6#6LL$"")'%.  ejjm3IhhqkG**1i.?A+EFKK11%88E#;#GPTP\P\afanan%!qA 55aA>M%m&<>>Hjl  MM!Q1-E++aAq)CMM!Q1-E'+zz%e2@(M$K --k4>>.2mm=M kk-0(""'?'KTXT`T` EG G K?*r>   )r   r   r   r   r   r   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   rA   )NNNNNNNFF)rd   re   rf   rg   r5   r   r   r   r   r   r   rC   FloatTensorr   rz   r`   rh   ri   rj   s   @r<   rl   rl      s    C:J )L 37-17)r
& 596:158<>B,1$)Ze&7&7 89Z U5<<01Z !!2!23	Z
 E--.Z  (5Z !)):): ;Z $D>Z D>Z Zr>   rl   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )QWenMLPi  c                   > [         TU ]  5         [        R                  " UR                  UR
                  S-  UR                  (       + S9U l        [        R                  " UR                  UR
                  S-  UR                  (       + S9U l        UR
                  S-  n[        R                  " X!R                  UR                  (       + S9U l	        g )Nr   rs   )
r4   r5   r	   r   r   ffn_hidden_sizer   w1w2r   )r9   r   	ff_dim_inr;   s      r<   r5   QWenMLP.__init__  s    ))""a'^^#% ))""a'^^#% **a/	ii))FNN0BDr>   c                     U R                  U5      nU R                  U5      nU[        R                  " U5      -  nU R	                  U5      nU$ rA   )r  r  Fsilur   )r9   r   a1a2intermediate_parallelr_   s         r<   r`   QWenMLP.forward  sD    WW]#WW]# "QVVBZ23r>   )r   r  r  rc   rj   s   @r<   r
  r
    s    D r>   r
  c                     ^  \ rS rSrSU 4S jjr       SS\\\R                        S\\\R                        S\\R                     S\\R                     S\\R                     S\\R                     S	\\
   S
\\
   4S jjrSrU =r$ )	QWenBlocki  c                 X  > [         TU ]  5         X0l        X l        UR                  U l        UR
                  nUR                  U l        UR                  U l        [        UUR                  S9U l	        [        XS9U l        [        UUR                  S9U l        [        U5      U l        g )Neps)r~   )r4   r5   
num_expertr~   (apply_residual_connection_post_layernormr   r   RMSNormlayer_norm_epsilonln_1rl   attnln_2r
  mlp)r9   r   r   r  r   r;   s        r<   r5   QWenBlock.__init__  s    $%;; 	5((;; 	5KK	))
	 "&A	))
	
 6?r>   r   r   r   r   r   r   r   r   c	           	      @   U R                  U5      n	U R                  U	UUUUUS9n
U
S   nU
SS  nU R                  (       a  U	nOUnX-   nU R                  U5      n	U R                  (       a  U	nOUnU R	                  U	5      nX-   nU(       a  U4U-   nU$ U4USS  -   nU$ )N)r   r   r   r   r   r   r#   )r!  r"  r  r#  r$  )r9   r   r   r   r   r   r   r   r   layernorm_outputattn_outputsr   r  residuallayernorm_input
mlp_outputs                   r<   r`   QWenBlock.forward  s      99]3yy!)/ ! 
 #1oqr"88'H$H%099_588'H&HXX./
 -$''1G  %''!"+5Gr>   )r  r"  r   r~   r!  r#  r$  r  )Nr#   r  )rd   re   rf   rg   r5   r   r   rC   r  r   rz   r`   rh   ri   rj   s   @r<   r  r    s    #4 596:158<>B$),1.e&7&7 89. U5<<01. !!2!23	.
 E--..  (5. !)):): ;. D>. $D>. .r>   r  c                   d   ^  \ rS rSr\rSrSrSrS/r	U 4S jr
S rSS jr\U 4S	 j5       rS
rU =r$ )QWenPreTrainedModeli  transformerFTr  c                 b   > [         TU ]  " UR                  40 UD6  [         [        U ]  U5        g rA   )r4   r5   name_or_pathr   )r9   r   kwargsr;   s      r<   r5   QWenPreTrainedModel.__init__  s*    ,,77eT#F+r>   c           	         [        U[        R                  5      (       aj  UR                  R                  R                  SU R                  R                  S9  UR                  b$  UR                  R                  R                  5         O[        U[        R                  5      (       aw  UR                  R                  R                  SU R                  R                  S9  UR                  b1  UR                  R                  UR                     R                  5         O:[        U[        5      (       a%  UR                  R                  R                  S5        UR                  5        Hi  u  p#US:X  d  M  UR                  R                  SU R                  R                  [        R                   " SU R                  R"                  -  5      -  S9  Mk     g)zInitialize the weights.rb   )meanstdNrt   zc_proj.weightr   )r   r	   r   weightdatanormal_r   initializer_rangero   zero_	Embeddingpadding_idxr  fill_named_parametersr   sqrtn_layer)r9   modulenameps       r<   _init_weights!QWenPreTrainedModel._init_weights
  sR   fbii((MM&&dkk;; ' ={{&  &&(--MM&&dkk;; ' =!!-""6#5#56<<>((MM$$S)..0GD&6699Q)<)<%<=>   1r>   c                 <    [        U[        5      (       a  X!l        g g rA   )r   	QWenModelgradient_checkpointing)r9   rB  r   s      r<   _set_gradient_checkpointing/QWenPreTrainedModel._set_gradient_checkpointing!  s    fi((,1) )r>   c                    > UR                  SS 5      nUc  [        S0 UD6nU " U5      nO[        [        U ]  " SSU0UD6nX$l        U$ )N	model_dirpretrained_model_name_or_path )popr$   r4   r   from_pretrainedrM  )clsr2  rM  r   modelr;   s        r<   _instantiate QWenPreTrainedModel._instantiate%  s]    JJ{D1	)&)FKE%5 C.7C;ACE#r>   rO  F)rd   re   rf   rg   r$   config_classbase_model_prefixis_parallelizablesupports_gradient_checkpointing_no_split_modulesr5   rE  rJ  classmethodrT  rh   ri   rj   s   @r<   r.  r.    sE    L%&*#$,.2 	 	r>   r.  )module_namec                     ^  \ rS rSrS/rU 4S jrS rS r             SS\\	R                     S\\\\	R                           S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\	R                     S\\   S\\   S\\   S\\   4S jjrSrU =r$ )rH  i2  zattn.masked_biasc           
        > [         TU ]  U5        UR                  U l        UR                  U l        UR
                  U l        UR                  nUR                  U l	        SU l
        U R                  S:X  aw  [        R                  " X R                  5      U l        U R                  U R                  R                   5        SU l        U R                  U R                  R                   5        OS U l        SU l        [        R                  " U R                  U R                  5      U l        [        R&                  " UR(                  5      U l        [        R,                  " [/        UR                  5       Vs/ s H  n[1        UUS9PM     sn5      U l        [5        U R                  UR6                  S9U l        U R;                  5         g s  snf )NFlearnedposition_embeddings )r   r  )r4   r5   padded_vocab_size
vocab_sizenum_hidden_layersr   	embed_dimrv   pos_embposition_embedding_typerI  r	   r<  wpeinit_methodra  r7  _position_embeddings_keywter   
embd_pdropdrop
ModuleListr   r  hr  r   ln_f	post_init)r9   r   max_sequence_lengthrH   r;   s       r<   r5   QWenModel.__init__6  sl     22!'!9!9++$<<'-~~$&+#''94||$7HDHT55<<=,AD)T55<<=DH,.D)<<@JJv001	 V556	 
 7!  7	 
  NN))
	
 	 
s   9Gc                     U R                   $ rA   rl  )r9   s    r<   get_input_embeddingsQWenModel.get_input_embeddingsY  s    xxr>   c                     Xl         g rA   rv  )r9   new_embeddingss     r<   set_input_embeddingsQWenModel.set_input_embeddings\  s    !r>   	input_idspast_key_valuesr   token_type_idsposition_idsr   inputs_embedsr   r   r   r   output_hidden_statesreturn_dictc                   ^
^ Tb  TOU R                   R                  mUb  UOU R                   R                  nT
b  T
OU R                   R                  m
Ub  UOU R                   R                  nUb  Ub  [        S5      eUb5  UR                  5       nUR                  SUS   5      nUR                  S   nO1Ub#  UR                  5       S S nUR                  S   nO[        S5      eUb  UR                  OUR                  nUb  UR                  SUS   5      nUb  UR                  SUS   5      nUc%  Sn[        S /[        U R                  5      -  5      nOUS   S   R                  S5      nUcO  [        R                  " UUS   U-   [        R                  US9nUR!                  S5      R                  SUS   5      nUby  US::  a  [        S5      eUR                  US5      nUS S 2S S S S 24   nUR#                  U R$                  S9nS	U-
  [        R&                  " U R$                  5      R(                  -  nS n	U R+                  X`R                   R,                  5      nUc  U R/                  U5      nUnU R0                  b  U R1                  U5      nUU-   nU R3                  U5      nUUR                  S5      4-   nU R4                  (       a/  U R6                  (       a  T
(       a  [8        R;                  S
5        Sm
T
(       a  SOS nT(       a  SOS nU(       a  SOS n[=        [?        U R                  U5      5       H  u  nu  nnU(       a  UU4-   nU R4                  (       aQ  U R6                  (       a@  UU
4S jn[        R@                  RB                  RC                  U" U5      US UUU   UU	5      nOU" UUUUU   UU	T
TS9nUS   nT
SL a  UUT(       a  SOS   4-   nT(       d  M  UUS   4-   nM     U RE                  U5      nUR                  U5      nU(       d  [        S UUU4 5       5      $ [G        UUUUS9$ )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   r   z5You have to specify either input_ids or inputs_embedsr   r   z$batch_size has to be defined and > 0rp   rt   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...FrO  c                    >^  U UU4S jnU$ )Nc                     > T" / U QTPTP76 $ rA   rO  )inputsrB  r   r   s    r<   custom_forwardHQWenModel.forward.<locals>.create_custom_forward.<locals>.custom_forward  s    %LvLyL:KLLr>   rO  )rB  r  r   r   s   ` r<   create_custom_forward0QWenModel.forward.<locals>.create_custom_forward  s    M *)r>   )r   r   r   r   r   r   r   Tr   r#   c              3   0   #    U  H  nUc  M  Uv   M     g 7frA   rO  )rG   rW   s     r<   rI   $QWenModel.forward.<locals>.<genexpr>  s      +"NQ "Ns   	)last_hidden_stater~  r   
attentions)$r   r   r  r   use_return_dictr   r   r{   rQ   rN   r   lenrp  rC   rR   long	unsqueezer   rB   r   r   get_head_maskrA  rl  ri  rn  rI  rT   loggerwarning_once	enumerateziputils
checkpointrq  r   )r9   r}  r~  r   r  r  r   r  r   r   r   r   r  r  input_shaperX   rN   past_lengthr   position_embedsoutput_shapepresentsall_self_attentionsall_hidden_statesrH   blockr   r  r  s             ``                 r<   r`   QWenModel.forward_  s   " "3!>KK)) 	 %9$D KK,, 	 "+!6IDKK<Q<Q	 & ,0KK,G,G 	  ]%>V  "#..*K!r;r?;I"+J&',,.s3K&,,Q/JGI I &/%:!!@T@T%+00[_EN#',,RRAL"K#TFS[$89O)!,Q/44R8K <<B+-jj	L (11!499"k"oNL%Q !GHH+00R@N+AtT1,<=N+..TZZ.@N!N2ekk

7C N "&&&y++2E2EF	  HHY/M%88"hh|4O)O;M		-0"m&8&8&<%??&&4==##p "	"2$5b4"6BD&/DFFO0L&M"A"z#$58I$I!**t}}*  ++00;;)%0!"aL)*  !)#1'l*?+A'&7	 $AJMD #!2A:'> > ! &9WQZN&J#S 'NV 		-0%**<8 +#0(<M"N+ + + '+$+*	
 	
r>   )rk  rn  rf  rI  rp  rq  re  rh  rd  ri  rl  )NNNNNNNNNNNNN)rd   re   rf   rg   _keys_to_ignore_on_load_missingr5   rw  r{  r   rC   
LongTensorr   r   r  rz   r`   rh   ri   rj   s   @r<   rH  rH  2  sW   '9&:#!F"
 15@D6:593715598<>B$(,0/3&*X
E,,-X
 "%ell(;"<=X
 !!2!23	X

 !!1!12X
 u//0X
 E--.X
   1 12X
  (5X
 !)):): ;X
 D>X
 $D>X
 'tnX
 d^X
 X
r>   rH  c                   D   ^  \ rS rSrSU 4S jjr  SS jrSS jrSrU =r$ )r   i  c                 "  > [         TU ]  5         Xl        X l        SU[        R
                  " SUS5      R                  5       U-  -  -  U l        [        R                  R                  S5      c  [        S5      eS U l        SU l        SU l        g )Nrt   r   r   einopsz'einops is required for Rotary Embedding)r4   r5   r   ru   rC   rR   r   inv_freq	importlibutil	find_specr   _rotary_pos_emb_cache_seq_len_cached_ntk_alpha_cached)r9   r   ru   r;   s      r<   r5   RotaryEmbedding.__init__  s}    	tell1c1&=&C&C&E&KLM>>##H-5HII%)" !$r>   c                    X-   nX@R                   :  d  X0R                  :w  Ga0  U R                  X0R                  U R                  S-
  -  -  -  n [        R
                  " SU R                  SU R                  R                  S9R                  5       U R                  -  U l        SXPR                  -  -  U l        X@l         X0l        [        R
                  " X@R                  R                  S9n[        R                  " UR                  U R                  5      U R                  5      n[        R                  " Xw4SS9nSSKJn	  U	" US5      U l        g g )	Nr   r   )rN   rt   r   r   r+   zn d -> 1 n 1 d)r  r  ru   r   rC   rR   r  rN   r   outerr   r   r  r,   r  )
r9   max_seq_lenoffsetr   seqlenru   seqfreqsembr,   s
             r<   update_rotary_pos_emb_cache+RotaryEmbedding.update_rotary_pos_emb_cache  s	    %(((I9O9O,O99y88txx!|+DEED "LL488Qt}}';';==BUWtxxPDM4#67DM#) %.",,vmm.B.BCCKKDMM :DMMJE))UN3C()238H)ID&% -Pr>   c                 T    U R                  XU5        U R                  S S 2X"U-   24   $ rA   )r  r  )r9   r  r  r   s       r<   r`   RotaryEmbedding.forward!  s0    ((iH))!V[4H-H*HIIr>   )r  r  r  ru   r   r  )i'  )r   rt   )	rd   re   rf   rg   r5   r  r`   rh   ri   rj   s   @r<   r   r     s"    
% ,-.1J2J Jr>   r   c                 p    SSK Jn  U" U SSS9n U R                  SS9u  p#[        R                  " U* U4SS9$ )	Nr   r+   z... (j d) -> ... j dr   )jr   r   r   )r  r,   unbindrC   r   )r[   r,   x1x2s       r<   _rotate_halfr  &  s=     !+q1AXX"XFB99rc2YB''r>   c                 v   U(       a  U R                  5       nUR                  S5      R                  S5      nUS S 2S UR                  S   S-  24   R                  5       nUS S 2S UR                  S   S-  24   R	                  5       n[        X4U5      R                  U 5      nU$ UR                  S   nU SS U24   U SUS 24   pUR                  5       nUR                  5       nX1R                  5       -  [        U5      UR	                  5       -  -   n[        R                  " X84SS9R                  U 5      $ )Nr   r#   r   r   .r   )
r   squeezerQ   cossinr-   r   r  rC   r   )	tr  use_flash_rotaryt_r  r  r_   rot_dimt_pass_s	            r<   r   r   .  s   WWYa ((+A,B1,,,-113A,B1,,,-113&r4<<Q?++b/XgX&#wx-(8GXXZ--/99;<#3eiik#AByy"B/77::r>   c                   D   ^  \ rS rSrSS\S\4U 4S jjjrS rS rSr	U =r
$ )	r  i?  r   r  c                    > [         TU ]  5         X l        [        R                  " [
        R                  " U5      5      U l        g rA   )r4   r5   r  r	   	ParameterrC   ry   r7  )r9   r   r  r;   s      r<   r5   RMSNorm.__init__A  s+    ll5::c?3r>   c                     U[         R                  " UR                  S5      R                  SSS9U R                  -   5      -  $ )Nr   r   T)keepdim)rC   rsqrtpowr5  r  )r9   r[   s     r<   _normRMSNorm._normF  s4    5;;quuQx}}R}>IJJJr>   c                     [         b1  UR                  (       a   [        XR                  U R                  5      $ U R	                  UR                  5       5      R                  U5      nX R                  -  $ rA   )r.   rL   r7  r  r  r   r   )r9   r[   r_   s      r<   r`   RMSNorm.forwardI  sO    AIIA{{DHH55ZZ	*2215FKK''r>   )r  r7  )gư>)rd   re   rf   rg   r   r   r5   r  r`   rh   ri   rj   s   @r<   r  r  ?  s+    4C 4e 4 4
K( (r>   r  rV  )]r  r   typingr   r   r   r   r   r   rC   torch.nn.functionalr	   r   r  torch.utils.checkpointtorch.cuda.ampr
   torch.nnr   transformersr   r   r   &transformers.generation.logits_processr   transformers.generation.utilsr   transformers.modeling_outputsr   r   transformers.modeling_utilsr   transformers.trainer_utilsr   transformers.utilsr   r   r   r   r   'transformers.utils.model_parallel_utilsr   r   
modelscoper   r   modelscope.metainfor   modelscope.utils.constantr   modelscope.utils.loggerr    rb  r"   configurationr$   qwen_generation_utilsr%   r&   r'   r(   r)   !transformers.generation.streamersr*   r  r,   ImportErrorflash_attn.layers.rotaryr-   r  printflash_attn.ops.rms_normr.   r  _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOC"QWen_PRETRAINED_MODEL_ARCHIVE_LISTflash_attn.flash_attn_interfacer/   Moduler1   rl   r
  r  r.  register_modulebackboneqwen_7brH  r   r  r   r  rO  r>   r<   <module>r     s     H H      # %0 0 F 8C 7 /P PE ) & + .  %2 2 > 	N> 0 
 &/[ ":H> >BFBII FRbii 0G		 GT0*o 0f FNNCD
# D
 DD
N)Jehhoo )JX(;"(ehhoo (e  I  N		MNN  H		Q   :#	 9 ::sH   >G G" G9 )H GG"G65G69HHH$#H$