
    9i_H              	          S r SSKrSSKJr  SSKJr  SSKJrJrJ	r	J
r
Jr  SSKrSSKrSSKrSSKJr  SSKJrJrJrJrJrJr  SSKJr  SS	KJr  SS
KJrJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  SSK*J+r+  SSK,J-r-  SSK.J/r/  SSK0J1r1  \1" 5       r2S/r3S r4 " S S\5      r5 " S S\5      r6\'Rn                  " \/Rp                  \#Rr                  S9 " S S\%5      5       r:   S#S\\\Rv                        S\<S\<S\<4S  jjr=  S$S\\\Rv                        S\<S\<4S! jjr>  S$S\\\Rv                        S\<S\<4S" jjr?g)%zPyTorch UniTE model.    N)	dataclass)ceil)DictListOptionalTupleUnion)version)DropoutLinearModule	ParameterParameterList
Sequential)softmax)pad_sequence)XLMRobertaConfigXLMRobertaModelACT2FN)Models)
TorchModel)MODELS)InputFormat)TranslationEvaluationOutput)compatible_position_ids)Tasks)
get_loggerUniTEForTranslationEvaluationc                    UR                  SS9nUR                  5       U R                  S5      -  nX-  nUR                  / SQSS9U-  nXE-
  U-  S-  R                  / SQSS9U-  nX-
  [        R                  " US-   5      -  $ )Ndim)r!   T)keepdim   g-q=)	unsqueezesumsizetorchsqrt)tensor
mask_floatbroadcast_masknum_elements_not_maskedtensor_maskedmeanvariances          r/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/unite/translation_evaluation.py_layer_norm_allr5   #   s    ))b)1N,002V[[_D+M\%)  +-DED&.81<AAd B $&=>H MUZZ5(8999    c            	          ^  \ rS rSr SS\S\S\SS4U 4S jjjr SS\\R                     S	\R                  S\R                  4S
 jjr
SrU =r$ )LayerwiseAttention0   N
num_layers	model_dimdropoutreturnc                   > [         [        U ]  5         Xl        X l        X0l        [        [        R                  " U4SS95      U l	        [        [        R                  " S/5      SS9U l        U R
                  (       a  [        R                  " [        U R                  5      5      n[        R                  " [        U R                  5      5      R                  S5      nU R                  SU5        U R                  SU5        g g )NT)requires_gradg      ?g@xdropout_maskdropout_fill)superr8   __init__r:   r;   r<   r   r+   zerosscalar_parametersFloatTensorgammalenemptyfill_register_buffer)selfr:   r;   r<   r@   rA   	__class__s         r4   rC   LayerwiseAttention.__init__2   s     	 $02$"!*KKd;"=u00#7tL
<< ;;s4+A+A'BCL ;;s&&(( )).u   >  > r6   tensorsmaskc                     [         R                  " [        S U 5       5      SS9nU R                  (       ak  U R                  (       aZ  [        [         R                  " U R                  R                  5       U R                  :  U R                  U R                  5      SS9nO[        U R                  SS9nUR                  SSSS5      nUR                  5       nU[        X5      -  R                  SS9nUS S 2SS S 24   nU R                  U-  $ )Nc              3   >   #    U  H  oR                  S S9v   M     g7fr   r"   N)r(   .0xs     r4   	<genexpr>-LayerwiseAttention.forward.<locals>.<genexpr>M   s      EW!3Ws   r   r"   r!      )r+   catlisttrainingr<   r   wherer@   uniform_rE   rA   viewfloatr5   r)   rG   )rL   rO   rP   normed_weightsr.   weighted_sums         r4   forwardLayerwiseAttention.forwardH   s    
 ))D EW EE1M==T\\$D--6684<<G 22D4E4EGN
 %T%;%;DN',,RAq9ZZ\
&)'>?@C
 	#Aq!G,zzL((r6   )r<   rG   r;   r:   rE   N)__name__
__module____qualname____firstlineno__intr`   rC   r   r+   Tensorrc   __static_attributes____classcell__rM   s   @r4   r8   r8   0   st     	?? ? 	?
 
? ?2 ")ell#) ll) 
	) )r6   r8   c                      ^  \ rS rSrSSS/SSS4S\S	\S
\\   S\S\\   S\SS4U 4S jjjr	S\S\
4S jrS\R                  S\R                  4S jrSrU =r$ )FeedForwarda   rY      i   SigmoidN皙?in_dimout_dimhidden_sizesactivationsfinal_activationr<   r=   c                 r  > [         T	U ]  5         / nUR                  [        XS   5      5        UR                  U R	                  U5      5        UR                  [        U5      5        [        S[        U5      5       H_  nUR                  [        X8S-
     X8   5      5        UR                  U R	                  U5      5        UR                  [        U5      5        Ma     UR                  [        US   [        U5      5      5        Ub   UR                  U R	                  U5      5        [        U6 U l
        g)a  
Feed Forward Neural Network.

Args:
    in_dim (:obj:`int`):
        Number of input features.
    out_dim (:obj:`int`, defaults to 1):
        Number of output features. Default is 1 -- a single scalar.
    hidden_sizes (:obj:`List[int]`, defaults to `[3072, 768]`):
        List with hidden layer sizes.
    activations (:obj:`str`, defaults to `Sigmoid`):
        Name of the activation function to be used in the hidden layers.
    final_activation (:obj:`str`, Optional, defaults to `None`):
        Name of the final activation function if any.
    dropout (:obj:`float`, defaults to 0.1):
        Dropout ratio to be used in the hidden layers.
r   rY   r!   N)rB   rC   appendr   build_activationr   rangerH   rj   r   ff)
rL   ru   rv   rw   rx   ry   r<   modulesirM   s
            r4   rC   FeedForward.__init__c   s    4 	vf1o67t,,[9:ww'(q#l+,ANN6,1u"5|GHNN400=>NN77+, -
 	vl2.G=>'NN4001ABCg&r6   
activationc                     [         U   $ re   r   )rL   r   s     r4   r|   FeedForward.build_activation   s    j!!r6   in_featuresc                 $    U R                  U5      $ re   r~   )rL   r   s     r4   rc   FeedForward.forward   s    ww{##r6   r   )rf   rg   rh   ri   rj   r   strr   r`   rC   r   r|   r+   rk   rc   rl   rm   rn   s   @r4   rp   rp   a   s    
 #'+$*.)')' )' 3i	)'
 )' #3-)' )' 
)' )'V"3 "6 "$5<< $ELL $ $r6   rp   )module_namec            )       >  ^  \ rS rSrSSSSSSSSS	S
SSSSSSSS/SSS4S\S\S\S\S\S\S\S\S\S\S\S\S\S \S!\S"\S#\\   S$\S%\	\   S&\4(U 4S' jjjr
  S2S(\R                  S)\	\\      S*\	\R                     S+\4S, jjrS-\S.\R                   S/\4S0 jrS1rU =r$ )3r      rt   r   r'   rY   gelui   g{Gz?i   gh㈵>         Ti rr   tanhNattention_probs_dropout_probbos_token_ideos_token_idpad_token_id
hidden_acthidden_dropout_probhidden_sizeinitializer_rangeintermediate_sizelayer_norm_epsmax_position_embeddingsnum_attention_headsnum_hidden_layerstype_vocab_size	use_cache
vocab_sizemlp_hidden_sizesmlp_actmlp_final_actmlp_dropoutc           	        > [         TU ]  " S0 UD6  Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl	        Xl
        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UU l        UU l        UU l        UU l        [-        S0 SU R                  _SU R                  _SU R
                  _SU R"                  _SU R                  _SU R                  _SU R                  _SU R                  _S	U R                  _S
U R                  _SU R                  _SU R                  _SU R                  _SU R                  _SU R                  _SU R                   _6U l        [1        U R.                  SS9U l        [5        U R                  S-   U R                  U R*                  S9U l        [9        U R                  SU R$                  U R&                  U R(                  U R*                  S9U l        g)ah	  The UniTE Model which outputs the scalar to describe the corresponding
translation quality of hypothesis. The model architecture includes two
modules: a pre-trained language model (PLM) to derive representations,
and a multi-layer perceptron (MLP) to give predicted score.

Args:
    attention_probs_dropout_prob (:obj:`float`, defaults to 0.1):
        The dropout ratio for attention weights inside PLM.
    bos_token_id (:obj:`int`, defaults to 0):
        The numeric id representing beginning-of-sentence symbol.
    eos_token_id (:obj:`int`, defaults to 2):
        The numeric id representing ending-of-sentence symbol.
    pad_token_id (:obj:`int`, defaults to 1):
        The numeric id representing padding symbol.
    hidden_act (:obj:`str`, defaults to :obj:`"gelu"`):
        Activation inside PLM.
    hidden_dropout_prob (:obj:`float`, defaults to 0.1):
        The dropout ratio for activation states inside PLM.
    hidden_size (:obj:`int`, defaults to 1024):
        The dimensionality of PLM.
    initializer_range (:obj:`float`, defaults to 0.02):
        The hyper-parameter for initializing PLM.
    intermediate_size (:obj:`int`, defaults to 4096):
        The dimensionality of PLM inside feed-forward block.
    layer_norm_eps (:obj:`float`, defaults to 1e-5):
        The value for setting epsilon to avoid zero-division inside
            layer normalization.
    max_position_embeddings: (:obj:`int`, defaults to 512):
        The maximum value for identifying the length of input sequence.
    num_attention_heads (:obj:`int`, defaults to 16):
        The number of attention heads inside multi-head attention layer.
    num_hidden_layers (:obj:`int`, defaults to 24):
        The number of layers inside PLM.
    type_vocab_size (:obj:`int`, defaults to 1):
        The number of type embeddings.
    use_cache (:obj:`bool`, defaults to :obj:`True`):
        Whether to use cached buffer to initialize PLM.
    vocab_size (:obj:`int`, defaults to 250002):
        The size of vocabulary.
    mlp_hidden_sizes (:obj:`List[int]`, defaults to `[3072, 1024]`):
        The size of hidden states inside MLP.
    mlp_act (:obj:`str`, defaults to :obj:`"tanh"`):
        Activation inside MLP.
    mlp_final_act (:obj:`str`, `optional`, defaults to :obj:`None`):
        Activation at the end of MLP.
    mlp_dropout (:obj:`float`, defaults to 0.1):
        The dropout ratio for MLP.
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   F)add_pooling_layerrY   )r:   r;   r<   )ru   rv   rw   rx   ry   r<   N )rB   rC   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   encoder_configr   encoderr8   layerwise_attentionrp   	estimator)rL   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   kwargsrM   s                         r4   rC   &UniTEForTranslationEvaluation.__init__   s    L 	"6",H)((($#6 &!2!2,'>$#6 !2."$ 0*&. &**&**& **& 	&
 ((& #44& !% 8 8& #44& & !% 8 8& *.)J)J& %)$@$@& !00& #44&  ..&  nn!&$ '5: $6--1&&$$$& 
 %##..!//$$& 	r6   	input_idsinput_formatscorer=   c                 ~   UR                  U R                  5      R                  5       nU R                  UUSSS9nU R	                  US   U5      nU R                  U5      R                  SS9n[        UR                  5       R                  5       US9n	Ub%  X-
  R                  S5      R                  5       n
XS'   U	$ )	NT)r   attention_maskoutput_hidden_statesreturn_dicthidden_statesr!   r"   )r   r   r'   loss)ner   longr   r   r   squeezer   cputolistpowr2   )rL   r   r   r   r   r   outputs
mix_statespredoutputr   s              r4   rc   %UniTEForTranslationEvaluation.forward  s    
 #d&7&78==?,,)!%	  
 --go.F.<>
~~j)11b19,((*##%LB L%%a(--/D!6Nr6   pathdeviceplm_onlyc                 &   U(       aA  U R                   R                  U5      R                  U5      U l         S U R                   l        O1[        R
                  " XS9n[        US5        U R                  U5        [        R                  SU-  5        g )N)map_locationzencoder.embeddings.position_idsz%Loading checkpoint parameters from %s)
r   from_pretrainedtopoolerr+   loadr   load_state_dictloggerinfo)rL   r   r   r   
state_dicts        r4   load_checkpoint-UniTEForTranslationEvaluation.load_checkpoint0  sq    <<77=@@HDL"&DLLD>J#J$EG  ,;dBCr6   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )NN)rf   rg   rh   ri   r`   rj   r   boolr   r   rC   r+   rk   r   r   rc   r   r   rl   rm   rn   s   @r4   r   r      s    8;%&%&%&#).1$(,0*.).03,.*,()#'#)04d| &04&))/4"  #  #	
 ! ', " %* %( "' +. '* %( #& !  !!" $(9#$ %& !)'( $) F =A04 <<&tK'89  - 9	.
C 
 
 
 
r6   all_input_concatmaximum_lengthpad_idxeos_idxc                 x    U SS   H  nX4S S 2S4'   M     [        U 5      S:X  a  [        XU5      $ [        XU5      $ )NrY   r      )rH   cut_long_sequences3cut_long_sequences2)r   r   r   r   groups        r4   combine_input_sentencesr   =  sL     "!"%ad & !"#3WMM"#3WMMr6   c           
        ^^^ [        [        U 6 5      n [        5       nU  GH{  n[        U4S jU 5       5      n[        S U 5       5      n[        U5      U:  Ga/  [	        [        U5      5      m[        S [        TR                  5       S SS9 5       5      n[        [        TR                  5       5      U-
  S-  5      m[        U5      US-  :  a3  [        U5      T:  a$  [	        U4S jTR                  5        5       5      mOUTUS	      -
  TUS
   '   [        U4S j[        S
[        U5      5       5       5      n[        S [        XG5       5       5      n[        X5       H  u  pU
S   U	S'   M     UR                  U5        GMj  UR                  U5        GM~     [        S U 5       5      n[        USTS9nU$ )Nc              3   b   >#    U  H$  oR                  UR                  T5      5      v   M&     g 7fre   masked_selectr   rU   rV   r   s     r4   rW   &cut_long_sequences2.<locals>.<genexpr>P  '      B4@qOOADDM**L   ,/c              3   8   #    U  H  n[        U5      v   M     g 7fre   rH   rT   s     r4   rW   r   R       6AQ   c              3   *   #    U  H	  oS    v   M     g7fr   Nr   rT   s     r4   rW   r   V        (D 7C! 7C   c                     U S   $ NrY   r   ds    r4   <lambda>%cut_long_sequences2.<locals>.<lambda>W      qtr6   Tkeyreverser'   c              3   4   >#    U  H  u  pXT-
  4v   M     g 7fre   r   rU   kvoffsets      r4   rW   r   ]       K?41v:?   rY   r   c              3   .   >#    U  H
  nTU   v   M     g 7fre   r   rU   r   lengthss     r4   rW   r   b  s     L0K1GAJ0K   c              3   0   #    U  H  u  pUS U v   M     g 7fre   r   rU   rV   ys      r4   rW   r   c         %N1L &'rU1L   r!   c              3   L   #    U  H  n[         R                  " US S9v   M     g7frS   r+   rZ   rT   s     r4   rW   r   k       G6F1!,6F   "$batch_firstpadding_valuer[   ziptupler)   dict	enumeratesorteditemsr   valuesminr}   rH   r{   r   r   r   r   collected_tuplestensor_tupleall_lenslengths_sorted_idxesnew_lensnew_tensor_tuplerV   r  concat_tensorall_input_concat_paddedr   r   s     `          @@r4   r   r   J  s    C!123v( B4@B B666x=>)9X./G#' (Df^T7C (D $D  3w~~/0>AQFGF8}#$!% &*-h-&*@K7==?KK3AG(+E- 4-,Q/0 La\9J0KLLH$ %N14\1L%N  N,;"" <##$45##L15 )8 G6FGGM*4w@""r6   c           
      j  ^^^ [        [        U 6 5      n [        5       nU  GHp  n[        U4S jU 5       5      n[        S U 5       5      n[        U5      U:  Ga$  [	        [        U5      5      m[        S [        TR                  5       S SS9 5       5      n[        [        TR                  5       5      U-
  S-  5      m[        U5      US-  :  a4  [        U5      T:  a%  [	        U4S jTR                  5        5       5      mGO[        TR                  5       5      U:  a  TUS	      TUS
      :  a8  UTUS
      -
  TUS      -
  mTTUS
      :  a	  TTUS	   '   OTUS
      TUS	   '   OTUS	      TUS
      s=:X  a  TUS      :  aE  O  OBUTUS      -
  S-  mTTUS      :  a  T=TUS	   '   TUS
   '   O2TUS      =TUS	   '   TUS
   '   OUS-  =TUS	   '   =TUS
   '   TUS   '   [        TR                  5       5      U:  a  M  [        U4S j[        S	[        T5      5       5       5      n[        S [        XG5       5       5      n[        X5       H  u  pU
S   U	S'   M     UR                  U5        GM_  UR                  U5        GMs     [        S U 5       5      n[        USTS9nU$ )Nc              3   b   >#    U  H$  oR                  UR                  T5      5      v   M&     g 7fre   r   r   s     r4   rW   &cut_long_sequences3.<locals>.<genexpr>w  r   r   c              3   8   #    U  H  n[        U5      v   M     g 7fre   r   rT   s     r4   rW   r"  y  r   r   c              3   *   #    U  H	  oS    v   M     g7fr   r   rT   s     r4   rW   r"  }  r   r   c                     U S   $ r   r   r   s    r4   r   %cut_long_sequences3.<locals>.<lambda>~  r   r6   Tr   r   c              3   4   >#    U  H  u  pXT-
  4v   M     g 7fre   r   r   s      r4   rW   r"    r   r   r   rY   r'   c              3   .   >#    U  H
  nTU   v   M     g 7fre   r   r   s     r4   rW   r"    s     G0F1GAJ0Fr  c              3   0   #    U  H  u  pUS U v   M     g 7fre   r   r  s      r4   rW   r"    r  r  r!   c              3   L   #    U  H  n[         R                  " US S9v   M     g7frS   r  rT   s     r4   rW   r"    r	  r
  r  r  r  s     `          @@r4   r   r   q  sf    C!123v( B4@B B666x=>)9X./G#' (Df^T7C (D $D  3w~~/0>AQFGF8}#$!% &*-h-&*@K7==?KK'..*+n<3A67'03;5 5!/':N; 3  " ")*>q*A"B"C!G,@,C$DD?EG$8$;<?F 4Q 7@9G$8$;< !5a!89W03>5 97> 4Q 7899 #1$+,@,C$D#EIJ"K!G,@,C$DD;ABG$8$;<w 4Q 7@9 <C$8$;<==G$8$;<w 4Q 7@9 <JQ;NO 4Q 78 O703<57> 4Q 7891 '..*+n<6 GaW0FGGH$ %N14\1L%N  N ,;"" <##$45##L1g )j G6FGGM*4w@""r6   )r   rY   r'   )r   rY   )@__doc__warningsdataclassesr   mathr   typingr   r   r   r   r	   numpynpr+   torch.utils.checkpoint	packagingr
   torch.nnr   r   r   r   r   r   torch.nn.functionalr   torch.nn.utils.rnnr   transformersr   r   transformers.activationsr   modelscope.metainfor   modelscope.models.baser   modelscope.models.builderr   )modelscope.models.nlp.unite.configurationr   modelscope.outputs.nlp_outputsr   -modelscope.utils.compatible_with_transformersr   modelscope.utils.constantr   modelscope.utils.loggerr   r   __all__r5   r8   rp   register_moduletranslation_evaluationuniter   rk   rj   r   r   r   r   r6   r4   <module>rE     sa     !  5 5    " " ' + : + & - , A F + .	*
+
:.) .)b1$& 1$h 44&,,OdJ d PdP 36+,+,
Nd43E.F 
N,/
N%(
N &)
N /2'($#$tELL/A*B $#(+$#!$$#P /2'(=#$tELL/A*B =#(+=#!$=#r6   