
    9i                     X    S SK r S SK Jr  S SKJr  \" 5       rSS jr  SS jr  SS jrg)	    N)nn)
get_loggerc           
         U R                   S   n[        R                  " SS9n[        R                  " U" [        R
                  " SS9" U 5      [        R                  " SS9" U5      5      SS9[        R                  " U" [        R
                  " SS9" U5      [        R                  " SS9" U 5      5      SS9-   nUb4  [        R                  " XR-  5      [        R                  " U5      -  U-  nU$ [        R                  " U5      U-  nU$ )a  
Calculate two logits' the KL div value symmetrically.
:param logits1: The first logit.
:param logits2: The second logit.
:param attention_mask: An optional attention_mask which is used to mask some element out.
This is usually useful in token_classification tasks.
If the shape of logits is [N1, N2, ... Nn, D], the shape of attention_mask should be [N1, N2, ... Nn]
:return: The mean loss.
none)	reduction)dim)shaper   	KLDivLosstorchsum
LogSoftmaxSoftmaxmean)logits1logits2attention_mask
labels_numKLDivlosss         j/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/structbert/adv_utils.py_symmetric_kl_divr      s     r"JLL6*E99bmm#G,jjR )	+ ))"--B'0**$W-/D !yy!#%*YY~%>?AKL K zz$*,K    c                    Ub  UOSU-  nX R                   R                  U R                  5       5      R                  SU5      -   nUR	                  S5        SU;   a  UR	                  S5        SU;  a  SOUS   n	US   n
U	(       d  Sn
SU;   a  UR	                  S5        U" S0 UDSU0D6nUR
                  n[        X,U
5      n[        R                  R                  X5      S   R                   nUR                  SS	[        S
5      S9R                  SS	S9S   n[        R                  " [        R                  " U5      5      nU(       a  [        R!                  S5        U$ XS-   -  nXU-  -   n[        R                  " X-
  U5      n[        R"                  " X-   U5      nU" S0 UDSU0D6nUR
                  n[        UUU
5      nUU-   $ )a  
Calculate the adv loss of the model.
:param embedding: Original sentence embedding
:param model: The model, or the forward function(including decoder/classifier),
        accept kwargs as input, output logits
:param ori_logits: The original logits outputted from the model function
:param ori_loss: The original loss
:param adv_grad_factor: This factor will be multiplied by the KL loss grad and then the result will be added to
        the original embedding.
        More details please check:https://arxiv.org/abs/1908.04577
        The range of this value always be 1e-3~1e-7
:param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
        If not proveded, 2 * sigma will be used as the adv_bound factor
:param sigma: The std factor used to produce a 0 mean normal distribution.
        If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
:param kwargs: the input param used in model function
:return: The original loss adds the adv loss
N   r   	input_idsinputs_embedswith_attention_maskFr   Tinfr	   keepdimp   r!   z'Nan occurred when calculating adv loss.gư> datanewsizenormal_poplogitsr   r   autogradgradnormfloatmaxanyisnanloggerwarningmin)	embeddingmodel
ori_logitsori_lossadv_grad_factor	adv_boundsigmakwargsembedding_1r   r   outputs	v1_logitsr   emb_grademb_grad_normis_nanembedding_2
adv_logitsadv_losss                       r   compute_adv_lossrH   3   s   4 '2	E	Inn001ABJJ	5 K
JJ{& 

?##8#F%FM,-N&

()8f8K8GIZNCD~~""45a8==HMMtuU| " --0St .1 . M YYu{{=12F@A4/0H( ::K))K3[AK))K3[AK8f8K8GJ ZHHhr   c                 t   Ub  UOSU-  nX R                   R                  U R                  5       5      R                  SU5      -   n	UR	                  S5        SU;   a  UR	                  S5        U" S0 UDSU	0D6n
U
R
                  u  p[        UU5      [        X<5      -   nUS-  n[        R                  R                  X5      S   R                   nUR                  SS[        S5      S9R                  SSS	9S   n[        R                  " [        R                  " U5      5      nU(       a  [        R!                  S
5        U$ X-  nXU-  -   n[        R                  " X-
  U5      n[        R"                  " X-   U5      nU" S0 UDSU0D6n
U
R
                  u  nn[        UU5      [        UU5      -   nUU-   $ )ax  
Calculate the adv loss of the model. This function is used in the pair logits scenario.
:param embedding: Original sentence embedding
:param model: The model, or the forward function(including decoder/classifier),
        accept kwargs as input, output logits
:param start_logits: The original start logits outputted from the model function
:param end_logits: The original end logits outputted from the model function
:param ori_loss: The original loss
:param adv_grad_factor: This factor will be multiplied by the KL loss grad and then the result will be added to
        the original embedding.
        More details please check:https://arxiv.org/abs/1908.04577
        The range of this value always be 1e-3~1e-7
:param adv_bound: adv_bound is used to cut the top and the bottom bound of the produced embedding.
        If not proveded, 2 * sigma will be used as the adv_bound factor
:param sigma: The std factor used to produce a 0 mean normal distribution.
        If adv_bound not proveded, 2 * sigma will be used as the adv_bound factor
:param kwargs: the input param used in model function
:return: The original loss adds the adv loss
r   r   r   r   Tr   r    r#   r$   z,Nan occurred when calculating pair adv loss.r%   r&   )r7   r8   start_logits
end_logitsr:   r;   r<   r=   r>   r?   r@   v1_logits_startv1_logits_endr   rB   rC   rD   rE   adv_logits_startadv_logits_endrG   s                        r   compute_adv_loss_pairrP   o   s   8 '2	E	Inn001ABJJ	5 K
JJ{& 

?#8f8K8G%,^^"O\,.0A!+1<<D !8D~~""45a8==HMMtuU| " --0St .1 . M YYu{{=12FEF'H( ::K))K3[AK))K3[AK8f8K8G'.~~$n !135F%/6AAH hr   )N)Ngh㈵>)r   r   modelscope.utils.loggerr   r4   r   rH   rP   r%   r   r   <module>rR      s8       .	@  $9D %) $9r   