
    9i7                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJrJr  S SKrS SKrS SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKJ r   S SK!J"r"  S SK#J$r$J%r%  S SK&J'r'J(r(  \\)\*\SS4   r+\ RX                  " \-5      r.S\R^                  S'   S/r0\"Rb                  " \(Rd                  \Rd                  S9 " S S\ 5      5       r3g)    N)deepcopy)ceil)time)AnyDict	GeneratorListMappingOptionalUnion)softmax)autocast)tqdm)	Pipelines)Model)	MsDataset)
OutputKeys)Pipeline)	PIPELINES)PreprocessorSiameseUiePreprocessor)	ModelFileTaskszImage.Imageznumpy.ndarraytrueTOKENIZERS_PARALLELISMSiameseUiePipeline)module_namec            	          ^  \ rS rSr    SS\\\4   S\\   S\S\4U 4S jjjr	S\
\\4   S\
\\4   4S	 jrS
\\\\   4   S\\
\\4   \4   4S jrS rS rS rS rS rS rSrU =r$ )r   #   modelpreprocessorconfig_filedevicec                   > [         TU ]  UUUUUUR                  SS5      UR                  S0 5      S9  [        U R                  [
        5      (       d   S[        R                   35       eU R                  c0  [        R                  " U R                  R                  40 UD6U l        U R                  R                  5         SU l        SU l        S	U l        S
U l        SU l        g)u  Use `model` and `preprocessor` to create a generation pipeline for prediction.

Args:
    model (str or Model): Supply either a local model dir which supported the text generation task,
    or a model id from the model hub, or a torch model instance.
    preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
    the model if supplied.
    kwargs (dict, `optional`):
        Extra kwargs passed into the preprocessor's constructor.

Examples:
    >>> from modelscope.pipelines import pipeline
    >>> pipeline_ins = pipeline(Tasks.siamese_uie,
    >>>    model='damo/nlp_structbert_siamese-uie_chinese-base')
    >>> sentence = '1944年毕业于北大的名古屋铁道会长谷口清太郎等人在日本积极筹资，共筹款2.7亿日元，参加捐款的日本企业有69家。'
    >>> print(pipeline_ins(sentence, schema={'人物': None, '地理位置': None, '组织机构': None}))

    To view other examples please check tests/pipelines/test_siamese_uie.py.
compileFcompile_options)r    r!   r"   r#   auto_collater%   r&   z,please check whether model config exists in Ni`  i        g      ?)super__init__pop
isinstancer    r   r   CONFIGURATIONr!   r   from_pretrained	model_direval	slide_lenmax_lenhint_max_leninference_batch_size	threshold)selfr    r!   r"   r#   r'   kwargs	__class__s          m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/pipelines/nlp/siamese_uie_pipeline.pyr+   SiameseUiePipeline.__init__'   s    4 	%#%JJy%0"JJ'8"= 	 	? $**e,, 	U:9;R;R:ST	U, $ , < <

$$!0(.!0D

$%!    inputsreturnc                     g )N )r7   r=   s     r:   postprocessSiameseUiePipeline.postprocessW   s    r<   inputc                    SU;   a)  UR                  S5      nU(       a  US:  a  [        S5      eU R                  (       a!  U R                  (       d  U R	                  5         UnUR                  S5      n[        U5      [        :X  a  [        R                  " U5      nUR                  SS5      nU R                  U/5      S   n/ n	/ n
U R                  XXXU	U5        SU	0$ )	u  
Args:
    input(str): sentence to extract
    schema: (dict or str) schema of uie task
Default Returns:
    List[List]:  predicted info list i.e.
    [[{'type': '人物', 'span': '谷口清太郎', 'offset': [18, 23]}],
    [{'type': '地理位置', 'span': '日本', 'offset': [26, 28]}],
    [{'type': '地理位置', 'span': '日本', 'offset': [48, 50]}],
    [{'type': '组织机构', 'span': '北大', 'offset': [8, 10]}],
    [{'type': '组织机构', 'span': '名古屋铁道', 'offset': [11, 16]}]]

batch_size   z,This pipeline do not support batch inferenceschemaoutput_all_prefixFr   output)r,   	Exceptionr    _model_prepareprepare_modeltypestrjsonloadsr!   forward)r7   rC   argsr8   rE   textrG   rH   tokenized_textpred_info_listprefix_infos              r:   __call__SiameseUiePipeline.__call__Z   s     6!L1Jj1n NOO::&&""$ H%<3ZZ'F"JJ':EB**D6215T;&	(.))r<   c           	      Z    US==   U/U R                   [        US   5      -
  -  -  ss'   U$ )N)r3   len)r7   	input_idspad_token_ids      r:   _padSiameseUiePipeline._pad   s/    ",4<<#im:L+LMMr<   c                 B   U R                  USSU R                  S9n/ n[        U5      U R                  :  a1  [	        [        U5      U R                  -
  U R
                  -  5      S-   OSn[        U5       Vs/ s H7  nUR                  XpR
                  -  XpR
                  -  U R                  -    PM9     nn[        U5       Vs/ s H7  nUR                  XpR
                  -  XpR
                  -  U R                  -    PM9     n	nUS:  a$  U R                  US5      nU R                  U	S5      n	[        R                  " U[        R                  U R                  S9n[        R                  " U	[        R                  U R                  S9n	UR                  S5      U R                  -  S-   n
[        R                   " X5      n[        R                   " X5      n/ n[        R"                  " 5          [%        5          ['        UU5       H1  u  pU R(                  R+                  X5      nUR-                  U5        M3     S S S 5        S S S 5        [        R.                  " USS9n[        R.                  " USS9n[        [        U5      5       He  nX?   nXO   n[        U5       HK  nXpR
                  -  nUS-   U-   UUUX   UR                  X   UR                  S.nUR-                  U5        MM     Mg     U$ s  snf s  snf ! , (       d  f       N= f! , (       d  f       N= f)	NT)padding
truncation
max_lengthrF   r   dtyper#   )dimz--)idhintrS   shiftsequence_outputhint_token_idsattention_maskscross_attention_masks)r!   r4   r[   r3   r   r2   rangeidsattention_maskr^   torchtensorlongr#   sizer5   tensor_splitno_gradr   zipr    get_plm_sequence_outputappendcat)r7   rS   rT   hintstokenized_hintstokenized_data	split_numj	token_idsrl   	batch_numall_token_idsall_attention_masksall_sequence_outputrj   irh   tokenized_hintaitems                       r:   tokenize_sample"SiameseUiePipeline.tokenize_sample   s   ++4DT=N=N , P &)%84<<%G  4<</nn !"MN 	
 9>i8H
8H1 q>>1!nn2D!%3. /8H 	 
 9%
 & ))!nn*<Q=O,0LL>9 :% 	 

 q=		)Q/I"ii;OLLUZZ=	,,5::dkkCNN1%)B)BBQF	**9@#00L ]]_25m6I3K.I&*jj&H&H!'4O'..?	3K   $ii(;C#ii(;Cs5z"A8D,/N9%&+,  ':'=&4&8&8':'=-;-J-J	 %%d+ & #" ]

"  _s1   8>K5>K:/L:AK?<L?
L		L
Lc                    U R                  XU5      n[        R                  " U Vs/ s H  oUS   PM	     sn5      n[        R                  " U Vs/ s H  oUS   PM	     sn5      n[        R                  " U Vs/ s H  oUS   PM	     sn[        R                  U R
                  S9n[        R                  " U Vs/ s H  oUS   PM	     sn[        R                  U R
                  S9n	UR                  S5      U R                  -  S-   n
[        R                  " Xj5      n[        R                  " Xz5      n[        R                  " X5      n[        R                  " U	U
5      n	XFUX44$ s  snf s  snf s  snf s  snf )Nrj   rl   rk   rd   rm   r   rF   )	r   rq   stackrr   rs   r#   rt   r5   ru   )r7   rS   rT   r{   r}   r   rj   rl   rk   rm   r   s              r:   "get_tokenized_data_and_data_loader5SiameseUiePipeline.get_tokenized_data_and_data_loader   sZ   --dEJ++1?@#$@B++1?@#$@B0>?"#?**;;  !&7EF~t)*~F**;;! 
 $((+t/H/HH1L	,,_H,,_H++NF % 2 23H3<!> . G G 	G% A@? Gs   E'E,9E1:E6c                 \   / n[        [        U5      5       Vs/ s H  ocU   U R                  :  d  M  UPM     nnU H[  n[        U[        U5      5       H?  n	XI   U R                  :  d  M  X(   S   n
X)   S   nX/XU S.nUR                  U5          MY     M]     [	        US S9nU$ s  snf )Nr   rF   )offsetspanc                     [        U S   5      $ )Nr   )tuple)xs    r:   <lambda>1SiameseUiePipeline.get_entities.<locals>.<lambda>   s    58+=r<   )key)rn   r[   r6   ry   sorted)r7   rS   offsets
head_probs
tail_probssample_entitiesr   potential_headsphpt	char_head	char_tailes                r:   get_entitiesSiameseUiePipeline.get_entities   s    S_-
-!A1OA- 	 
 "BBJ0>DNN2 'AI 'AI#,"8 $y 9A $**1- 1 " !!=?!
s
   B)B)c                    / nU H6  nSnU H  nXxS    SUS    S3-  nM     Xv S3-  nUR                  U5        M8     U R                  XU5      u  p/ nS n/ n/ n/ n[        R                  " 5          [	        5          [        U
6  HI  nU R                  R                  " U6 u  nnUR                  5       UR                  5       nnUU-  nUU-  nMK     S S S 5        S S S 5        U	R                  SS05        UR                  S 5        UR                  S 5        [        XU5       GH#  u  nnnUS   nUR                  SS	5      UUS
.nUb  UU:w  a  [        UR                  5      nS/U-  nS/U-  nU H  nUS   nUS   nUS   n[        U5      n[        U5       H^  nUU-   U:  d  M  UUU-      S:X  a  UU   OUUU-      UU   -   S-  UUU-   '   UUU-      S:X  a  UU   OUUU-      UU   -   S-  UUU-   '   M`     M     UR                  nU R                  UUUU5      n UR                  U 5        / nUR                  U5        UnGM&     / n![        XM5       HH  u  nn U  H<  n"[        U5      n#UU"S   U"S   S.nU#R                  U5        U!R                  U#5        M>     MJ     U!$ ! , (       d  f       GN= f! , (       d  f       GN= f)N rM   z: r   z, rg   WhatADifferentUUiDri   r   )ri   headtailrZ   r   r      r   )rM   r   r   )ry   r   rq   rv   r   rw   r    fast_inferencetolistgetr[   r   rn   r   r   )$r7   rS   rT   rV   schema_typesr{   strh   r   all_valid_tokenized_dataall_tensor_dataprobs	last_uuidall_pred_entitiesall_head_probsall_tail_probs
batch_databatch_head_probsbatch_tail_probstokenized_sampler   r   uuidprob
len_tokensprob_tmpri   r   r   len_subr   r   pred_entitiesnext_prefix_infosr   pis$                                       r:   get_prefix_infos#SiameseUiePipeline.get_prefix_infos   sk   BD#<.4<.;; $d"IDLL  594[4[%5)1 	]]_"%"7J9=9R9R#:%6$&69I9P9P :'..0 '7$"&66N"&66N #8   	!''/C )+ 	,d#d#8;(.9J4j*#D)D)--gq9""D
 $): !7!78
 TJ.
 TJ.
 %H$W-E#F+D#F+D!$iG"7^u9z1?I !E	@+.0@1DG7A!e)7L9=a8ADE7F 'q5y1 @J !E	@+.0@1DG7A!e)7L9=a8ADE7F 'q5y1 , !& )00 $ 1 1$2<!>!((7LLIC9JD !$\!EB"k* "AfI8M		$!((,	 # "F ! k  _s%   2J9=AJ'J9'
J6	1J99
Kc           	          U R                  XUU5      nU HO  nXCS   S      nUc  UR                  U5        M#  U(       a  UR                  U5        U R                  XUXU5        MQ     g )NrZ   rM   )r   ry   rQ   )	r7   rS   rT   rV   curr_schema_dictrU   rH   r   next_schema_dicts	            r:   rQ   SiameseUiePipeline.forward/  sw     !11$2=2BD -K/B0GH'%%k2$"))+6T;-.0 -r<   )r4   r5   r3   r!   r2   r6   )NNcpuT)__name__
__module____qualname____firstlineno__r   r   rN   r   r   r+   r   r   rA   Inputr	   r   rW   r^   r   r   r   r   rQ   __static_attributes____classcell__)r9   s   @r:   r   r   #   s     9=$($".eSj).'5. ". 	. .`$sCx. T#s(^ #*eE4;$67 #*#DcNI$=>#*J5nG0(G!R0 0r<   )4loggingospathlibcopyr   mathr   r   typingr   r   r   r	   r
   r   r   rO   rq   scipy.specialr   torch.cuda.ampr   r   modelscope.metainfor   modelscope.modelsr   modelscope.msdatasetsr   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr   modelscope.preprocessorsr   r   modelscope.utils.constantr   r   rN   r   r   	getLoggerr   loggerenviron__all__register_modulesiamese_uier   r@   r<   r:   <module>r      s     	     G G G   ! #  ) # + ) . 2 I 6c5)]OCD			8	$'-

# $
  	9#8#8:Z0 Z0:Z0r<   