
    9iE&                        S SK r S SKJrJrJrJr  S SKrS SKrS SK	J
r
  S SKJr  S SKJr  S SKJr  S SKJrJr  S SKJr  S S	KJr  S S
KJr  S SKJr  \" 5       rS/r\R<                  " \R>                  \R>                  S9 " S S\5      5       r g)    N)AnyDictListUnion)Dataset)	Pipelines)Model)
OutputKeys)PipelineTensor)	PIPELINES),DocumentSegmentationTransformersPreprocessor)Tasks)
get_loggerDocumentSegmentationPipeline)module_namec            	         ^  \ rS rSr    SS\\\4   S\S\S\4U 4S jjjrS\\	\	\      \	\   \4   S\
\\4   4S	 jrS\\	\	\      \	\   \4   S\
\\4   4S
 jrS\
\\4   S\
\\4   4S jrS\\	\	\      \	\   \4   4S jrS rSrU =r$ )r      modelpreprocessorconfig_filedevicec           	      b  > [         TU ]  " SUUUUUS.UD6  UR                  SS5        UR                  SS5        U R                  R                  U l        U R                  R
                  U l        Uc;  [        U R                  U R                  R                  R                  40 UD6U l	        gg)a  The document segmentation pipeline.

Args:
    model (str or Model): Supply either a local model dir or a model id from the model hub
    preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
    the model if supplied.
)r   r   r   r   auto_collatecompileNcompile_options )
super__init__popr   	model_dir	model_cfgr   configmax_position_embeddingsr   )selfr   r   r   r   r   kwargs	__class__s          w/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/pipelines/nlp/document_segmentation_pipeline.pyr   %DocumentSegmentationPipeline.__init__   s     	 	%#%	 	 	

9d#

$d+---- L

 1 1 I I!!D      	documentsreturnc                 J    U R                  U5      nU R                  U5      nU$ )N)predictpostprocess)r%   r+   outputs      r(   __call__%DocumentSegmentationPipeline.__call__>   s'     i(!!&)r*   c                 
   U R                  U5      nU R                  S   S:X  a  UR                  S5      n[        R                  " U5      nU R                  X@R                  5      n[        X@R
                  R                     5      n[        XPR
                  R                     5      nU R                  S   S:X  a  UR                  S5        UR                  S5      nUR                  S5      n	UR                  U R
                  R                  5      n
U R                  (       d%  U R                  (       a5  U R                  S	   (       a!  U R                  (       d  U R                  5         [        R                  " 5          UR!                  5        VVs0 s H5  u  pU[        R"                  " U5      R%                  U R&                  5      _M7     nnnU R                  R(                  " S0 UD6R*                  R-                  5       nS S S 5        [.        R0                  " WS
S9n[        U	5      [        U5      :X  d*   SR3                  U[        U	5      [        U5      5      5       e[5        X5       VVVVs/ s HG  u  nn[5        UU5       VVs/ s H'  u  nnUS:w  d  M  U R
                  R6                  U   PM)     snnPMI     nnnnn[5        X5       VVVVs/ s HG  u  nn[5        UU5       VVs/ s H'  u  nnUS:w  d  M  U R
                  R6                  U   PM)     snnPMI     nnnnn/ n[9        U5       HF  nU R                  S   S:X  a  UR;                  / / / WU   S.5        M1  UR;                  / / / S.5        MH     [5        UU	UU
5       GH  u  nnnnU R                  S   S:X  a  [        U5      [        U5      :  a"  UR;                  S5        UR;                  S5        [        U5      [        U5      :X  d)   SR3                  [        U5      [        U5      5      5       e[        U5      [        U5      :X  d)   SR3                  [        U5      [        U5      5      5       eUU   S   R=                  U5        UU   S   R=                  U5        UU   S   R=                  U5        GM      U R                  S   S:X  ai  [9        U5       HZ  n[        UU   S   5      S-   [        UU   S   5      :X  d   eUU   S   R;                  S5        UU   S   R;                  S5        M\     U$ s  snnf ! , (       d  f       GN7= fs  snnf s  snnnnf s  snnf s  snnnnf )Nleveltopic
paragraphstypebertsegment_idslabels	sentencesr      )axisz(sample {}  infer_sample {} prediction {}i)r;   r:   predictionsr6   )r;   r:   r>   docB-EOPz{} {}r>      r   )cut_documentsr"   r    r   	from_dictr   lencontext_column_nameexample_id_column_namer   has_multiple_modelsmodels_model_prepareprepare_modeltorchno_graditemstensortor   forwardlogitscpunpargmaxformatzip
label_listrangeappendextend)r%   r+   pred_samplesr6   predict_examplespredict_datasetnum_examplesnum_samplesr:   r;   example_idskeyvalinputr>   
predictionlabelpltrue_predictionstrue_labelsoutisentence_list
example_ids                            r(   r.   $DocumentSegmentationPipeline.predictE   s    )))4>>'"g-%)),7J",,\: ++,<nnM..BBCE--AABD >>&!V+. $$X.#''4	%))446 JJ433A&&""$]]_ !0 5 5 7 7HC U\\#&))$++66 7   **,,5u5<<@@BK  ii!49~"  	?DKKS^S-=?	?  (+;'?	
 (@#*e "*e44FQT	 0!!,,Q/4 (@	 	 
 (+;'?	
 (@#*e "*e44FQT	 0!!,,Q/4 (@	 	 
 |$A~~g&'1

!# #%",Q-	  

r"MN % =@ )[+=G8Juj~~g&%/u:M 22LL)%%g.=)S_< 9gnn&J?9 9<=)SZ7 4&E
:4 47 
OK(//>
OH%,,U3
OM*11*==G >>'"g-<(3q6-01A5F<(:* * * *A}%,,W5Ax ''0	 ) 
{ _

sZ    T7<T17T76U
U	U	<U
U
1UUU
1T77
U	U
U
inputsc                    / n/ n[        U5      nU R                  S   S:X  a  [        U5       H  n/ n/ n[        X   S   X   S   X   S   5       Hv  u  pn
UR	                  5       nU	S:X  a%  SR                  US/5      nUR                  S	5        O$SR                  US
/5      nUR                  S5        UR                  U5        Mx     UR                  U5        SSR                  U5      R	                  5       -   nUR                  U5        M     O[        U5       H  n/ n[        X   S   X   S   5       H?  u  pUR	                  5       nU	S:X  a  SR                  US
/5      nUR                  U5        MA     SSR                  U5      -   nUR                  U5        M     US	:X  a  [        R                  US   0$ [        R                  U0$ )zprocess the prediction results

Args:
    inputs (Dict[str, Any]): _description_

Returns:
    Dict[str, str]: the prediction results
r4   r5   r6   r>   r:   r@    z

	rA   z
	r   	r;   )	rD   r"   rX   rV   stripjoinrY   r
   TEXT)r%   ro   result	res_preds
list_countnumrespredsrf   rg   documents               r(   r/   (DocumentSegmentationPipeline.postprocess   s    	[
>>'"g-Z("6;|#<#);}#=#);x#8 :GA! 	AG|GGQM2AGGQK0AJJqM :   & 2773<#5#5#77h'! )$ Z(K 8 &M :<DA	AG|GGQK0JJqM< !2773</h' ) ?OOVAY//OOV,,r*   parac                    Un/ n/ n/ n/ nSnU R                   S   S:X  a  [        U[        5      (       a  U//nO[        US   [        5      (       a  U/nU H  n/ n	/ n
U HJ  nU R                  U5      nU	R	                  U5        U
R	                  S/[        U5      S-
  -  S/-   5        ML     UR                  U5        UR                  U	5        UR                  U
5        UR                  U5        US-  nM     UUUUS.$ [        U[        5      (       a  U/nU Hb  nU R                  U5      n	S/[        U	5      S-
  -  S/-   n
UR                  U	5        UR                  U
5        UR                  U5        US-  nMd     UUUS	.$ )
Nr   r4   r5   z-100rA   r@   )rm   r;   r6   r:   O)rm   r;   r:   )r"   
isinstancestrcut_sentencerZ   rD   rY   )r%   r   document_listr6   r;   r:   rm   idr}   sentencere   itemsentence_of_current_paragraphs                r(   rB   *DocumentSegmentationPipeline.cut_documents   s   
	
>>'"g-$$$"&DGS))!%)$D484E4Ed4K1OO$ABLL&$'(E$F$J"L$+9"- . % !!(+  *e$!!"%a *  )&( 	  $$$!%),,X6X!23wi?  *e$!!"%a * )&  r*   c                 @   [         R                  " SSU5      n[         R                  " SSU5      n[         R                  " SSU5      n[         R                  " SSU5      nUR                  5       nUR                  S5       Vs/ s H  o"(       d  M  UPM     sn$ s  snf )Nu   ([。！.!？\?])([^”’])z\1\n\2u   (\.{6})([^”’])u   (\…{2})([^”’])u*   ([。！？\?][”’])([^，。！？\?])
)resubrstripsplit)r%   r   _s      r(   r   )DocumentSegmentationPipeline.cut_sentence  s    vv5y$Gvv+Y=vv-y$?vvCYPTU{{}::d+1+aq+111s   
BB)r"   r!   r   )NNgpuT)__name__
__module____qualname____firstlineno__r   r	   r   r   r   r   r   r   r1   r.   r   r/   rB   r   __static_attributes____classcell__)r'   s   @r(   r   r      s    JN#$ G 	
  B"4S	?DI#&$' (,0cN["4S	?DI#&$' ([,0cN[z/-$sF{"3 /-S&[8I /-b3%T#YcC(G"H 3j2 2r*   )!r   typingr   r   r   r   numpyrS   rK   datasetsr   modelscope.metainfor   modelscope.modelsr	   modelscope.outputsr
   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessorsr   modelscope.utils.constantr   modelscope.utils.loggerr   logger__all__register_moduledocument_segmentationr   r   r*   r(   <module>r      s~    
 ) )    ) # ) 6 21 + .	)
* 	Y-L-LNs28 s2Ns2r*   