
    9iT                     X   S SK r S SKJrJrJrJrJrJr  S SKr	SSK
Jr  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr  SSKJr  SSK J!r!  SSK"J#r#  SSK$J%r%J&r&  SSK'J(r(  SSK
J)r)  \RT                   " S S\5      5       r+\" S5       " S S\5      5       r,g)    N)AnyDictListOptionalTupleUnion   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	DetResult)	benchmark)	HPIConfig)PaddlePredictorOption   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByBoxes)DocPreprocessorResult)	OCRResult   )SingleTableRecognitionResultTableRecognitionResult)get_table_recognition_res)get_neighbor_boxes_idxc            !       P  ^  \ rS rSrSr    S'S\S\S\S\S\	\
\\\4   \4      S	S4U 4S
 jjjrS\	\   S\	\   S\	\   S\	\   S	\4
S jrS\S\S\S	\4S jrS\R(                  S\S	\\\R(                  4   4S jrS rS r   S(S\R(                  S\S\S\S\S\S	\4S jjr              S)S\
\\\   \R(                  \\R(                     4   S\	\   S\	\   S\	\   S\	\   S\	\   S\	\   S\	\   S \	\   S!\	\   S"\	\   S#\	\   S$\	\   S\S\	\   S	\4 S% jjr S&r!U =r"$ )*_TableRecognitionPipeline&   zTable Recognition PipelineNconfigdevice	pp_optionuse_hpip
hpi_configreturnc                   > [         T
U ]  X#XES9  UR                  SS5      U l        U R                  (       a:  UR                  S0 5      R                  SSS05      nU R	                  U5      U l        UR                  SS5      U l        U R                  (       a:  UR                  S	0 5      R                  S
SS05      nU R                  U5      U l        UR                  S	0 5      R                  SSS05      nU R                  U5      U l	        UR                  SS5      U l
        U R                  (       a;  UR                  S0 5      R                  SSS05      n	U R	                  U	5      U l        O'UR                  S0 5      R                  SS5      U l        [        5       U l        [        SS9U l        [#        SS9U l        g)aQ  Initializes the layout parsing pipeline.

Args:
    config (Dict): Configuration dictionary containing various settings.
    device (str, optional): Device to run the predictions on. Defaults to None.
    pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
    use_hpip (bool, optional): Whether to use the high-performance
        inference plugin (HPIP) by default. Defaults to False.
    hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
        The default high-performance inference configuration dictionary.
        Defaults to None.
)r"   r#   r$   r%   use_doc_preprocessorTSubPipelinesDocPreprocessorpipeline_config_errorz+config error for doc_preprocessor_pipeline!use_layout_detection
SubModulesLayoutDetectionmodel_config_errorz"config error for layout_det_model!TableStructureRecognitionz'config error for table_structure_model!use_ocr_model
GeneralOCRz&config error for general_ocr_pipeline!Nr   )
batch_sizeBGR)format)super__init__getr(   create_pipelinedoc_preprocessor_pipeliner,   create_modellayout_det_modeltable_structure_modelr1   general_ocr_pipelinegeneral_ocr_config_bakr   _crop_by_boxesr   batch_samplerr   
img_reader)selfr!   r"   r#   r$   r%   doc_preprocessor_configlayout_det_configtable_structure_configgeneral_ocr_config	__class__s             v/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/paddlex/inference/pipelines/table_recognition/pipeline.pyr7   "_TableRecognitionPipeline.__init__*   s   * 	 	 	
 %+JJ/Et$L!$$&,jj&D&H&H!+-Z'# .2-A-A'.D* %+JJ/Et$L!$$ &

< < @ @!%'KL! %)$5$56G$HD!!'L"!=!A!A'!#LM"
 &*%6%67M%N"#ZZ>!'NB!?!C!C(*RS" )-(<(<=O(PD%*0**^R*H*L*Ld+D' *m.!<#51    use_doc_orientation_classifyuse_doc_unwarpingr,   r1   c                     Uc  Uc  U R                   nOUSL d  USL a  SnOSnUc  U R                  nUc  U R                  n[        UUUS9$ )a  
Get the model settings based on the provided parameters or default values.

Args:
    use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
    use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
    use_layout_detection (Optional[bool]): Whether to use layout detection.
    use_ocr_model (Optional[bool]): Whether to use OCR model.

Returns:
    dict: A dictionary containing the model settings.
TF)r(   r,   r1   )r(   r,   r1   dict)rC   rL   rM   r,   r1   r(   s         rI   get_model_settings,_TableRecognitionPipeline.get_model_settingsn   sq    & (/4E4M#'#<#< +t37HD7P'+$',$'#'#<#<   ..M!5!5'
 	
rK   model_settingsoverall_ocr_reslayout_det_resc                    US   (       a(  U R                   (       d  [        R                  " S5        gUS   (       aB  Ub  [        R                  " S5        gU R                  (       d  [        R                  " S5        gUS   (       aD  Ub  [        R                  " S5        gU R                  (       d  [        R                  " S	5        g gUc  [        R                  " S
5        gg)a  
Check if the input parameters are valid based on the initialized models.

Args:
    model_settings (Dict): A dictionary containing input parameters.
    overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
        The overall OCR result with convert_points_to_boxes information.
    layout_det_res (DetResult): The layout detection result.
Returns:
    bool: True if all required models are initialized according to input parameters, False otherwise.
r(   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.Fr,   z^The layout detection model has already been initialized, please set use_layout_detection=FalsezRSet use_layout_detection, but the models for layout detection are not initialized.r1   zLThe OCR models have already been initialized, please set use_ocr_model=Falsez>Set use_ocr_model, but the models for OCR are not initialized.z7Set use_ocr_model=False, but no OCR results were found.T)r(   r
   errorr,   r1   )rC   rR   rS   rT   s       rI   check_model_settings_valid4_TableRecognitionPipeline.check_model_settings_valid   s    $ 01$:S:SMMd 01)t ,,h /**b %%T 	 &  &WXrK   image_arrayinput_paramsc                     US   (       a/  US   nUS   n[        U R                  UUUS95      S   nUS   nXV4$ 0 nUnXV4$ )as  
Preprocess the document image based on input parameters.

Args:
    image_array (np.ndarray): The input image array.
    input_params (dict): Dictionary containing preprocessing parameters.

Returns:
    tuple[DocPreprocessorResult, np.ndarray]: A tuple containing the preprocessing
                                      result dictionary and the processed image array.
r(   rL   rM   rL   rM   r   
output_img)listr:   )rC   rY   rZ   rL   rM   doc_preprocessor_resdoc_preprocessor_images          rI   predict_doc_preprocessor_res6_TableRecognitionPipeline.predict_doc_preprocessor_res   s     ./+78V+W( ,-@ A#'..1M&7 / $ $  &:,%G" $;; $& %0"#;;rK   c           
         [        U[        5      (       d  UR                  5       n/ n[        [	        U5      5       Hy  nX$    Vs/ s H  n[
        R                  " U5      PM     snu  pgp[        U R                  XU	2Xh2SS24   5      5      S   n
UR                  SR                  U
S   5      5        M{     U$ s  snf a<  
Splits OCR bounding boxes by table cells and retrieves text.

Args:
    ori_img (ndarray): The original image from which text regions will be extracted.
    cells_bboxes (list or ndarray): Detected cell bounding boxes to extract text from.

Returns:
    list: A list containing the recognized texts from each cell.
Nr    	rec_texts

isinstancer^   tolistrangelenmathceilr>   appendjoinrC   ori_imgcells_bboxes
texts_listikx1y1x2y2rec_tes              rI   split_ocr_bboxes_by_table_cells9_TableRecognitionPipeline.split_ocr_bboxes_by_table_cells        ,--'..0L
s<()A4@ODOqdiilODNBB$33GrE25!O4LMNqQFbggf[&9:; *  E    B;c           
         [        U[        5      (       d  UR                  5       n/ n[        [	        U5      5       Hy  nX$    Vs/ s H  n[
        R                  " U5      PM     snu  pgp[        U R                  XU	2Xh2SS24   5      5      S   n
UR                  SR                  U
S   5      5        M{     U$ s  snf rd   rg   rp   s              rI   r{   r|     r}   r~   	table_box use_ocr_results_with_table_cellsflag_find_nei_textcell_sort_by_y_projectionc           	      h   [        U R                  U5      5      S   nUS:X  a8  US   nU V	s/ s H  oS   U	S   U	S   U	S   /PM     nn	U R                  X5      n
O/ n
[        UUUU
UUS9nSnU(       a4  [	        US	   U5      n[        U5      S:  a  U H  nXS
   U   S-   -  nM     XS'   U$ s  sn	f )a  
Predict table recognition results from an image array, layout detection results, and OCR results.

Args:
    image_array (np.ndarray): The input image represented as a numpy array.
    overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
        The overall OCR results containing text recognition information.
    table_box (list): The table box coordinates.
    use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
    flag_find_nei_text (bool): Whether to find neighboring text.
    cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
Returns:
    SingleTableRecognitionResult: single table recognition result.
r   Tbboxr   r	      r   re   	rec_boxesrf   z; neighbor_texts)r^   r=   r{   r   r   rk   )rC   rY   rS   r   r   r   r   table_structure_predtable_cells_resultrectcells_texts_listsingle_table_recognition_resneighbor_textmatch_idx_listidxs                  rI   $predict_single_table_recognition_res>_TableRecognitionPipeline.predict_single_table_recognition_res   s   .  $D$>$>{$KLQO+t3!5f!=AS"ASa$q'47DG4AS  "  $CC   "'@ ,&?(
$ 3,iN >"Q&)C![%A#%F%MMM *9F%56++3"s   B/inputtext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshc              +     #    U R                  UUUU5      nUc  SnU R                  UXg5      (       d  SS0v   [        U R                  U5      5       GH  u  nnU R	                  UR
                  5      S   nUS   (       a  [        U R                  UUUS95      S   nOSU0nUS   nUS	   (       a"  [        U R                  UUU	U
UUUS
95      S   nO8US:X  a2  U R                  S:w  d   eU R                  U R                  5      U l        / nSnUS   (       dS  UcP  0 nUR                  SS u  nnSSUS-
  US-
  /nU R                  UUUUSUS9nUUS'   UR                  U5        US-  nOUS   (       a  [        U R                  U5      5      S   nUS    Hj  nUS   R                  5       S;   d  M  U R!                  UU/5      nUS   nUS   nU R                  US   UUUUS9nUUS'   UR                  U5        US-  nMl     UR"                  S   UR$                  S   UUUUUS.n['        U5      v   GM     g7f)a  
This function predicts the layout parsing result for the given input.

Args:
    input (Union[str, list[str], np.ndarray, list[np.ndarray]]): The input image(s) of pdf(s) to be processed.
    use_layout_detection (bool): Whether to use layout detection.
    use_doc_orientation_classify (bool): Whether to use document orientation classification.
    use_doc_unwarping (bool): Whether to use document unwarping.
    overall_ocr_res (OCRResult): The overall OCR result with convert_points_to_boxes information.
        It will be used if it is not None and use_ocr_model is False.
    layout_det_res (DetResult): The layout detection result.
        It will be used if it is not None and use_layout_detection is False.
    use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
    cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
    **kwargs: Additional keyword arguments.

Returns:
    TableRecognitionResult: The predicted table recognition result.
NFrV   z0the input params for model settings are invalid!r   r(   r\   r]   r1   )r   r   r   r   r   r   Tr   r,   r   )r   r   table_region_idboxeslabel)tableboximgr   )
input_path
page_indexr_   rT   rS   table_res_listrR   )rP   rW   	enumeraterA   rB   	instancesr^   r:   r>   r?   r9   shaper   rn   r<   lowerr@   input_pathspage_indexesr   ) rC   r   rL   rM   r,   r1   rS   rT   r   r   r   r   r   r   r   r   kwargsrR   img_id
batch_datarY   r_   r`   r   r   
img_height	img_widthr   single_table_rec_resbox_infocrop_img_infosingle_img_ress                                    rI   predict!_TableRecognitionPipeline.predictU  s    N 00( 	
 %,(-%..O
 
 NOO"+D,>,>u,E"FFJ//**>*>?BK45'+22#5Q*; 3 ( ($ )5k'B$%9,%G"o."&--.0G,?(7,?.C.C . 
# 
# 2T922d:::,0,@,@//-)  NO!"89n>T!#(>(D(DRa(H%
I9q=*q.A	'+'P'P*#4',.G (Q ($ ;J$%67%%&:;1$!"89%)--.DE&&N !/w 7H(..0I=(,(;(;K((T(5a(8$1%$8	 EE -e 4 / ) @:S F  - CR,->?&--.BC'1,! !8& )44Q7(55a8(<"0#2"0"0N )88i #Gs   F0H?6B	H?)r@   rA   r:   r?   r>   rB   r<   r=   r(   r,   r1   )NNFN)FTF)NNNNNNNNNNNNFN)#__name__
__module____qualname____firstlineno____doc__r   strr   boolr   r   r   r   r7   rO   rP   r   r   rW   npndarrayr   r   ra   r{   r^   r   r   r   intfloatr   r   __static_attributes____classcell__)rH   s   @rI   r   r   &   s   $
 +/AEB2B2 B2 )	B2
 B2 U4S>9#<=>B2 
B2 B2H%
&.tn%
 $D>%
 'tn	%

  ~%
 
%
N55 #5 "	5
 
5n<::<59<	$bjj0	1<<6@ 27#'*/3,ZZ3, #3, 	3,
 +/3, !3, $(3, 
&3,p 8<,0/3(,/3.215-1+//315151648!I9S$s)RZZbjj1AABI9 '/tnI9 $D>	I9
 'tnI9  ~I9 "),I9 !+I9 "*#I9 &c]I9 "%I9 &e_I9  (I9  (I9 +/I9  $,D>!I9$ 
 %I9 I9rK   r   ocrc                   0    \ rS rSrS/r\S 5       rS rSrg)TableRecognitionPipelinei  table_recognitionc                     [         $ )N)r   )rC   s    rI   _pipeline_cls&TableRecognitionPipeline._pipeline_cls  s    ((rK   c                     g)Nr    )rC   r!   s     rI   _get_batch_size(TableRecognitionPipeline._get_batch_size  s    rK   r   N)	r   r   r   r   entitiespropertyr   r   r   r   rK   rI   r   r     s!    #$H) )rK   r   )-rl   typingr   r   r   r   r   r   numpyr   utilsr
   
utils.depsr   common.batch_samplerr   common.readerr   models.object_detection.resultr   utils.benchmarkr   	utils.hpir   utils.pp_optionr   	_parallelr   baser   
componentsr   doc_preprocessor.resultr   
ocr.resultr   resultr   r   !table_recognition_post_processingr   r   time_methodsr   r   r   rK   rI   <module>r      s     : :   2 5 & 7 ( " 4 @  $ ; " H H ) w9 w9 w9t G   rK   