
    9iSU                        S SK JrJrJrJrJr  S SKrSSKJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJrJrJrJrJrJ r   SSK!J"r"  \RF                   " S S\5      5       r$\" SSS9 " S S\5      5       r%g)    )AnyDictListOptionalUnionN   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	benchmark)	HPIConfig)PaddlePredictorOption   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByPolysSortPolyBoxesSortQuadBoxescal_ocr_word_boxconvert_points_to_boxesrotate_image   )	OCRResultc                   >  ^  \ rS rSrSr    S S\S\\   S\\   S\	S\\
\\\4   \4      S	S4U 4S
 jjjrS\\R                      S\\   S	\\R                      4S jrS\S	\	4S jrS\\	   S\\	   S\\	   S	\4S jr      S!S\\   S\\   S\\   S\\   S\\   S\\   S	\4S jjr           S"S\
\\\   \R                   \\R                      4   S\\	   S\\	   S\\	   S\\   S\\   S\\   S\\   S\\   S\\   S\\   S\\	   S	\4S jjrSrU =r$ )#_OCRPipeline'   zOCR PipelineNconfigdevice	pp_optionuse_hpip
hpi_configreturnc                   > [         T
U ]  X#XES9  UR                  SS5      U l        U R                  (       a:  UR                  S0 5      R                  SSS05      nU R	                  U5      U l        UR                  SS5      U l        U R                  (       a:  UR                  S	0 5      R                  S
SS05      nU R                  U5      U l        UR                  S	0 5      R                  SSS05      nUS   U l	        U R                  S:X  a  UR                  SS5      U l
        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        UR                  SS5      U l        [#        5       U l        ['        SS 9U l        OU R                  S!:X  a  UR                  SS"5      U l
        UR                  SS#5      U l        UR                  SS5      U l        UR                  SS$5      U l        UR                  SS5      U l        UR                  SS%5      U l        UR                  SS5      U l        [+        5       U l        ['        S&S 9U l        O$[-        S'R/                  U R                  5      5      eU R                  UU R                  U R                  U R                  U R                  U R                  U R                   U R                  S(9U l        UR                  S	0 5      R                  S)SS*05      n	U	R                  S+S,5      U l        U	R                  S-S.5      U l        U	R                  SS5      U l        U R                  XR                  S/9U l        [9        UR                  S0S15      S29U l        [=        S3S49U l        g)5af  
Initializes the class with given configurations and options.

Args:
    config (Dict): Configuration dictionary containing various settings.
    device (str, optional): Device to run the predictions on. Defaults to None.
    pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
    use_hpip (bool, optional): Whether to use the high-performance
        inference plugin (HPIP) by default. Defaults to False.
    hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
        The default high-performance inference configuration dictionary.
        Defaults to None.
)r    r!   r"   r#   use_doc_preprocessorTSubPipelinesDocPreprocessorpipeline_config_errorz+config error for doc_preprocessor_pipeline!use_textline_orientation
SubModulesTextLineOrientationmodel_config_errorz,config error for textline_orientation_model!TextDetectionz config error for text_det_model!	text_typegenerallimit_side_leni  
limit_typemaxmax_side_limiti  threshg333333?
box_threshg333333?input_shapeNunclip_ratiog       @quad)det_box_typeseali  ming?g      ?polyzUnsupported text type {})r1   r2   r4   r5   r6   r8   r7   TextRecognitionz config error for text_rec_model!score_threshr   return_word_boxF)r7   
batch_sizer   )rA   BGR)format) super__init__getr&   create_pipelinedoc_preprocessor_pipeliner*   create_modeltextline_orientation_modelr/   text_det_limit_side_lentext_det_limit_typetext_det_max_side_limittext_det_threshtext_det_box_threshr7   text_det_unclip_ratior   _sort_boxesr   _crop_by_polysr   
ValueErrorrC   text_det_modeltext_rec_score_threshr@   text_rec_modelr   batch_samplerr   
img_reader)selfr   r    r!   r"   r#   doc_preprocessor_configtextline_orientation_configtext_det_configtext_rec_config	__class__s             h/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/paddlex/inference/pipelines/ocr/pipeline.pyrE   _OCRPipeline.__init__+   s   * 	 	 	
 %+JJ/Et$L!$$&,jj&D&H&H!+-Z'# .2-A-A'.D* )/

3Mt(T%((*0**\2*F*J*J%%'UV+' /3.?.?+/D+ !**\26::24VW
  ,>>Y&+:+>+>?OQT+UD('6':':<'OD$+:+>+>?OQU+VD(#2#6#6x#ED '6':':<'MD$.22=$GD)8)<)<^S)QD&,D"-6"BD^^v%+:+>+>?OQT+UD('6':':<'OD$+:+>+>?OQU+VD(#2#6#6x#ED '6':':<'MD$)8)<)<^S)QD&.22=$GD,D"-6"BD7>>t~~NOO"//77//77''//33(( 0 	
 !**\26::!#EF
 &5%8%8%K".223DeL*..}dC"//)9)9 0 
 /&**\ST:UV#51    image_array_listrotate_angle_listc                    [        U5      [        U5      :X  d    S[        U5       S[        U5       S35       eU H  nUS;   a  M   SU 35       e   / n[        X5       H&  u  pVUS-  n[        XW5      nUR                  U5        M(     U$ )aX  
Rotate the given image arrays by their corresponding angles.
0 corresponds to 0 degrees, 1 corresponds to 180 degrees.

Args:
    image_array_list (List[np.ndarray]): A list of input image arrays to be rotated.
    rotate_angle_list (List[int]): A list of rotation indicators (0 or 1).
                                0 means rotate by 0 degrees
                                1 means rotate by 180 degrees

Returns:
    List[np.ndarray]: A list of rotated image arrays.

Raises:
    AssertionError: If any rotate_angle is not 0 or 1.
    AssertionError: If the lengths of input lists don't match.
zLength of image_array_list (z*) must match length of rotate_angle_list ())r   r   z&rotate_angle must be 0 or 1, now it's    )lenzipr   append)	rY   rb   rc   anglerotated_imagesimage_arrayrotate_indicatorrotate_anglerotated_images	            r_   r   _OCRPipeline.rotate_image   s    ( #$)
 
 	E)#.>*?)@@jkn  pA  lB  kC  CD  E	E 
 'EF?T&LUG$TT? ' -01A-U)K+c1L(CM!!-0	 .V ra   model_settingsc                     US   (       a(  U R                   (       d  [        R                  " S5        gUS   (       a(  U R                  (       d  [        R                  " S5        gg)a  
Check if the input parameters are valid based on the initialized models.

Args:
    model_info_params(Dict): A dictionary containing input parameters.

Returns:
    bool: True if all required models are initialized according to input parameters, False otherwise.
r&   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.Fr*   z^Set use_textline_orientation, but the models for use_textline_orientation are not initialized.T)r&   r	   errorr*   )rY   rq   s     r_   check_model_settings_valid'_OCRPipeline.check_model_settings_valid   sT     01$:S:SMMd  5611MMp ra   use_doc_orientation_classifyuse_doc_unwarpingr*   c                 x    Uc  Uc  U R                   nOUSL d  USL a  SnOSnUc  U R                  n[        UUS9$ )a  
Get the model settings based on the provided parameters or default values.

Args:
    use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
    use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
    use_textline_orientation (Optional[bool]): Whether to use textline orientation.

Returns:
    dict: A dictionary containing the model settings.
TF)r&   r*   )r&   r*   dict)rY   rv   rw   r*   r&   s        r_   get_model_settings_OCRPipeline.get_model_settings   s]    " (/4E4M#'#<#< +t37HD7P'+$',$#+'+'D'D$!5%=
 	
ra   rK   rL   rM   rN   rO   rP   c           	          Uc  U R                   nUc  U R                  nUc  U R                  nUc  U R                  nUc  U R                  nUc  U R
                  n[        UUUUUUS9$ )a  
Get text detection parameters.

If a parameter is None, its default value from the instance will be used.

Args:
    text_det_limit_side_len (Optional[int]): The maximum side length of the text box.
    text_det_limit_type (Optional[str]): The type of limit to apply to the text box.
    text_det_max_side_limit (Optional[int]): The maximum side length of the text box.
    text_det_thresh (Optional[float]): The threshold for text detection.
    text_det_box_thresh (Optional[float]): The threshold for the bounding box.
    text_det_unclip_ratio (Optional[float]): The ratio for unclipping the text box.

Returns:
    dict: A dictionary containing the text detection parameters.
)r1   r2   r5   r4   r6   r8   )rK   rL   rM   rN   rO   rP   ry   )rY   rK   rL   rM   rN   rO   rP   s          r_   get_text_det_params _OCRPipeline.get_text_det_params   s    2 #*&*&B&B#&"&":":"*&*&B&B#""22O&"&":": ($($>$>!2*"2*.
 	
ra   inputrU   r@   c              #   
  #    U R                  X#U5      nU R                  U5      (       d  SS0v   U R                  UUUUU	U
5      nUc  U R                  nUc  U R                  n[        U R                  U5      5       GH  u  nnU R                  UR                  5      nUS   (       a  [        U R                  UUUS95      nOU Vs/ s H  nSU0PM	     nnU Vs/ s H  nUS   PM
     nn[        U R                  " U40 UD65      nU Vs/ s H  nUS   PM
     nnU Vs/ s H  nU R                  U5      PM     nn[        UR                  UR                  UU5       VVVVs/ s H"  u  nnnnUUUUUUU R                   UU/ / / / S.PM$     nnnnn[        [#        [%        U5      5      5      nU Vs/ s H  n[%        UU   5      S	:  d  M  UPM     nnU(       Ga  / nS	/n U HU  n[        U R'                  UU   UU   5      5      n!UR)                  U!5        U R+                  U S
   [%        U!5      -   5        MW     US   (       aC  U R-                  U5       V"s/ s H  n"[/        U"S   S	   5      PM     n#n"U R1                  UU#5      nOS
/[%        U5      -  n#[        U5       H  u  n$nUU   n%U#U U$   U U$S-       U%S'   M     [        U5       GH  u  n$nUU U$   U U$S-       n!UU   n%UU   n[        U!5       V&V's/ s H1  u  n&n'U&U'R2                  S   [5        U'R2                  S	   5      -  S.PM3     n(n&n'[7        U(S S9n)U) V*s/ s H  n*U!U*S      PM     n+n*[        U R9                  U+US95       H  u  n$n,U)U$   S   n-U,U(U-   S'   M     U(       a
  / U%S'   / U%S'   [#        [%        U(5      5       H  n.U(U.   S   n,U,S   U:  d  M  U(       ab  [;        U,S   S	   UU.   U,S   S   5      u  n/n0U%S   R+                  U/5        U%S   R+                  U05        U%S   R+                  U,S   S	   5        OU%S   R+                  U,S   5        U%S   R+                  U,S   5        U%S   R+                  U,S   5        U%S   R+                  UU.   5        M     GM     U Hv  n%U R                   S:X  a=  [=        U%S   5      n1U1U%S'   U(       a"  U%S    V2s/ s H  n2[=        U25      PM     sn2U%S '   O[>        R@                  " / 5      U%S'   [C        U%5      v   Mx     GM     gs  snf s  snf s  snf s  snf s  snnnnf s  snf s  sn"f s  sn'n&f s  sn*f s  sn2f 7f)!a  
Predict OCR results based on input images or arrays with optional preprocessing steps.

Args:
    input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image of pdf path(s) or numpy array(s).
    use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
    use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
    use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
    text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
    text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
    text_det_max_side_limit (Optional[int]): Maximum side length for text detection.
    text_det_thresh (Optional[float]): Threshold for text detection.
    text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
    text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
    text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
    return_word_box (Optional[bool]): Whether to return word boxes along with recognized texts.
Returns:
    OCRResult: Generator yielding OCR results for each input image.
rs   z0the input params for model settings are invalid!Nr&   )rv   rw   
output_imgdt_polys)
input_path
page_indexdoc_preprocessor_resr   rq   text_det_paramsr/   rU   r@   	rec_texts
rec_scores	rec_polys	vis_fontsr   r*   	class_idsr   textline_orientation_angles)
sub_img_idsub_img_ratioc                     U S   $ )Nr    )xs    r_   <lambda>&_OCRPipeline.predict.<locals>.<lambda>  s	    ?9Kra   )keyr   )r@   rec_res	text_wordtext_word_region	rec_scorerec_textr   r   r   vis_fontr   r0   	rec_boxestext_word_boxes)"rz   rt   r}   rU   r@   	enumeraterW   rX   	instanceslistrH   rT   rQ   rh   input_pathspage_indexesr/   rangerg   rR   extendri   rJ   intr   shapefloatsortedrV   r   r   nparrayr   )3rY   r   rv   rw   r*   rK   rL   rM   rN   rO   rP   rU   r@   rq   r   _
batch_dataimage_arraysdoc_preprocessor_resultsarritemdoc_preprocessor_imagesdet_resultsdt_polys_listr   r   r   r   resultsindicesidxall_subs_of_imgschunk_indicesall_subs_of_imgtextline_angle_infoanglesiresimg_idsub_imgsub_img_info_listsorted_subs_infor   sorted_subs_of_imgr   r   snoword_box_content_listword_box_listr   lines3                                                      r_   predict_OCRPipeline.predict  sm    F 00(=U
 ..~>>NOO22##!
 !($($>$>!""22O&t'9'9%'@AMAz??:+?+?@L45+/22$5Q*; 3 ,( LX+X<C\3,?<(+X 0H'/Gt\"/G $ ' ##$;OOK ;FF+$T*-+MF@MNT--d3MN$ OR**++,!	O! OJJ
,@( #-",,@ (&4'6!%-B'6!#"$!#!#O!  0 5%<!=>?G&-Mgs]35G1H11LsgGM#% !""C&*++3C8-:L'O
 %++O<!((r):S=Q)QR # ""<= 483R3R,44/ /<Q?@4   (,'8'89I6'R$ TC(8$99F'0FAs!#,C9?%a(=Q+?:C56 1 (0FAs&6%a(=Q+?'O "#,C,S1H 09/I)
 0JOFG +1-4]]1-=gmmTUFV@W-W 0J & ) (.)/K($ CS*BRQ,8BR ' * '0++. , '
7
 &6a%8%F
CJ)*5i@' '+-K(24./$S):%;<"3C"8"C";/3HH.GW$+J$7$:$,SM$+J$7$:H" D 5}
 !$K 0 7 78M N #$6 7 > >} M #K 0 7 7
8KA8N O #K 0 7 7
8K L-44W[5IJ,33GJ4GH,33HSMB!  =; 1\ >>Y. 7K8H II'0C$& ),,>(?2(? 4D9(?2-.
 (*xx|C$n$ C B ,Y' GN2 N ,)*F2s   CU
TUT!-"UT&U$T+?)U()T0&U7T8T8BUT=9BU 8U
8U
UA,UDUU%A-U)rR   rQ   rW   rH   rX   r7   r@   rO   rK   rL   rM   rT   rN   rP   rV   rU   r/   rJ   r&   r*   )NNFN)NNNNNN)NNNNNNNNNNN)__name__
__module____qualname____firstlineno____doc__r   r   strr   boolr   r   r   rE   r   r   ndarrayr   r   rt   ry   rz   r   r}   r   r   __static_attributes____classcell__)r^   s   @r_   r   r   '   s~   
 !%59AE`2`2 `2 12	`2
 `2 U4S>9#<=>`2 
`2 `2D" $RZZ 0"EI#Y"	bjj	"H $ 8
&.tn
 $D>
 #+4.	

 

D 26-115+//315,
!)#,
 &c],
 "*#	,

 "%,
 &e_,
  (,
 
,
b 8<,03715-115+//31515*.E%S$s)RZZbjj1AABE% '/tnE% $D>	E%
 #+4.E% "*#E% &c]E% "*#E% "%E% &e_E%  (E%  (E% "$E% 
E% E%ra   r   ocrzocr-core)altc                   .    \ rS rSrSr\S 5       rS rSrg)OCRPipelinei  OCRc                     [         $ )N)r   )rY   s    r_   _pipeline_clsOCRPipeline._pipeline_cls  s    ra   c                 &    UR                  SS5      $ )NrA   r   )rF   )rY   r   s     r_   _get_batch_sizeOCRPipeline._get_batch_size  s    zz,**ra   r   N)	r   r   r   r   entitiespropertyr   r   r   r   ra   r_   r   r     s    H +ra   r   )&typingr   r   r   r   r   numpyr   utilsr	   
utils.depsr
   common.batch_samplerr   common.readerr   utils.benchmarkr   	utils.hpir   utils.pp_optionr   	_parallelr   baser   
componentsr   r   r   r   r   r   resultr   time_methodsr   r   r   ra   r_   <module>r      s    4 3   2 5 & ( " 4 @    x%< x% x%v J/+: + 0+ra   