
    9ij                        / S Qr SSKrSSKJr  SSKJrJrJrJrJ	r	  SSK
rSSKJr  SSKJr  SSKJr  S	S
KJrJr  S\R*                  S\R*                  S\4S jr  S0S\S\S\S\S\4
S jjrS r  S1S\\   S\\   S\S\4S jjr S2S\	\R*                  \\4   S\	\R*                  \\4   S\4S jjrS r S r!S r"S r#S r$ S3S\	\\%   \\%\%\%\%4   4   S\	\\%   \\%\%\%\%4   4   S\S \S\\	\\%   \\%\%\%\%4   4      4
S! jjr& S4S"\\\\\%   4      S#\S \S\\\\\\%   4      \\\\\%   4      4   4S$ jjr'S5S% jr(S\4S& jr)S' r*S(\S)\+4S* jr,S+ r-S, r.S- r/S6S.\S\4S/ jjr0g)7)get_sub_regions_ocr_resget_show_colorsorted_layout_boxes    N)deepcopy)DictListOptionalTupleUnion)Image   )convert_points_to_boxes)	OCRResult   )BLOCK_LABEL_MAPREGION_SETTINGS	src_boxes	ref_boxesreturnc                    / n[        U 5      nUS:  a  [        U5      S:  a  [        [        U5      5       H  nX   n[        R                  " US   U SS2S4   5      n[        R                  " US   U SS2S4   5      n[        R                  " US   U SS2S4   5      n[        R                  " US   U SS2S4   5      n	X-
  n
X-
  n[        R
                  " U
S:  US:  -  5      S   nUR                  U5        M     U$ )ad  
Get the indices of source boxes that overlap with reference boxes based on a specified threshold.

Args:
    src_boxes (np.ndarray): A 2D numpy array of source bounding boxes.
    ref_boxes (np.ndarray): A 2D numpy array of reference bounding boxes.
Returns:
    match_idx_list (list): A list of indices of source boxes that overlap with reference boxes.
r   Nr   r      )lenrangenpmaximumminimumwhereextend)r   r   match_idx_listsrc_boxes_numrnoref_boxx1y1x2y2pub_wpub_h	match_idxs                p/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/paddlex/inference/pipelines/layout_parsing/utils.pyget_overlap_boxes_idxr+   !   s     N	NMqS^a/Y(CnGGAJ	!Q$8BGAJ	!Q$8BGAJ	!Q$8BGAJ	!Q$8BGEGE%!)	!:;A>I!!), )     overall_ocr_resobject_boxesflag_withinreturn_match_idxc                 $   0 n/ US'   / US'   / US'   / US'   U S   n[        XQ5      n[        [        U5      5      n[        [	        U5      5       H  nU(       a  Xv;   a  SnOSnO
Xv;  a  SnOSnU(       d  M(  US   R                  U S   U   5        US   R                  U S   U   5        US   R                  U S   U   5        US   R                  U S   U   5        M     S H  n	[        R                  " XI   5      XI'   M     U(       a  XF4$ U$ )a4  
Filters OCR results to only include text boxes within specified object boxes based on a flag.

Args:
    overall_ocr_res (OCRResult): The original OCR result containing all text boxes.
    object_boxes (list): A list of bounding boxes for the objects of interest.
    flag_within (bool): If True, only include text boxes within the object boxes. If False, exclude text boxes within the object boxes.
    return_match_idx (bool): If True, return the list of matching indices.

Returns:
    OCRResult: A filtered OCR result containing only the relevant text boxes.
	rec_polys	rec_texts
rec_scores	rec_boxesTF)r2   r4   r5   )r+   listsetr   r   appendr   array)
r-   r.   r/   r0   sub_regions_ocr_resoverall_text_boxesr   box_no
flag_matchkeys
             r*   r   r   ;   s\   $ ')$')$(*%')$(5*+=LN#n-.N./0'!
"
+!
"
:,33,V4  ,33,V4  -44-f5  ,33,V4+ 10 8#%88,?,D#E  8  
- !r,   c                    [        U 5      nUS:X  a  U $ [        U S S9n[        U5      n/ n/ n/ nSn X:  a  OXH   S   S   US-  :  a-  XH   S   S   SU-  S	-  :  a  UR                  XH   5        US-  nOQXH   S   S   SU-  S	-  :  a  UR                  XH   5        US-  nO$XV-  nXW-  nUR                  XH   5        / n/ nUS-  nM  [        US
 S9n[        US S9nU(       a  XV-  nU(       a  XW-  nU$ )z
Sort text boxes in order from top to bottom, left to right
Args:
    res: List of dictionaries containing layout information.
    w: Width of image.

Returns:
    List of dictionaries containing sorted layout information.
r   c                 "    U S   S   U S   S   4$ )N
block_bboxr   r    xs    r*   <lambda>%sorted_layout_boxes.<locals>.<lambda>   s    aoa.@!L/RSBT-Ur,   )r>   r   rA      r   r      c                     U S   S   $ NrA   r   rB   rC   s    r*   rE   rF      s    qq/Ar,   c                     U S   S   $ rJ   rB   rC   s    r*   rE   rF      s    <1Cr,   )r   sortedr6   r8   )	resw	num_boxessorted_boxes_boxesnew_resres_left	res_rightis	            r*   r   r   w   s@    CIA~
 ##UVL,FGHI	A
> Il#A&Q.	,'*QUQY6OOFI&FAY|$Q'!a%!)3VY'FAG GNN69%HIFA' * h$BCHy&DEINr,   bbox1bbox2	directionc                 ~   Su  pEUS:X  a  Su  pE[        X   X   5      n[        X   X   5      nXv-
  nUS::  a  gUS:X  a"  [        X   X   5      [        X   X   5      -
  n	OQUS:X  a  [        X   X   -
  X   X   -
  5      n	O0US:X  a  [        X   X   -
  X   X   -
  5      n	O[        SU S	35      eU	S:  a  X-  $ S
$ )am  
Calculate the IoU of lines between two bounding boxes.

Args:
    bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
    bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
    direction (str): direction of the projection, "horizontal" or "vertical".

Returns:
    float: Line overlap ratio. Returns 0 if there is no overlap.
)r   r   
horizontal)r   r   r   unionsmalllargeInvalid mode -, must be one of ['union', 'small', 'large'].        )maxmin
ValueError)
rV   rW   rX   modestart_index	end_indexintersection_startintersection_endoverlap	ref_widths
             r*   "calculate_projection_overlap_ratiork      s   " "KL !%U/1CD5+U-=>3G!|w(%*:;c 2?
 
	 
u1153CeFX3X
	 
u1153CeFX3X
	 D6!NO
 	
 #,a-78S8r,   c                    [         R                  " U 5      n [         R                  " U5      n[         R                  " U S   US   5      n[         R                  " U S   US   5      n[         R                  " U S   US   5      n[         R                  " U S   US   5      n[         R                  " SXS-
  5      n[         R                  " SXd-
  5      nXx-  n	[	        U 5      n
[	        U5      nUS:X  a  X-   U	-
  nOIUS:X  a  [         R                  " X5      nO,US:X  a  [         R                  " X5      nO[        SU S	35      eUS:X  a  g
X-  $ )a  
Calculate the overlap ratio between two bounding boxes using NumPy.

Args:
    bbox1 (np.ndarray, list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]
    bbox2 (np.ndarray, list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]
    mode (str): The mode of calculation, either 'union', 'small', or 'large'.

Returns:
    float: The overlap ratio value between the two bounding boxes
r   r   r   r   r[   r\   r]   r^   r_   r`   )r   r9   r   r   calculate_bbox_arearc   )rV   rW   rd   x_min_intery_min_interx_max_intery_max_interinter_widthinter_height
inter_area
bbox1_area
bbox2_arearef_areas                r*   calculate_overlap_ratiorx      s-     HHUOEHHUOE**U1XuQx0K**U1XuQx0K**U1XuQx0K**U1XuQx0K**Q 9:K::a!:;L+J$U+J$U+Jw*Z7	::j5	::j5D6!NO
 	
 1}  r,   c                 j   U (       d  [        S5      e[        R                  " U 5      n[        R                  " USS2S4   5      n[        R                  " USS2S4   5      n[        R                  " USS2S4   5      n[        R                  " USS2S4   5      n[        R                  " X#XE/5      $ )a'  
Calculate the minimum enclosing bounding box for a list of bounding boxes.

Args:
    bboxes (list): A list of bounding boxes represented as lists of four integers [x1, y1, x2, y2].

Returns:
    list: The minimum enclosing bounding box represented as a list of four integers [x1, y1, x2, y2].
z$The list of bounding boxes is empty.Nr   r   r   r   )rc   r   r9   rb   ra   )bboxesbboxes_arraymin_xmin_ymax_xmax_ys         r*    calculate_minimum_enclosing_bboxr     s     ?@@ 88F#L FF<1%&EFF<1%&EFF<1%&EFF<1%&E 88U5011r,   c                 B    [        [        R                  " SU 5      5      $ )z#check if the char is english letterz
^[A-Za-z]$boolrematchchars    r*   is_english_letterr   (  s    -..r,   c                 B    [        [        R                  " SU 5      5      $ )zcheck if the char is numericz^[\d]+$r   r   s    r*   
is_numericr   -  s    T*++r,   c                     1 SknX;   $ )z
check if the char is non-breaking punctuation

Args:
    char (str): character to check

Returns:
    bool: True if the char is non-breaking punctuation
>   "',-:;   “   、   ，   ：   ；rB   )r   non_breaking_punctuationss     r*   is_non_breaking_punctuationr   2  s    ! ,,r,   c                 $   / nU H  nUS   [         S   ;   d  M  US   n[        [        [        US   5      5      u  pVpxSU SU SU SU SU S3n	[        R
                  " XU2XW2S S S24   5      n
UR                  U	U
XVXx4US	   S
.5        M     U$ )Nlabelimage_labels
coordinatezimgs/img_in__box__z.jpgscore)pathimgr   r   )r   r6   mapintr   	fromarrayr8   )original_imglayout_det_objsimgs_in_docdet_objr   x_miny_minx_maxy_maximg_pathr   s              r*   gather_imgsr   M  s    K"7~>>G$E)-c#w|7L.M)N&E%%eWE%%%%PTUH//,U{EK2/M"NOC$#(">$W-	 # r,   ratiosmallerc                     [        U 5      n[        U5      n[        XSS9nXb:  a  XE::  a  U(       d  XE:  a  U(       d  ggg)a  
Determine if the overlap area between two bounding boxes exceeds a given ratio
and return the smaller (or larger) bounding box based on the `smaller` flag.

Args:
    bbox1 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the first bounding box [x_min, y_min, x_max, y_max].
    bbox2 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the second bounding box [x_min, y_min, x_max, y_max].
    ratio (float): The overlap ratio threshold.
    smaller (bool): If True, return the smaller bounding box; otherwise, return the larger one.

Returns:
    Optional[Union[List[int], Tuple[int, int, int, int]]]:
        The selected bounding box or None if the overlap ratio is not exceeded.
r\   rd   r   r   N)rm   rx   )rV   rW   r   r   area1area2overlap_ratios          r*   _get_minbox_if_overlap_by_ratior   `  s@    *  &E&E+EwGMNwEN7r,   blocks	thresholdc           	         [        5       n[        U 5      n / n[        U S   5       H  u  pV[        US-   [	        U S   5      5       H  nU S   U   nXS;   d  Xs;   a  M  [        US   US   UUS9n	U	c  M.  US   S:H  n
US   S:H  nX:w  a#  U
(       a  UOUnUR                  U S   U   5        O
U	S:X  a  UOUnUR                  U5        M     M     [        USS9 H	  nU S   U	 M     U $ )	a  
Remove overlapping blocks based on a specified overlap ratio threshold.

Args:
    blocks (List[Dict[str, List[int]]]): List of block dictionaries, each containing a 'block_bbox' key.
    threshold (float): Ratio threshold to determine significant overlap.
    smaller (bool): If True, the smaller block in overlap is removed.

Returns:
    Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
        A tuple containing the updated list of blocks and a list of dropped blocks.
boxesr   r   )r   r   imageT)reverse)	r7   r   	enumerater   r   r   r8   addrL   )r   r   r   dropped_indexesoverlap_image_blocksrU   block1jblock2overlap_box_indexis_block1_imageis_block2_image
drop_indexindexs                 r*   remove_overlap_blocksr     s    eOfFvg/	q1uc&/23AG_Q'F#q'; ?|$|$	! !,"(/W"<"(/W"<"5&51J(//w
0KL&71&<!J##J/- 4 04 67OE" 7 Mr,   c                 \   [         R                  " U 5      n [         R                  " U5      n[        U R                  5      S:X  a  U O[	        U /5      S   n[        UR                  5      S:X  a  UO[	        U/5      S   n[        US   US   5      n[        US   US   5      n[        US   US   5      n[        US   US   5      nXW:  d  Xh:  a  gUS:X  a  [         R                  " XVXx/5      $ US:X  a+  [         R                  " XV/Xv/Xx/XX//[         R                  S9$ [        S	5      e)
a;  
Compute the intersection of two bounding boxes, supporting both 4-coordinate and 8-coordinate formats.

Args:
    bbox1 (tuple): The first bounding box, either in 4-coordinate format (x_min, y_min, x_max, y_max)
                   or 8-coordinate format (x1, y1, x2, y2, x3, y3, x4, y4).
    bbox2 (tuple): The second bounding box in the same format as bbox1.
    return_format (str): The format of the output intersection, either 'bbox' or 'poly'.

Returns:
    tuple or None: The intersection bounding box in the specified format, or None if there is no intersection.
r   r   r   r   Nbboxpoly)dtypez.return_format must be either 'bbox' or 'poly'.)	r   r9   r   shaper   ra   rb   int16rc   )	rV   rW   return_formatrect1rect2rn   ro   rp   rq   s	            r*   get_bbox_intersectionr     s3    HHUOEHHUOE%*E0G0PQR0SE%*E0G0PQR0SE eAha)KeAha)KeAha)KeAha)K ![%?xx;LMM	&	 xx****	 ((
 	
 IJJr,   c           
         U u  pgpUu  ppSSSSS.nX-
  U-  X-
  U-  X-
  U-  X-
  U-  /n[        U5      n[        U5      nXR                  U5         n[        U5      S:X  a  U / 4$ [	        S5       GH~  nUU   nU SS nUU   UU'   / / nnU Hm  n[        UUU   SS9nU[        R                  " S	S
5      :  a  UR                  U5        M?  U[        R                  " SS5      :  d  M\  UR                  U5        Mo     [        U5      S:  a  [        U5      S:  a  U Hy  nUU   nUu  pgpUu  ppX-
  U-  X-
  U-  X-
  U-  X-
  U-  /n[        U5      nUR                  U5      nUU   nUU   UU'   [        UUUUUU5      u  nn[        U5      S:X  d  My  M{     U Vs/ s H  nUU   PM
     nn[        U5      n   U U4$ UR                  U5        [        U5      nXR                  U5         nGM     U W4$ s  snf )a  
Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.

Args:
    supplement_region_bbox (list): The supplement region bbox.
    ref_region_bbox (list): The reference region bbox.
    image_width (int): The width of the image.
    image_height (int): The height of the image.
    block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
    block_bboxes (dict): The dictionary of block bboxes.

Returns:
    list: The new region bbox and the matched block idxes.
r   r   r   r   )r   r   r   r   Nr\   r   #match_block_overlap_ratio_thresholdg?#split_block_overlap_ratio_thresholdg?)r   rb   r   r   r   rx   r   getr8   ra   shrink_supplement_region_bboxr   remove) supplement_region_bboxref_region_bboximage_widthimage_heightblock_idxes_setblock_bboxesr#   r$   r%   r&   x1_primey1_primex2_primey2_primeindex_conversion_mapedge_distance_listedge_distance_list_tmpmin_distance	src_indexr   	dst_indextmp_region_bboxiner_block_idxessplit_block_idxes	block_idxr   split_block_idxsplit_block_bboxmax_distance
iner_idxesidxmatched_bboxess                                    r*   r   r     s   , ,NBB-<*H Q13	+%	,&	+%	,&	 &&89)*L$%=%=l%KLI
?q %r))1X(3	03%4Y%?	".0"+(I3i!8wM 225s   !''	2!4!45s"  "((3 )  1$$%)'8O'3O'D$%4NBB=M:H!+5!,6+5,6	*& $''9#:L 2 8 8 FI 4Y ?I1A)1LOI.2O''#$($3/OZ :!+ / (90 <LL;KCl3/;KNL%En%U"
 "#333 #)),756L,-E-El-STIe f "#333 Ms   !G:c                     Uc  U $ U u  p#pEUu  pgp[        [        X&5      5      n[        [        X75      5      n[        [        XH5      5      n[        [        XY5      5      n	XgX/nU$ )zUpdate region box with bbox)r   rb   ra   )
r   
region_boxr#   r$   r%   r&   	x1_region	y1_region	x2_region	y2_regions
             r*   update_region_boxr   =  sl    NBB1;.I)C&'IC&'IC&'IC&'I	=Jr,   formula_res_listocr_resc                    U  H  n[        [        [        US   5      5      u  p4pVX44XT4XV4X64/nUS   R                  U5        US   nUS   R                  U5        US   R                  S:X  a  [
        R                  " US   /5      US'   O"[
        R                  " US   US   /45      US'   US   R                  S5        US   R                  U5        US	   R                  S
5        M     g)zConvert formula result to OCR result format

Args:
    formula_res_list (List): Formula results
    ocr_res (dict): OCR result
Returns:
    ocr_res (dict): Updated OCR result
dt_polysrec_formular3   r5   r   
rec_labelsformular2   r4   r   N)r6   r   r   r8   sizer   r9   vstack)	r   r   formula_resr   r   r   r   poly_pointsformula_res_texts	            r*   !convert_formula_res_to_ocr_formatr  O  s	    (%)#c;z3J*K%L"eNNNN	
 	
"";/ +M :##$45;$$)#%88[-D,E#FGK #%99%J(?'@A$GK  	$$Y/##K0$$Q'' (r,   c                 P    [        [        U 5      u  pp4[        X1-
  XB-
  -  5      nU$ )zCalculate bounding box area)r   floatabs)r   r#   r$   r%   r&   areas         r*   rm   rm   n  s*    %NBBBG$%DKr,   c                 6    U u  p#Uu  pEX$-
  S-  X5-
  S-  -   S-  $ )z/Calculate euclidean distance between two pointsr   g      ?rB   )point1point2r#   r$   r%   r&   s         r*   caculate_euclidean_distr  u  s,    FBFBWNbg!^+33r,   c                    SnSnU R                   nU R                  nU R                  nU R                  nUGb  UR                  nUR                  n	[        UR                  U	-
  5      S:  n
US:  nXAR                  :  =(       a    XQR                   :  nU(       a?  [        UR                   U5      n[        UR                  U5      n[        XY-
  5      S:  n
SnO"[        U R                   UR                  -
  5      nXd-
  S:  nU
(       a4  U(       a-  U(       a&  U[        UR                  U R                  5      :  a  SnO
Xd-
  S:  a  SnXW-
  S:  a  SnX#4$ )zGet segment start flag and end flag based on previous block

Args:
    block (Block): Current block
    prev_block (Block): Previous block

Returns:
    seg_start_flag (bool): Segment start flag
    seg_end_flag (bool): Segment end flag
T
   r   r   F)	start_coordinateend_coordinateseg_start_coordinateseg_end_coordinatenum_of_linesr  rb   ra   width)block
prev_blockseg_start_flagseg_end_flagcontext_left_coordinatecontext_right_coordinater  r  num_of_prev_linespre_block_seg_end_coordinateprev_end_space_smallprev_lines_more_than_oneoverlap_blocksedge_distancecurrent_start_space_smalls                  r*   get_seg_flagr&  |  s}    NL#44$33 5511&33'1'D'D$
)),HHIBN 	 $5q#8  $&?&?? G(+F+FF 	 &)++-D'# (+))+C($ ,KLrQ ! M 6 69R9R RSM$8$RUW$W! !)(J$4$4ekk BB"N9B>"N4r9''r,   r   c                     U(       a  SSSSSSSSSS	S
.
nO<0 SS_SS_SS_SS_SS	_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_S S_S!S"S"S#S$.EnSnUR                  X5      $ )%N)         d   )r(        r+  )f   r.  r(  r+  )r.     r(  r+  )   r(  3   r+  )      r2  r+  )r0  r   L   r+  )5         r+  )      r8  r+  )
	doc_titledoc_title_textparagraph_titlesub_paragraph_titlevisionvision_titlevision_footnotenormal_textcross_layoutcross_referencer<  r:  table_title)r(  r(  r.  r+  figure_titlechart_titler@  textvertical_textinline_formular  )r   r(  r   r+  display_formulaabstractcontent)(      \   r+  seal)   rQ  rQ  r+  table)   rS  r   r+  r   figure)      rU  r+  )   r(  rS  r+  )r(        r+  )chart	referencereference_content	algorithm)r   )r   order_labellabel_colorsdefault_colors       r*   r   r     s9   -23#7)03,/3
!
3!
 -!

 /!
 0!
 /!
 3!
 %!
 .!
 /!
 '!
  /!!
" ,#!
& )'!
* (+!
. '/!
2 (3!
6 )7!
8 *-!5-A!
D )ME11r,   )TF)rZ   r[   )r[   )T)g?T)r   )F)1__all__r   copyr   typingr   r   r	   r
   r   numpyr   PILr   
componentsr   
ocr.resultr   settingr   r   ndarrayr+   r   r   r   r
  strrk   r6   tuplerx   r   r   r   r   r   r   r   r   r   r   r   dictr  rm   r  r&  r   rB   r,   r*   <module>rm     sy   
  5 5   0 " 5RZZ BJJ 4 : "	999 9 	9
 9x4t "		,9;,9;,9 ,9
 ,9d 
.!T5().!T5().! 	.!b24/
,
-6. 	cE#sC"4556cE#sC"4556  	
 eDIuS#sC%7889:F RV0c49n%&0380JN0
4S$s)^$%tDd3i,@'AAB0f+K\W4 
W4t$( (t (>4@(F22# 22U 22r,   