
    9iQ                        S r SSKJr  SSKJrJrJrJr  SSKrSSK	r
SSKrSSKrSSKJrJr  SSKJr  SSKJr  S r " S	 S
\5      r " S S\5      rS rS rS rS\\\\R8                  4      4S jr " S S\5      r " S S\5      rg)z"
Processor class for GeoLayoutLM.
    )defaultdict)DictIterableListUnionN)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
transforms)	LoadImagec                 v    U R                  SU-   5      SS  n[        U5      S:  a  US   S:X  a  USS  nU$ UnU$ )Nzpad    r   u   ▁)tokenizelen)	tokenizertexttokstoks2s       n/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/vldoc/processing.pycustom_tokenizer      sP    ftm,QR0DD	A$q'U*:DHEL AEEL    c                   6   \ rS rSrSr         SS\S\S\\\4   S\S\	S	\S
\
\	\\	   4   S\
\	\\	   4   S\SS4S jjrS\
\R                  \R                   R                   4   4S jrS\
\\R                  \R                   R                   \4   4S jrSrg)ImageProcessor   z
Construct a GeoLayoutLM image processor
Args:
    do_preprocess (`bool`): whether to do preprocess to unify the image format,
        resize and convert to tensor.
    do_rescale: only works when we disable do_preprocess.
Ndo_preprocess	do_resize
image_size
do_rescalerescale_factordo_normalize
image_mean	image_std	apply_ocrreturnc
                 t   Xl         X l        Ub  UOSSS.U l        U=(       a    U(       + U l        XPl        X`l        Uc  [        OUnUc  [        OUn[        U[        5      (       a  XwU4OUU l
        [        U[        5      (       a  XU4OUU l        Xl        Xl        [        R                  " 5       U l        g )N   )heightwidth)r   r   sizer   r   r   r   r	   
isinstancefloatr    r!   r"   kwargsr
   ToTensortotensor)selfr   r   r   r   r   r   r    r!   r"   r+   s              r   __init__ImageProcessor.__init__"   s     +"","8J?
	 %<m*;,(.8.@*j
,5,=(9	BLC C::>$. 	>Hu? ?)	:#, 	""++-r   imagec                    [         R                  " U5      SS2SS2SSS24   nUR                  SS nU R                  (       a3  [        R
                  " UU R                  S   U R                  S   45      nU R                  U5      nX24$ )z>unify the image format, resize and convert to tensor.
        N   r'   r&   )r   convert_to_ndarrayshaper   cv2resizer(   r-   )r.   r1   size_rawimage_pts       r   
preprocessImageProcessor.preprocessA   s}     ,,U3Aq$B$J?;;r?>>JJu $		' 2DIIh4GHJE =='!!r   imagesc                    [        U[        5      (       d  U/n/ nU R                  (       aD  [        [	        U5      5       H,  nU R                  X   5      u  X'   nUR                  U5        M.     [        R                  " USS9nU R                  (       a  XPR                  -  nU R                  (       an  [        R                  " U R                  5      R                  SSSS5      n[        R                  " U R                  5      R                  SSSS5      nXV-
  US-   -  nSnU R                   (       a  [#        S5      e[	        U5      S:X  a  SnUUUS.n	U	$ )	zE
Args:
    images: list of np.ndarrays, PIL images or image tensors.
r   dimr      g:0yE>Nz!OCR service is not available yet!)r=   	ocr_infos	sizes_raw)r)   listr   ranger   r;   appendtorchstackr   r   r   tensorr    viewr!   r"   NotImplementedError)
r.   r=   rC   ir9   	images_ptmustdrB   datas
             r   __call__ImageProcessor.__call__M   s&   
 &$''XF	3v;'&*oofi&@#	8  * ( KKA.	??!$7$77Idoo.33Aq!Q?B,,t~~.33Aq!Q?C"C$J7I 	>>%&IJJy>QI""

 r   )r"   r   r   r   r   r    r!   r+   r   r(   r-   )	TFNFgp?TNNT)__name__
__module____qualname____firstlineno____doc__boolr   strintr*   r   r   r/   npndarrayPILImager;   rD   rQ   __static_attributes__ r   r   r   r      s     (,#(.2$))1&*=A<@#'. $. . "#s(^. "	.
 "'.  $. #5(5/#9:. "%%"89. !. #.>
"bjj#))//&A B 
"uT2::syy%KL r   r   c                        \ rS rSrS rS rSrg)OCRUtilso   c                     SU l         g )Nv0version)r.   s    r   r/   OCRUtils.__init__q   s	    r   c                     [         e)zL
sort boxes, filtering or other preprocesses
should return sorted ocr_infos
rK   )r.   rB   s     r   rQ   OCRUtils.__call__t   s
    
 "!r   rf   N)rS   rT   rU   rV   r/   rQ   r_   r`   r   r   rb   rb   o   s    "r   rb   c                     [        U 5      S:X  d  [        U 5      S:X  d   e[        [        U 5      5       HC  nUS-  (       a  [        S[        X   U5      5      X'   M)  [        S[        X   U5      5      X'   ME     U $ )N      r   r   )r   rE   maxmin)boxr&   r'   rL   s       r   	bound_boxrr   |   sk    s8q=CHM))3s8_q5C/0CFC./CF	 
 Jr   c           	      J    U S   U S   U S   U S   U S   U S   U S   U S   /nU$ )Nr   r   r4   rA   r`   )box2pbox4ps     r   
bbox2pto4prv      sB    a%(E!HeAha%(E!HaE Lr   c           
          [        U S   U S   U S   U S   5      [        U S   U S   U S   U S   5      [        U S   U S   U S   U S   5      [        U S   U S   U S   U S   5      /nU$ )	Nr   r4   rm      r   rA         )rp   ro   )ru   rt   s     r   
bbox4pto2pr{      s    E!HeAha%(3E!HeAha%(3E!HeAha%(3E!HeAha%(3	E Lr   tensor_dictsc                     [        [        5      nU  H/  nUR                  5        H  u  p4X   R                  U5        M     M1     0 nUR                  5        H  u  p4[        R
                  " USS9XS'   M     U$ )Nr   r?   )r   rD   itemsrF   rG   rH   )r|   one_dicttdkvres_dicts         r   stack_tensor_dictr      si    4 HHHJDAKq!   H kk!+ !Or   c                      \ rS rSr    SS\S\S\S\4S jjrS\S	\4S
 jr	      SS\S\S\
\
   S\
\
   S\S\4S jjrS\S\S\4S jrS\S\
\
   S\
\
   S\S\4
S jr     SS\
\
   S\S\
\
   S\
\
   S\4
S jjrSrg)TextLayoutSerializer   Nmax_seq_lengthmax_block_numuse_roberta_tokenizer	ocr_utilsc                    SU l         Xl        X l        X0l        X@l        XPl        X`l        Xpl        UR                  U l        UR                  U l
        UR                  U l        UR                  U l        S/S-  U l        S/S-  U l        g )Nre   g        rn   r   rm   )rg   r   r   r   r'   r&   r   r   pad_token_idbos_token_idcls_token_ideos_token_idsep_token_idunk_token_idcls_bbs_wordcls_bbs_line)r.   r   r   r   r'   r&   r   r   s           r   r/   TextLayoutSerializer.__init__   s     ,*"
%:""%22%22%22%22 EAIC!Gr   ocr_info
label_infoc                     [         eNrj   )r.   r   r   s      r   	label2seqTextLayoutSerializer.label2seq   s    !!r   	input_ids	bbox_line	bbox_wordr'   r&   c                 >   UbM  [        U5      [        U5      :X  d   e[        U5      [        U5      :X  d   eU R                  X#XEU5      u  p'ppnOUc   eU R                  XU5      u  p'ppn0 n[        R                  " U R
                  [        R                  S9U R                  -  US'   [        R                  " U R
                  [        R                  S9US'   [        R                  " U R                  [        R                  S9US'   [        R                  " U R                  [        R                  S9US'   [        R                  " U R
                  S[        R                  S9US'   [        R                  " U R
                  S	[        R                  S9US
'   [        R                  " U R
                  [        R                  S9US'   [        R                  " U R
                  [        R                  S9US'   [        R                  " U R
                  [        R                  S9US'   XV/S	-  nXV/S-  nU R                  /U-   U R                  /-   nU R                  /U-   U/-   nU R                  /U-   U/-   n[        U5      n[        U	5      n[        R                  " U5      US   SU& SUS   SU& [        R                  " U	5      US   SU& SUS   SU& [        R                  " U
5      US   SUS-
  & [        R                  " U5      US   SUS-
  & US   US   -  US'   [        R                  " U5      US   SUS-
  & [        R                  " U5      US   SU2SS24'   US   SS2/ SQ4   U-  US   SS2/ SQ4'   US   SS2/ SQ4   U-  US   SS2/ SQ4'   [        R                  " U5      US
   SU2SS24'   US
   SS2SS/4   U-  S-  US
   SS2SS/4'   US
   SS2SS/4   U-  S-  US
   SS2SS/4'   US
   R!                  5       US
'   U$ )aL  
Either ocr_info or (input_ids, bbox_line, bbox_word)
    should be provided.
If (input_ids, bbox_line, bbox_word) is provided,
    convenient plug into the serialization (customization)
    is offered. The tokens must be organised by blocks and words.
Else, ocr_info must be provided, to be parsed
    to sequences directly (the simplest way).
Args:
    ocr_info: [
        {"text": "xx", "box": [a,b,c,d],
         "words": [{"text": "x", "box": [e,f,g,h]}, ...]},
        ...
    ]
    bbox_line: the coordinate value should match the original image
        (i.e., not be normalized).
N)dtyper   attention_maskfirst_token_idxesfirst_token_idxes_maskrn   bbox_4p_normalizedrm   bboxline_rank_idline_rank_inner_idword_rank_idr4   r   )r   r4   rm   rx   )r   rA   ry   rz   r   i  rA   )r   halfseq2seqocr_info2seqrG   onesr   int64r   zerosr   float32r   r   r   r   rI   long)r.   r   r   r   r   r'   r&   bbs_wordbbs_liner   line_rank_idsline_rank_inner_idsword_rank_ids	token_seqsep_bbs_wordsep_bbs_line
len_tokens	len_liness                     r   serialize_single%TextLayoutSerializer.serialize_single   s   4  y>S^333y>S^333   yPBIM ''' !!(6:BIM 	!&u{{"46:6G6G"H	+&+kku{{'4	"#).ekk*3	%&.3kkekk/3	*+*/++%--+9	&'!KK%--9	&$)KKu{{%4	.!*/**u{{+4	&'$)KKu{{%4	.! **&&')3t7H7H6II	%%&1\NB%%&1\NB ^
)*	.3ll9.E	+{
+34	"#KZ05:\\6	%&z	2:;	*+JY7).m)D 	.!!J$%%& 	'<ALL=!	&'*q.9*3 +"$-.>$?+@	&' */m)D 	.!!J$%%& 	' ;@,,;	&'Q7 *+A|O<uD 	&'<8 *+A|O<vE 	&'<8 -2LL,B	&+:+q.) %.f$5a!Qi$@5$H4$O 	&!a&! 	" %.f$5a78!f7= %>@F%GIM%N 	&!a&! 	" &f-224	&r   c                    / n/ n/ n/ n/ n/ n	/ n
Sn[        U5       GH  u  pXR                  :X  a  SnU(       a    GOUS   n[        XU5      nSn[        US   5       GH  u  nnUS   nUS   n[        UX25      n[        U5      nU R                  (       a  [        U R                  U5      nOU R                  R                  U5      nU R                  R                  U5      n[        U5      S:X  a  UR                  U R                  5        [        U5      n[        U5      U-   U R                  S-
  :  a  Sn  GM"  U(       a  UR                  [        U5      S-   5        UR                  U5        UR                  U/U-  5        UR                  U/U-  5        U
R                  US-   /U-  5        UR                  US-   /U-  5        U(       a>  [        U	5      S:  a  U	S	   S:X  a  S
U	S	'   U	R                  S/US-
  S/-  -   5        SnGM  U	R                  US/-  5        GM     GM     [        U	5      S:  a  U	S	   S:X  a  S
U	S	'   XEXgUX4$ )NFTrq   wordsr   r   r4   r   r3   rA   )	enumerater   rr   rv   r   r   r   r   convert_tokens_to_idsr   rF   r   r   extend)r.   r   r'   r&   r   r   r   r   r   r   r   
early_stopline_idxlinelboxis_first_wordword_id	word_infowtextwboxwbox4pwtokens
wtoken_idsn_tokenss                           r   r   !TextLayoutSerializer.ocr_info2seq'  sb   	 
'1NH---!
;DT51D M&/W&>"!&) ' v5#D)---dnneDG"nn55e<G!^^AA'J
z?a'%%d&7&78z?y>H,t/B/BQ/FF!%J %,,S^a-?@  ,8 34 12$$gk]X%=>$$hl^h%>? .  !%8%<%A23+B/'..shlqc5I/IJ$)M'..x1#~>? '? 2P "#a',?,Cq,H&'#H/ 	/r   c                 H   / n/ n/ n/ n	/ n
/ n[        U5      nSu  pSnSn[        U5       GHQ  nUU   nUU   nUSL =(       d    UU:g  nUSL =(       d    UU:g  nUUp[        U5      S:X  a  [        U5      n[        U5      S:X  d   e[        UXT5      n[        U5      S:X  a  [	        U5      n[        U5      S:X  d   e[        UXT5      nUR                  U5        UR                  U5        U(       a  US-  nU(       aJ  US-  nUR                  US-   5        [        U
5      S:  a  U
S   S:X  a  S	U
S'   U
R                  S5        SnOU
R                  S5        U	R                  U5        UR                  U5        GMT     [        U
5      S:  a  U
S   S:X  a  S	U
S'   XXxXU4$ )
z
for convenient plug into the serialization, given the 3 customized sequences.
They should not contain special tokens like [CLS] or [SEP].
)NNr   r   Nrn   rm   r3   r4   rA   )r   rE   r{   rr   rv   rF   )r.   r   r   r   r'   r&   r   r   r   r   r   r   n_real_tokenslb_prevwb_prevline_idr   rL   lb_nowwb_now
line_start
word_starts                         r   r    TextLayoutSerializer.halfseq2seq_  s     I%}%Aq\Fq\F D=Fg,=J D=Fg,=J%vW6{a#F+v;!###vv5F6{a#F+v;!###vv5FOOF#OOF#11!((Q/* !4R!8A!=./'+#**1-#**1-  )  )A &D "#a',?,Cq,H&'#H> 	>r   rB   bboxes_linebboxes_wordrC   c           	      B   Ub  [        U5      O
[        U5      nUc  U R                  U R                  4/U-  n/ nUbt  [        U5      [        U5      :X  d   e[        U5      [        U5      :X  d   e[        X#XE5       H/  u  ppUu  pU R	                  S XXU5      nUR                  U5        M1     ObUc   S5       eU R                  b  U R                  U5      n[        X5       H+  u  nnUu  pU R	                  UXS9nUR                  U5        M-     [        U5      nU$ )Nz2For serialization, ocr_infos must not be NoneType!)r'   r&   )r   r&   r'   zipr   rF   r   r   )r.   rB   r   r   r   rC   r+   	n_samplesseqsinput_idr   r   r9   r&   r'   r   r   pt_seqss                     r   rQ   TextLayoutSerializer.__call__  s8    '0&;C	NY	++tzz23i?I y>S%5555y>S%5555<?K=D8Y ( 11$2;FL	I&=D (^*^^(~~) NN95	&))&?"( ( 11E 2 :	I&	 '@
 $D)r   )r   r   r   r&   r   r   r   r   r   r   r   r   rg   r'   )r%   r%   TN)NNNNr%   r%   )NNNNN)rS   rT   rU   rV   rZ   rX   rb   r/   rD   r   r   r   r   r   rQ   r_   r`   r   r   r   r      sU    /3'+$!$$ #$ )-$ %$2"$ "D "
  $ $cc c :	c
 :c c cJ6/T 6/# 6/s 6/p7>T 7>d4j 7>#Dz7>257>?B7>v !%"&"& :    $Z	 
 $Z     r   r   c            	           \ rS rSrSr     SS\S\S\4S jjr   SS\	\
\R                  \R                  R                  \4   S	\\   S
\S\
4S jjr SS jrSrg)	Processori  aX  Construct a GeoLayoutLM processor.

Args:
    max_seq_length: max length for token
    max_block_num: max number of text lines (blocks or segments)
    img_processor: type of ImageProcessor.
    tokenizer: to tokenize strings.
    use_roberta_tokenizer: Whether the tokenizer is originated from RoBerta tokenizer
        (True by default).
    ocr_utils: a tool to preprocess ocr_infos.
    width: default width. It can be used only when all the images are of the same shape.
    height: default height. It can be used only when all the images are of the same shape.

In `serialize_from_tokens`, the 3 sequences (i.e., `input_ids`, `bboxes_line`, `bboxes_word`)
    must not contain special tokens like [CLS] or [SEP].
The boxes in `bboxes_line` and `bboxes_word` can be presented by either 2 points or 4 points.
The value in boxes should keep original.
Here is an example of the 3 arguments:
    ```
    input_ids ->
    [[6, 2391, 6, 31833, 6, 10132, 6, 2283, 6, 17730, 6, 2698, 152]]
    bboxes_line ->
    [[[230, 1, 353, 38], [230, 1, 353, 38], [230, 1, 353, 38], [230, 1, 353, 38],
        [230, 1, 353, 38], [230, 1, 353, 38], [230, 1, 353, 38], [230, 1, 353, 38],
        [257, 155, 338, 191], [257, 155, 338, 191], [257, 155, 338, 191], [257, 155, 338, 191],
        [257, 155, 338, 191]]]
    bboxes_word ->
    [[[231, 2, 267, 2, 267, 38, 231, 38], [231, 2, 267, 2, 267, 38, 231, 38],
        [264, 7, 298, 7, 298, 36, 264, 36], [264, 7, 298, 7, 298, 36, 264, 36],
        [293, 3, 329, 3, 329, 41, 293, 41], [293, 3, 329, 3, 329, 41, 293, 41],
        [330, 4, 354, 4, 354, 39, 330, 39], [330, 4, 354, 4, 354, 39, 330, 39],
        [258, 156, 289, 156, 289, 193, 258, 193], [258, 156, 289, 156, 289, 193, 258, 193],
        [288, 158, 321, 158, 321, 192, 288, 192], [288, 158, 321, 158, 321, 192, 288, 192],
        [321, 156, 336, 156, 336, 190, 321, 190]]]
    ```

Nimg_processorr   r   c	           
      P    X0l         X@l        Xl        [        UUUUUUUS9U l        g )N)r   r   )r   r   r+   r   
serializer)
r.   r   r   r   r   r   r   r'   r&   r+   s
             r   r/   Processor.__init__  s5     +"."7!r   r=   rB   
token_seqsrC   c                    U R                  U5      nUS   nUc  US   OUnUc  US   OUnUc  U R                  X$S9nOU R                  "  SSU0UD6nUc   S5       e0 nXS'   UR                  5        H  u  pxX7   Xg'   M     U$ )Nr=   rB   rC   )rC   z token_seqs must not be NoneType!r1   r   )r   r   r~   )	r.   r=   rB   r   rC   img_databatchr   r   s	            r   rQ   Processor.__call__  s     %%f-(#-6->H[)I	-6->H[)I	HJ9 )9-79J%I'II%g$$&DA!}EH 'r   c                 2    0 nX&S'   X6S'   XFS'   U " US Xe5      $ )Nr   r   r   r`   )r.   r=   r   r   r   rC   
half_batchs          r   serialize_from_tokensProcessor.serialize_from_tokens  s0     
"+;$/=!$/=!FD*88r   )r   r+   r   r   )NTNr%   r%   )NNNr   )rS   rT   rU   rV   rW   r   rX   rb   r/   r   rD   r[   r\   r]   r^   rY   r   dictrQ   r   r_   r`   r   r   r   r     s    $T  /3'+! !/!
 )-! %!4 !%dBJJ		<= : 	
 8 )-
9r   r   ) rW   collectionsr   typingr   r   r   r   r7   numpyr[   r]   rG   timm.data.constantsr   r	   torchvisionr
   modelscope.preprocessors.imager   r   objectr   rb   rr   rv   r{   rY   Tensorr   r   r   r`   r   r   <module>r      s    $ . . 
  
  K " 4SV Sl
"v 
"Dc5<<.?)@$A T6 Tn_9 _9r   