
from typing import Any, Dict

import torch
import unicodedata2
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torchvision.transforms import functional as F
from zhconv import convert

from modelscope.utils.constant import ModeKeys
from .base import OfaBasePreprocessor


def ocr_resize(img, patch_image_size, is_document=False):
    """
    Image resize function for OCR tasks.

    Scale the image so that its longer side matches `patch_image_size`,
    then edge-pad the shorter side so the output is a square of
    `patch_image_size` x `patch_image_size`.
    """
    img = img.convert('RGB')
    width, height = img.size

    if is_document:
        # Document mode first resizes to one long, fixed-height text strip
        # that is later cut into `doc_chunks` pieces and stacked vertically.
        # The exact strip size and chunk count are not recoverable from this
        # compiled artifact; 64 x (4 * patch_image_size) with 4 chunks is an
        # assumption that keeps the final size assertion satisfied.
        doc_chunks = 4
        new_height, new_width = 64, doc_chunks * patch_image_size
    elif width >= height:
        # Wide image: fix the width, scale the height, pad top and bottom.
        new_width = max(64, patch_image_size)
        new_height = max(64, int(patch_image_size * (height / width)))
        top = (patch_image_size - new_height) // 2
        bottom = patch_image_size - new_height - top
        left, right = 0, 0
    else:
        # Tall image: fix the height, scale the width, pad left and right.
        new_height = max(64, patch_image_size)
        new_width = max(64, int(patch_image_size * (width / height)))
        left = (patch_image_size - new_width) // 2
        right = patch_image_size - new_width - left
        top, bottom = 0, 0

    img_new = F.resize(
        img, [new_height, new_width],
        interpolation=InterpolationMode.BICUBIC)

    if is_document:
        # Cut the wide strip along its width and stack the pieces vertically,
        # turning the strip into a roughly square image.
        img_split = transforms.ToTensor()(img_new).chunk(doc_chunks, dim=-1)
        img_new = transforms.ToPILImage()(torch.cat(img_split, dim=-2))
        new_width, new_height = img_new.size
        top = (patch_image_size - new_height) // 2
        bottom = patch_image_size - new_height - top
        left, right = 0, 0

    img_new = F.pad(
        img_new, padding=[left, top, right, bottom], padding_mode='edge')
    assert img_new.size == (patch_image_size, patch_image_size)

    return img_new
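# A minimal usage sketch for `ocr_resize` (illustrative only: the 480 patch
# size and the blank test image are arbitrary demo values, not settings read
# from any model config):
#
#     from PIL import Image
#     line = Image.new('RGB', (200, 50), color='white')
#     square = ocr_resize(line, 480)  # scale long side, edge-pad short side
#     assert square.size == (480, 480)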
class OfaOcrRecognitionPreprocessor(OfaBasePreprocessor):
    """
    OFA preprocessor for OCR recognition tasks.
    """

    def __init__(self, cfg, model_dir, mode=ModeKeys.INFERENCE, *args,
                 **kwargs):
        """Preprocess the data.

        Args:
            cfg (modelscope.utils.config.ConfigDict): model config
            model_dir (str): model path
            mode: preprocessor mode (model mode)
        """
        super(OfaOcrRecognitionPreprocessor,
              self).__init__(cfg, model_dir, mode, *args, **kwargs)
        # Resize to a square patch, then convert to a normalized tensor.
        self.patch_resize_transform = transforms.Compose([
            lambda image: ocr_resize(
                image,
                self.patch_image_size,
                is_document=self.cfg.model.get('is_document', False)),
            transforms.ToTensor(),
            transforms.Normalize(mean=self.mean, std=self.std),
        ])

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        if self.mode == ModeKeys.TRAIN:
            return self._build_train_sample(data)
        else:
            return self._build_infer_sample(data)
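    # Teacher-forcing layout for `_build_train_sample` below (illustrative):
    # if the tokenized label is [t1, t2, ..., eos], then
    #     target             = [t1, t2, ..., eos]
    #     prev_output_tokens = [bos, t1, t2, ...]
    # i.e. the decoder input is the target sequence shifted right by one.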
    def _build_train_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Building training samples.

        step 1. Preprocess the data using the logic of `_build_infer_sample`
            and make sure the label data is in the result.
        step 2. Preprocess the label data. Contains:
            - strip the label value.
            - tokenize the label as the `target` value, without the `bos` token.
            - add the `bos` token and remove the `eos` token of `target` to
              build `prev_output_tokens`.

        Args:
            data (`Dict[str, Any]`): Input data, should contain the keys of
                `image`, `prompt` and `label`; the first two refer to the
                image and text input data, and `label` is the supervised
                data for training.
        Return:
            A dict object, contains source, image, mask, label, target tokens,
            and previous output tokens data.
        """
        sample = self._build_infer_sample(data)
        target = sample['label']
        target_token_list = target.strip().split()
        target = ' '.join(target_token_list[:self.max_tgt_length])
        sample['target'] = self.tokenize_text(target, add_bos=False)
        sample['prev_output_tokens'] = torch.cat(
            [self.bos_item, sample['target'][:-1]])
        return sample

    def _build_infer_sample(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Building inference samples.

        step 1. Get the Pillow image.
        step 2. Do some transforms on the Pillow image as the image input,
            such as resize, normalize, to tensor, etc.
        step 3. Tokenize the prompt as the text input.
        step 4. Determine whether or not to add a label to the sample.

        Args:
            data (`Dict[str, Any]`): Input data, should contain the keys of
                `image` and `prompt`; the former refers to the image input
                data, and the latter to the text input data.
        Return:
            A dict object, contains source, image, image patch mask and
            label data.
        """
        image = self.get_img_pil(data[self.column_map['image']])
        patch_image = self.patch_resize_transform(image)
        prompt = self.cfg.model.get('prompt', ' 图片上的文字是什么?')
        inputs = self.tokenize_text(prompt)
        sample = {
            'source': inputs,
            'patch_image': patch_image,
            'patch_mask': torch.tensor([True])
        }
        if 'label' in self.column_map and self.column_map['label'] in data:
            # Normalize the label to NFKC form and to simplified Chinese.
            target = data[self.column_map['label']]
            sample['label'] = unicodedata2.normalize(
                'NFKC', convert(target, 'zh-hans'))
        return sample
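
if __name__ == '__main__':
    # Smoke test for `ocr_resize` (illustrative: 480 is an arbitrary demo
    # patch size, and blank images stand in for real OCR line crops).
    from PIL import Image

    for size in [(200, 50), (50, 200), (480, 480)]:
        demo = Image.new('RGB', size, color='white')
        out = ocr_resize(demo, 480)
        assert out.size == (480, 480)
    print('ocr_resize smoke test passed')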