
    9i                        % S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	J
r
Jr  S SKrS SKJs  Jr  S SKJr  S SKJr  S SKJr  S SKJrJr  SS	KJr  SS
KJr  SSKJrJ r J!r!J"r"J#r#J$r$  SSKJ%r%J&r&J'r'J(r(J)r)  \" 5       r*Sr+0 r,\\-\\-\4   4   \.S'      S!S\-S\S\	\-   S\	\/   S\S   SS4S jjr0S\\/   S\\/\\/   4   S\\/   4S jr1 S"S\)S\-S\
\-\%\\-   4   4S jjr2 " S S\5      r3 " S  S5      r4g)#    N)deepcopy)AnyDictListLiteralOptionalTupleUnion)
get_logger)Module)pad_sequence)PreTrainedTokenizerBaseStoppingCriteria   )loss_scale_map)get_tools_prompt)
load_batch
load_imagerescale_image	fetch_one	to_devicedecode_base64)HistoryPrompt	StopWordsContextMessageszYou are a helpful assistant.TEMPLATE_MAPPINGtemplate_type	tokenizerdefault_system
max_lengthtruncation_strategydeletetruncation_leftreturnTemplatec                 \    [         U    n[        US   5      nUR                  " XX440 UD6  U$ )Ntemplate)r   r   init_template)r   r    r!   r"   r#   kwargstemplate_infor*   s           g/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/templates/base.pyget_templater/      s8     %]3Mj12H9j`Y_`O    
token_listsub_token_listc                     [        U[        5      (       a  U/n/ nSn  U R                  US   US-   5      n[        U5      S:X  d  XX3[        U5      -    :X  a  UR	                  U5        MM  ! [
         a     U$ f = f)z,Find the index of a token in the token_list.r   r   )
isinstanceintindexlenappend
ValueError)r1   r2   residxs       r.   _findallr=   '   s    .#&&()
C
C"">!#4cAg>C>"a'>RUVdReLe=f+f

3   Js   AA, ,
A:9A:messagesreplace_tokenc                 0   / n/ n[        U 5       H  u  pVUR                  5       nUS   b	  US   S;   a  UR                  U5        M7  U[        R                  " X&S   5      -  n[        R
                  " X!US   5      US'   UR                  U5        M     X4$ )Ncontentrole)toolsystem	assistant)	enumeratecopyr9   refindallsub)r>   r?   patternimages_pathnew_messagesims          r.   replace_img_tagrP   7   s     KL(#FFHY<1V90O#O"2::g|<<K66'!I,GAiL" $   r0   c                   l    \ rS rSrSrS\S\SS4S jrS\R                  S	\R                  S\
4S
 jrSrg)StopWordsCriteriaG   zsAdding extra stop words in template to prevent unstoppable generation
Like suffixes and chat seps in the template.
r    
stop_wordsr'   Nc                 6    Xl         X l        X0l        SU l        g )Nr4   )r    rT   tokenizer_kwargs	start_idx)selfr    rT   rV   s       r.   __init__StopWordsCriteria.__init__K   s    "$ 0r0   	input_idsscoresc                    U R                   S:X  a  [        US   5      S-
  U l         U R                  nU R                  nUR                  " USU R                   S 24   SS  40 U R
                  D6nU HX  n[        U[        5      (       a	  Xv;   a    gM!  [        U5      S:  d  M2  US   R                  5       [        U5      * S  U:X  d  MX    g   g)Nr4   r   r   iTF)	rW   r8   r    rT   decoderV   r5   strtolist)rX   r[   r\   r,   r    rT   text	stop_words           r.   __call__StopWordsCriteria.__call__Q   s    >>R 1.2DNNN	__
 	!T^^_*< =cd C]tG\G\]#I)S))$ % y>A%)A,*=*=*?Y@P*QU^*^ $ r0   )rW   rT   r    rV   )__name__
__module____qualname____firstlineno____doc__r   r   rY   torchTensorboolrc   __static_attributes__ r0   r.   rR   rR   G   sF    "9 y im %,,  SW r0   rR   c                      \ rS rSrSr/ SQr/ SQrSrS/rSr	Sr
Sr       SjS
\S\S\\   S\S\\   S\\   S\S\S\\   S\S   S\S   SS	4S jjr\S
\S\4S j5       r\S
\S\4S j5       r\S\S\\   S\\   4S j5       r     SkS\S\\   S\\   S\S    S!\S"\SS	4S# jjrS$\S%\S\\\4   4S& jrS'\\\4   SS	4S( jrS'\\\4   SS	4S) jrSlS* jrS+ r S, r!SmS'\\\4   S-\S.\S\"\\\4   \\\4   4   4S/ jjr#S'\\\4   S\"\\\4   \\\4   4   4S0 jr$     SnS1\%\&   S2\%\&   S3\%\'   S4\\   S5\\   S6\\   S7\\   S8\SS	4S9 jjr(S1\%\&   S3\%\'   S\"\%\&   \%\'   4   4S: jr)\S1\%\&   S3\%\'   S\"\%\&   \%\'   4   4S; j5       r*S< r+S=\S>   S?\S'\\\4   S\%\&   4S@ jr,S?\S'\\\4   S\%\&   4SA jr-S?\S'\\\4   S\%\&   4SB jr.\/SC\%\\\4      SD\%\   SE\SF   SS	4SG j5       r0S1\%\&   S3\%\'   S\"\%\&   \%\'   4   4SH jr1 SoS1\%\&   S3\\%\'      S\"\%\   \%\   \%\'   \\\4   4   4SI jjr2\SJ\%\   SK\%\   SS	4SL j5       r3 SpSM\%\\\4      S\S\S\"\\\4   \\\4   4   4SN jjr4SO\S\\\4   4SP jr5SQ\\\4   SR\\\4   SS	4SS jr6\  SqST\%\7Rp                     SU\'S\SV   S\7Rp                  4SW jj5       r9SoSX\%\\\4      SY\\   S\\\4   4SZ jjr:\/S[\7Rp                  S\\S\%\   4S] j5       r;\S[\%\   S\\S\%\   4S^ j5       r<\S_\S\4S` j5       r=\/SpS6\Sa\Sb\S\4Sc jj5       r> SrS	SS	S	Sd.S[\%\   Sb\SQ\\\\4      Se\Sa\\%\      Sf\\%\      4Sg jjjr?S6\S'\@S\4Sh jrASirBg	)sr(   c   a  A template class for all supported models.

    Args:
        prefix: Prefix tokens before the first turn's prompt
        prompt: A list of elements whose types are str and list of integers. The input query part of every turn.
        chat_sep: The chat separators between every turn.
        suffix: The end tokens after the chat finished.
        default_system: A default system instruction.
        system_prefix: The prefix if the `system` is not empty.
        auto_add_bos: By default, the bos_token is not added. The auto_add_bos option will determine
            whether to add it based on `tokenizer.encode('')`.
        tools_prompt: The tools prompt name
        tool_prompt: The tool prompt, usually useful when there is a tool role
        padding_side: The padding side
        infer_media_type: The media type supported by the multi-modals
        Examples:
            <start_of_output>system
You are a helpful assistant!<end_of_output>
<bos><start_of_output>Who are you?<end_of_output>
<start_of_output>assistant:I am a robot<end_of_output>
<start_of_output>Who are you?<end_of_output>
<start_of_output>assistant:I am a robot<end_of_output> # noqa
                                     ----------system------------                                       ---query----                                            --response- -----chatsep-----                 ---query---                                             --response- ----suffix-----
            ----------------------------system_prefix---------------------------- ---------------------------- prompt -------------------------------------                                  ---------------------------- prompt -------------------------------------

    )<image><video><audio><bbox><ref-object>)imagesvideosaudiosobjects	norm_1000rq   TFNprefixpromptchat_sepsuffixr!   system_prefixauto_add_bostools_prompttool_promptpadding_side)leftrightinfer_media_type)
interleavedialogueroundr'   c                 0   XX4U4 H  nUc  M  [        U[        5      (       a  M   e   US:X  a  S nU R                  U5      (       a  Ub   S5       eUnU R                  U5      nXl        X`l        U R
                  c.  [        U Vs/ s H  nSU;   PM
     sn5      (       d
  Ub   S5       eX l        X0l        U R                  S LU l	        X@l
        XPl        SU l        Xpl        SU l        Xl        U	b  U	OU R                  U l        Xl        Xl        g s  snf )N z'The prefix already contains {{SYSTEM}}.
{{SYSTEM}}z'The template does not support `system`.TF)r5   list_has_system_replace_systemr{   r   anyr|   r}   support_multi_roundr~   r!   use_default_systemr   _is_initr   r   r   r   )rX   r{   r|   r}   r~   r!   r   r   r   r   r   r   xcontexts                 r.   rY   Template.__init__   s$    (MBA9
1d 3 333 C R!NF## (S*SS("M))&1F*%cZ`2aZ`w<73JZ`2a.b.b!)T+TT) #'==#< ,"&((*5*A;t{{( 0 3bs   Dc                 b    U  Vs/ s H  nSU;   d  M  UR                  SS5      PM     sn$ s  snf )zReplace system with the r   r   )replacer{   ps     r.   r   Template._replace_system   s1     6<QV|q?P+		,+VQQQs   
,,c                 J    [        U  Vs/ s H  nSU;   PM
     sn5      $ s  snf )Nr   )r   r   s     r.   r   Template._has_system   s$    v6v!LA%v6776s    r    valuec                     Uc  g/ nU Hg  n[        U[        5      (       a>  / nU H4  n[        U[        5      (       a  [        X5      nUR	                  U5        M6     UnUR	                  U5        Mi     U$ )zBTurn `eos_token_id` to token id

e.g. [['eos_token_id']] -> [[2]]
N)r5   r   r_   getattrr9   )r    r   	res_valuevres_vsub_vs         r.   token_attr_to_idTemplate.token_attr_to_id   sw     =	A!T""E!%-- '	 9LL'  Q  r0   r"   r#   r$   
loss_scaler   c                    U R                   SL d   S5       eSU l         Xl        [        USS5      U l        US:X  a  SU l        O+Ub(  U R
                  c   S[        U SS5       35       eX l        X0l        X@l        [        U[        5      (       a  [        R                  " US5      U l        OXPl        X`l        S	 H+  n[        X5      n	U R                  X5      n	[        XU	5        M-     g)
a  Init template by a tokenizer
Args:
    tokenizer: The tokenizer to tokenize the sentence
    default_system: The default system to use if the dataset does not provide one
    max_length: Max length of the sequence
    truncation_strategy: The truncation strategy
    loss_scale: The loss scale function to use
    rescale_image: Rescale image to reduce memory usage, default `-1` means no limitation
Fz"The template has been initialized.Tis_multimodalNr   7The template does not support `system`, template_type: r   )r{   r|   r}   r~   r   )r   r    r   r   r!   r   r"   r#   r5   r_   r   getr   r   r   setattr)
rX   r    r!   r"   r#   r   r   r,   keyr   s
             r.   r+   Template.init_template   s    " }}%K'KK%"$YFR"&D'%%1 rI'RVXgimJnIopr1"0$#6 j#&&,00TBDO(O*NCD&E)));EDu% Or0   modeldatac                     0 $ )zThis method will be called after data_collator and before the forward
Args:
    data: The `_data` field from the example batch, this field should be packed manually
Returns:
    Any extra fields need to be passed into the model.forward
rn   )rX   r   r   s      r.   post_encodeTemplate.post_encode   s	     	r0   examplec                     g)zCheck example validNrn   )rX   r   s     r.   check_exampleTemplate.check_example   s    r0   c                     US   nS GH  u  p4UR                  U5      (       d  M  U Vs/ s H  oUS   S:w  d  M  UPM     nn[        U5      nUS-  S:X  d   e[        US-  5       Vs/ s H	  oXS-    PM     n	nU R                  S:X  a  [	        [        US-  5      XU   5       Hu  u  pn[        [
        R                  " XJS   S   5      5      nU(       a0  US	::  d   S
U SU
S    35       eUS:X  a  XJS   S   -   U
S   S'   M`  Mb  US:X  a  Mj   SU
S    35       e   X    Vs/ s H  o(       d  M  UPM     snX'   GM8  [        [
        R                  " USR                  U	 V
s/ s H
  oS   S   PM     sn
5      5      5      nX    Vs/ s H  o(       d  M  UPM     snX'   [        X   5      nX-
  nUS:  d   SU SU 35       eXN-  U	S   S   S   -   U	S   S   S'   GM     gs  snf s  snf s  snf s  sn
f s  snf )a  Add default tags to example, this is for the multi-modal datasets
    1. For the round infer_media_type, this method will check the tag equals with the chat round
    2. Else, this method will try to add tags to the head of the messages
Args:
    example: The input example
r>   ))rw   rr   )rv   rq   )rx   rs   rB   rD      r   r   rA   r   zMThe model includes at most one media per round. However, this round contains z media_tags. query: zMissing media. query: 
zNumber of media: z, number of media_tags: N)r   r8   ranger   ziprH   rI   join)rX   r   r>   	media_key	media_tagmessage	_messagesn_roundrN   historyhrO   num_media_tags	num_medianum_new_tagss                  r.   add_default_tagsTemplate.add_default_tags   sJ    :&$i I{{9%%4<ZHQY@YWH	Zi.{a'''5:7a<5HI5HQs+5HI((G3#&uW\':GYEW#Xa),RZZ	Q4	?-S)T#1Q#6 c!77E6FFZ[\]^[_Z`!bc#6  .22;d9o2M!Y  3 $2Q#6W:PQRSTQUPV8WW#6 $Y 6=5G)M5G1!5G)MG&%(ItyycjIkcj^_A$y/cjIk?l)m%nN5<5G)M5G1!5G)MG& #G$6 7I#,#=L'1,u0A)Ldesdt.uu,/8/G'RS*UV-XaJb/bGAJqM),1 %jZ J *NIk)Ms.   G'G'(G,$
G12G1%G6
G;G;c                    U R                   S;   ai  [        UR                  S5      S5      u  US'   nUR                  S5      (       a  U(       a  [        S5      eUR                  S5      =(       d    / U-   US'   U R                   S;   a^  [	        SS/SS	/S
S/5       HF  u  p4n[        UR                  S5      XE5      u  US'   nUR                  U5      =(       d    / U-   X'   MH     gg)zbReplace the <img></img> with the images key and <image> tag

Args:
    example: The input example
>   NTr>   rq   rv   z3Do not mix use the <img></img> tag and <image> tag.rx   rw   rs   rr   z<audio>(.+?)</audio>z<video>(.+?)</video>N)r   rP   r   r:   r   )rX   r   rL   ktagrK   medias_paths          r.   replace_media_tagsTemplate.replace_media_tags  s     -/>J'04,GJ {{8$$ !VWW 'H 5 Ik9IGH -#&(';i=S(?AX'Y$[3BKK
+S4;0
#[ %[[^?rK/?
$[ .r0   c                    SSSS.nUR                  5        HA  nUR                  U5      (       d  M  [        X   [        [        45      (       a  M:  X   /X'   MC     U R                  U5        U R                  U5        UR                  S5      (       ay  [        US   [        5      (       aa  [        R                  " US   5      US'   / nUS    H6  n[        U[        5      (       a  US   US   SSS	.nUR                  U5        M8     XAS'   UR                  S5      =(       d    / nU(       a  UR                  S5      (       d  U R                  (       a  [        U[        5      nUR                  S5      (       a  U R                  US   X`R                  S
9  U R                  (       a5  U R                  S:w  a%  U Vs/ s H  n[!        XpR                   5      PM     nnU R                  (       d  [#        US9S   nXaS'   ggs  snf )a0  Preprocess multi-modal media resources in one example
    1. Wrap all values in media keys to list
    2. Replace <img></img> tags
    3. Add or check missing tags to examples
    4. Parse the string field in the `objects` field to jsons
    5. Load images if needed
Args:
    example: The input example
rx   rv   rw   )audioimagevideory   r   r   N)captionbbox	bbox_typer   )to_typereal)rv   )valuesr   r5   tupler   r   r   r_   jsonloadsr9   load_mediasr   r   normalize_bboxgrounding_typer   r   )rX   r   multimodal_keysr   ry   objectrv   imgs           r.   _preprocess_mediaTemplate._preprocess_media6  s    
 )//1I{{9%%j9KeUY].[.[&-&8%9" 2
 	(g& ;;y!!j1CS&I&I!%GI,>!?GIG!),fd++#)!9 &q	%)!"	F v& - ")I X&,"{{9%%)9)9#FJ7{{9%%##GI$6H[H[#\D$7$76$ALRSFS--?-?@FS##&f5h? &H  Ts   =Hc                 N   UR                  5       n[        U SS 5      nUR                  S5      =(       d    / nU R                  (       d  [	        S5      eUS   nU Vs/ s H  oUS   S:X  d  M  UPM     nnU Vs/ s H  oUS   S:w  d  M  UPM     nnU(       a  US   S   OS	nU(       d  U R
                  (       a  U R                  nOU R                  c
   S
U 35       eU(       aH  [        U[        5      (       a  [        R                  " U5      nUc  S	nU[        X0R                  5      -  nU(       a  U(       d  SS S./nXvS   S'   [        U5      S:  a  U R                  (       d
   SU 35       eXd-   US'   U R!                  U5        U R#                  U5        U$ s  snf s  snf )Nr   toolsz[Template is not initialized, please use the `get_template` function to obtain the template.r>   rB   rD   r   rA   r   r   )rB   rA   r   z?The template does not support multi-round chat, template_type: )rG   r   r   r   r:   r   r!   r   r5   r_   r   r   r   r   r8   r   r   r   )rX   r   r   r   r>   r   system_roundrD   s           r.   
preprocessTemplate.preprocessn  s   ,,.'.t_d'K'.{{7';'Ar }}mo o :&/7WxG6?h;VxW+3S8v(7RG8S>JQ	 :PR&&,,%%1 [I-Y[1%%%

5)~&u.?.?@@F)1d CD)/OI&x=1++ cQR_Q`ac+*5
w'7#; XSs   F-F9F"	F"	streamingis_trainingc                 <   U R                  U5      nU R                  " U40 UD6nUS   nU(       db  SU;   a\  UR                  S5      nUc   eUR                  S5      n[	        XR
                  5      nUR                  U R                  Xx5      5        U(       d  U$ U$ )aQ  The entrance method of Template!

Args:
    example: The input example
    streaming: If is streaming mode
    is_training: Use template in training
    **kwargs:
        model: The model instance, use only in `is_training=False`
Returns:
    if not streaming mode, returns tuple of (example, tokenizer_kwargs), else return example only
r   _datar   )r   _encoder   popr   deviceupdater   )	rX   r   r   r   r,   r;   inputsr   r   s	            r.   encodeTemplate.encode  s     //'*ll7-f-Qw&0JJw'E$$$::g&DT<<0DMM$**578#s//r0   c                 &   US   n[        [        R                   Vs/ s H  oAR                  U5      PM     sn5      nU R	                  UU R
                  U R                  UUS9u  pgUR                  S5      c  UR                  SS5        Xg4$ s  snf )z return: inputs, tokenizer_kwargsr>   )r   is_multi_modalr   labelsNr   )r   r(   special_keysr   _concat_and_tokenizer#   r   r   )rX   r   r,   r>   r   r   r   rV   s           r.   r   Template._encode  s    :&"@U@U#V@UKK$4@U#VW#'#<#<$$**) $= $  ::h'JJ|T*'' $Ws   Bcontext_listres_context_listloss_scale_listrD   queryresponseround0compute_lossc	                    Sn	Ub  [        US-   5      n	[        U5      nU H  n
[        U
[         5      (       a  SU
:X  aI  Uc   eU(       a  U R                  XV5      u  pOU/S/pUR                  U5        UR                  U5        Mg  / SQnXEXy/n[	        X5       H$  u  nnUc  M  X;   d  M  U
R                  UU5      n
M&     [        U
5      S:X  a  M  UR                  U
5        UR                  S5        M     g)z+Concat context list and replace placeholderNr   {{RESPONSE}}        )r   z	{{QUERY}}z
{{ROUND0}}z
{{ROUND1}}r   )r_   r5   r   extendr   r   r8   r9   )rX   r   r   r   rD   r   r   r   r  round1r   content_partweight_partold_str_listnew_str_listold_strnew_strs                    r.   _concat_context_listTemplate._concat_context_list  s     !_F[F#G'3''!W,#///#48OOE4T1k5=Jk$++L9#**;7V &v>*-l*I&Wg*w/A")//'7"C +J 7|q ##G,""2&' $r0   c                    UR                  SS5      nU(       a  U R                  X5      u  pU R                  " X40 UD6u  p/ n/ n/ nSn[        [	        X5      5       H  u  n	u  p[        U
[        5      (       a  X:X  a  UR                  U
5        M5  [        U5      S:  aA  UR                  SR                  U5      5        UR                  U5        UR                  5         [        U
[        5      (       a  UR                  U
5        O"UR                  U
5        UR                  U5        UnM     [        U5      S:  a1  UR                  SR                  U5      5        UR                  U5        XV4$ )z4Merge anything in the context to simplify the inputsr   Fr  r   r   )r   split_special_tokenspre_tokenizerF   r   r5   r_   r9   r8   r   clear)rX   r   r   r,   r   r;   res_loss_scaletemptemp_loss_scalerN   r   r   s               r.   _simplify_context_listTemplate._simplify_context_list  s9     &zz*:EB,0,E,El,d)L(,(9(9,(b[a(b%&((1#l2T(U$A$'3''Z-JG$t9q=JJrwwt}-"))/:JJLgs++KK(JJw'"))*5", )V t9q=JJrwwt}%!!/2""r0   c                    SSK Jn  / n/ n[        X5       H  u  pV/ n[        [	        U5      [
        5      (       a  U" U[        R                  5       H  nUR                  US   US   /5        M     U V	s/ s H  o(       d  M  U	PM     nn	UR                  U5        UR                  U/[        U5      -  5        M  UR                  U5        UR                  U5        M     X44$ s  sn	f )z`Split special tokens, for example `<image>`, `<video>`, this will help the replace_tag operationr   )split_str_parts_byr   rA   )utilsr  r   r5   r   r_   r(   special_tokensr  r8   r9   )
r   r   r  r;   loss_scale_resr   r   contextsdcs
             r.   r  Template.split_special_tokens  s     	.&(#&|#EGH)G,c22+GX5L5LMAOOQuXq|$<= N'/5x!1Ax5

8$%%zlS]&BC

7#%%j1 $F "" 6s   6
C&C&c                 4    U R                   " U4SSS.UD6S   $ )NF)return_attention_maskadd_special_tokensr[   )r    )rX   r   rV   s      r.   	_tokenizeTemplate._tokenize  s5    ~~`+0U`N^``km 	mr0   
media_typer   r   r   r7   c                 L    US:X  a  U R                   $ US:X  a  S/$ US:X  a  S/$ g)ar  Override this function to do your own replace operation.

This method is used to replace standard tags like `<image>` to some tokens that the model needs.

Args:
    media_type: The modal.
    index: The index of the medias, for example 0 represents the first elements in `images`
    example: The input example

Returns:
    The content or input_ids after replacement.
r   r   rr   r   rs   N)image_placeholder)rX   r&  r7   r   s       r.   replace_tagTemplate.replace_tag  s=      )))7";7"; #r0   c                 L    UR                  S5      nU(       a
  X1   nUS   /$ S/$ )a!  Replace objects referenced by the bbox to contents or input_ids. This is useful in the grounding task.
Override this function to do your own replace operation.

Args:
    index: The index in the `objects` key
    example: The input example

Returns:
    The contents or input_ids replaced
ry   r   ru   )r   )rX   r7   r   ry   object_s        r.   replace_objectTemplate.replace_object1  s3     ++i(nGI&''"##r0   c                 .   UR                  S5      nU(       a{  X1   n[        US   S   [        5      (       a4  SnUS    H!  nUSUS    SUS    SUS	    SUS
    S3	-  nM#     USS nU/$ SUS   S    SUS   S    SUS   S	    SUS   S
    S3	/$ S/$ )a  Replace bbox pointing to the objects to contents or input_ids. This is useful in the grounding task.
Override this function to do your own replace operation.

Args:
    index: The index in the `objects` key
    example: The input example

Returns:
    The contents or input_ids replaced
ry   r   r   r   z[(,r   z),(r      z)],Nr4   z)]rt   )r   r5   r   )rX   r7   r   ry   r-  all_objects
sub_objects          r.   replace_boxTemplate.replace_boxC  s     ++i(nG'&/!,d33 ")&/JR
1a
1gjYZm_\]^hij^k]llo#ppK #2)#2.#}$WV_Q/0'&/!2D1ESQWYZI[H\\]^efl^mno^p]qqstuu:r0   ry   rv   r   )r   rz   norm_1c                    U(       a  U(       d  gU GH  nUS   nUS   nUS   nX'   nUS:X  a  US:X  a  M%  UR                   UR                  p[        US   [        5      (       aZ  / nU HM  nUR	                  [        XXU
/5       VVs/ s H  u  pUS:X  a  [        X-  S-  5      OX-  PM!     snn5        MO     XS'   O>[        XYXU
/5       VVs/ s H  u  pUS:X  a  [        X-  S-  5      OX-  PM!     snnUS'   X4S'   M  US:X  a  US:X  a  GM  US	:X  a  U Vs/ s H  oS
-  PM	     snUS'   ORUS:X  aL  UR                   UR                  p[        XYXU
/5       VVs/ s H  u  p[        US
-  U-  5      PM     snnUS'   X4S'   GMy  US	:X  d  GM  US	:X  a  GM  US:X  a#  U Vs/ s H  n[        US-  5      PM     snUS'   ONUS:X  aH  UR                   UR                  p[        XYXU
/5       VVs/ s H  u  p[        X-  5      PM     snnUS'   X4S'   GM	     gs  snnf s  snnf s  snf s  snnf s  snf s  snnf )a%  Normalize bbox to needed.
to_type support real/norm_1000/norm_1, which literally means the coordinates in real, or normalized by 1000,
    or normalized by 1.

Args:
    objects: The objects containing the bbox
    images: The images list
    to_type: The coordinate type needed by the model.
Nr   r   r   r   r   rz   i  r7  g     8@)widthheightr5   r   r9   r   r6   )clsry   rv   r   r   r   r   r<   r   r9  r:  bboxes_boxcoorddims                  r.   r   Template.normalize_bbox\  sj    fF&>D{+I/CKEF"f$ %U\\vd1gt,,F $.1$v8V.W'.W
 7>6LCc 12RWR]].W'  !%
 &,6N +.dF64R*S&*SJE 3:[2HEK#-.ekY*S&F6N '.{#k)k)h&@D%Eudl%EF6N&$)KK6BEdTZciLjBk&BkJEEDL3./Bk&F6N '.{#h&h&k)DH%ID5c%#+&6D%IF6N&$)KK6ILT[ajpSqIr%sIr:5c%+&6Ir%sF6N&-{#U '& &F& &J &ts$   	&H&H$H* H/'H54H:c                 &   UR                  S5      n/ n/ nS H
  nSXG S3'   M     [        X5       H  u  pS H6  nUSU S3:X  d  M  U R                  XtU S3   U5      n
XG S3==   S-  ss'     O   US:X  a;  U R                  UR                  S	S5      U5      n
UR                  S	S5      S-   US	'   ODUS
:X  a;  U R	                  UR                  SS5      U5      n
UR                  SS5      S-   US'   OU/n
XZ-  nXi/[        U
5      -  -  nM     XV4$ )a	  This method happens before tokenization, replace standard tags to the contents or input_ids needed by
the model.

Args:
    context_list: The content list
    loss_scale_list: The loss scale list
Returns:
    The context_list and loss_scale_list after replacement.
r   r'  r   _index<>r   ru   object_indexrt   	box_index)r   r   r*  r.  r5  r8   )rX   r   r   r,   r   r;   r  r   r   r   c_lists              r.   r  Template.pre_tokenize  sD    **Y'&(,A$%GcL! - $'|#EG0!Ah&!--aA3f1FPFcL)Q.)	 1 n,!00^Q1OQXYF.5kk.!.Lq.PGN+(!--gkk+q.I7SF+2;;{A+F+JGK(%YFMClS[88N! $F" ""r0   c                    / n/ n/ n0 nUc  S/[        U5      -  n[        [        X5      5       H  u  nu  p[        U[        5      (       a6  U R                  U5      n
U R                  Xj5        U R                  " U40 U
D6nOUnX;-  nX'   S:  a  XK-  nOUS/[        U5      -  -  nUR                  U	/[        U5      -  5        M     X4XV4$ )z+return: input_ids, labels, tokenizer_kwargsr  )	r8   rF   r   r5   r_   _get_tokenizer_kwargs_concat_tokenizer_kwargsr$  r  )rX   r   r   r[   r   r   rV   rN   r   loss_weightcurr_tokenizer_kwargsr1   s               r.   _encode_context_listTemplate._encode_context_list  s    
  "	"$
"!dS%66O)23|3U)V%A%'3'' )-(B(B7(K%--.>V!^^GM7LM
$
#I!C'$4&3z?22{mc*o=> *W *>>r0   r   suffix_tokens_idc                     [        U5      nSn[        S[        U 5      5       HH  nXS-
     S:  a
  X   S:X  a  UnUS:  d  M   XS-
     S:X  d  M-  X   S:  d  M7  XC-
  nXR:  d  MB  XX3U-   & MJ     g )Nr   r   rJ  )r8   r   )r   rQ  
suffix_lenstartrN   lengths         r.   use_dynamic_eosTemplate.use_dynamic_eos  sy    )*
q#f+&A!e}!fi4&7qyVE]d2vyA~'7G5!34 'r0   r>   c                 .   U Vs/ s H  oUS   S:X  d  M  UPM     nnU Vs/ s H  oUS   S:w  d  M  UPM     nn[        U5      S:  a	  US   S   nOSn[        U5      S:  d   e[        U5      S:X  a'  US   S:X  a  SUS   /nSUS   /nOUS   S/nUS   S/nO[        U5      S-  S:X  d   e[        [        U5      S-  5       V	s/ s H  oU	   S   XS-      S   /PM     nn	[        [        U5      S-  5       V	s/ s H  oU	   S   XS-      S   /PM     nn	/ n
/ nU(       al  U R                  R                  n[	        U[
        5      (       aA  XR                  R                  S	5      ;   a#  U
R                  U/5        UR                  S
5        U R                  R                  5       nUc  U Vs/ s H  nSU;  d  M  UPM     nnUb$  [        U Vs/ s H  nSU;   PM
     sn5      (       a  U R                  nOU R                  nU R                  XXS9  [        [        Xx5      5       GH  u  n	u  u  nnu  nnUS:X  a  U R                   R                  5       OUR                  5       n/ nSnU	[        U5      S-
  :  aG  U Vs/ s H  nSU;  d  M  UPM     nnUR                  S5        XyS-      S   (       a  U R"                  nO"Ub  UR                  S5        U R$                  nSnU(       d	  U(       d  M  U R                  UU
UUUUU	U R&                  =(       d    US9  U
U-  n
UU(       a  S/OS
/[        U5      -  -  nGM     0 nU R(                  (       a  [        W5      [+        US   S   SL5      -   n[        U
5      n[        SS/[-        UU-
  U5      [-        SUU-
  5      /5       Hf  u  nnU R.                  " U
U   UU   40 UD6u  nnU R1                  UU5      u  nnn n!UUsUU S3'   UU S3'   U R2                  (       d  M^  U UU S3'   Mh     US   US   -   nUS   US   -   nUS   S   c  [        US   5      S:X  d   eSUS'   O\U R.                  " X40 UD6u  pU R1                  X5      u  nnn n!Ub.  U R5                  UU R1                  U R$                  5      S   5        US   S   c  SnU R6                  b  US:X  aL  [        U5      U R6                  :  a3  [8        R;                  S[        U5       SU R6                   S 35        0 0 4$ UU R6                  * S nUb  UU R6                  * S nW b  U U R6                  * S n UUS!'   UUS"'   U R2                  (       a  W US#'   UW!4$ s  snf s  snf s  sn	f s  sn	f s  snf s  snf s  snf )$z"
return: inputs, tokenizer_kwargs
rB   rD   r   rA   Nr   r   r   r   r  r   )rD   rC   Fr  T)r   r   rD   r   r  g      ?r4   answerr|   
_input_ids_labels_loss_scaleprompt_input_idsanswer_input_idsprompt_labelsanswer_labelsr%   zCurrent length of row(z ) is larger than the max_length(z), deleted.r[   r   r   )r8   r   r    bos_token_idr5   r6   r   r9   r|   rG   r   r{   r   r  rF   r   r   r}   r~   compute_per_round_lossoutput_prompt_answerrl   slicer  rO  r   rV  r"   loggerwarn)"rX   r>   r#   r   r,   r   rD   r   history_rolesrN   r   r   ra  r|   r   r{   qrqrrrr   extra_context_list	is_suffixr   
answer_len	total_lenr   _slice_res_context_list_loss_scale_listr[   r   r   rV   s"                                     r.   r   Template._concat_and_tokenize  sA    *2QgV_5P'Q+3S8v(7RG8Sv;?AYy)FF8}!!!x=A:-)!45!%x'7 8#I.5!)&!14 8x=1$)))SXY\]eYfjkYkSlmSlaI.1i0HISlGmUZ[^_g[hlm[mUnoUnPQqk&18E?63JKUnMo*,')>>66L,,,AVAVWYAZ1Z ''7&&r*!!#>-3SV'|77RgVFS>S!Pg,'"9!PQQ[[F''F!!&O![%.s7/J%K!A!AR68Fl4++002L!#I3w<!##7Cc|G|[bGb|c##N3q5>!$)-&##N3%)[[" 	AA)) $#!!%!<!<!I	 * K !$66 IRDB43GYCZ#ZZ3 &L4 $$/04Bt8S3TTJ,-I"Hh#7$))j*@)$L$)!Y-C$D$F GV 7;6Q6QRbciRjRabhRi7ums7u3!#3BFB[B[%'7C9?	6:/?FOQWC#j)*FcU'?,C???2<FcU+./ G 12V<N5OOIO,vo/FFFr{2&6/23q888*.' 150K0KL\0xqw0x->B>W>W ?3;Ivz+;!$$VT-F-Ft{{-STU-VW2;r?"F??&"h.3y>DOO3S4S^4D E448OO3DKQ R2v!4??"2"34I! 0 12%'(8(9:
'{!x??#-F< '''Y RS  no T!P  ds>   U4U4U9U9U>=V2
V VV
VVr   c                     0 $ )zreturn: curr_tokenizer_kwargsrn   )rX   r   s     r.   rK  Template._get_tokenizer_kwargs[  s    	r0   rV   rN  c                 &    [        U5      S:X  d   eg )Nr   )r8   )rX   rV   rN  s      r.   rL  !Template._concat_tokenizer_kwargs_  s    #$)))r0   	sequencespadding_value)r   r   c                    US:H  nU(       a  [        U SUS9$ [        U  Vs/ s H  oDR                  S5      PM     sn5      n/ nU  Hh  nXWR                  S5      -
  nS/UR                  5       S-
  S-  -  US/-   n	[        R
                  " U[        U	5      SU5      n
UR                  U
5        Mj     [        R                  " U5      $ s  snf )zPad sequence by some side

Args:
    sequences: The input sequences in tensor.
    padding_value: The padding value
    padding_side: The padding side

Returns:
    A tensor after padding
r   T)batch_firstry  r   r   r   constant)
r   maxsizer?  Fpadr   r9   rj   stack)rx  ry  r   padding_rightsmax_lenpadded_sequencesseq
pad_length	pad_tuple
padded_seqs              r.   r   Template.pad_sequenceb  s     %/	t=YY)4)Qvvay)45C 88A;.J	A23z1oEIsE)$4j-PJ##J/	  {{+,, 5s   Cbatch
padding_toc           	         U R                   nUR                  c   eU R                  S:H  n0 nSUS   ;   aq  U Vs/ s H  ofS   PM	     nnXuS'   [        [	        U5      5       Vs/ s H5  n[
        R                  " Xx   R                  S   [
        R                  S9PM7     snUS'   OSUS   ;   a  U Vs/ s H  n[
        R                  " US   5      PM     n	nXS'   [        [	        U	5      5       Vs/ s H1  n[
        R                  " [	        X   5      [
        R                  S9PM3     snUS'   S H8  n
XS   ;   d  M  U Vs/ s H  n[
        R                  " Xj   5      PM     snXZ'   M:     Ub  SU;   d   eX%S   S   R                  S   -
  nUS:  a]  [        / S	QUR                  SS
SS/5       H<  u  pX;   d  M  [        R                  " XZ   S   U(       a  SU4OUS4SU5      XZ   S'   M>     [        / SQUR                  SSS
SS/5       H,  u  pX;   d  M  U R                  XZ   XR                  5      XZ'   M.     SUS   ;   a  U Vs/ s H  ofS   PM	     snUS'   U Vs/ s H  ofR                  S5      c  M  US   PM     nn[	        U5      S:  ai  [
        R                  " U5      US'   U Vs/ s H  ofR                  S5      c  M  US   PM     nn[	        U5      S:  a  [
        R                  " U5      US'   U Vs/ s H  ofR                  S5      c  M  US   PM     nn[	        U5      S:  a  [
        R                  " U5      US'   U$ s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf s  snf )z
Args:
    batch(`List[Dict[str, Any]]`): The input data in batch
    padding_to(`int`, optional): Whether padding the batch to a fixed length, if none, the batch
        will be padded to the `longest`
r   inputs_embedsr   )dtypeattention_maskr[   )r   r   position_idsr4   )r[   r  r   r   r  rJ  r  r|  )r[   r  r  r   r   r  r   pixel_valuesimage_sizespixel_values_videos)r    pad_token_idr   r   r8   rj   onesshapeint64tensorr   r  r  r   r   concat)rX   r  r  r    r  r;   br  rN   r[   r   padding_lenr   r  r  r  s                   r.   data_collatorTemplate.data_collator  sn    NN	%%111))W4eAh&9>?A/M?#0 TYZ]^kZlTm%Tmq

M,2215ekkJTm%C ! E!H$?DEu!an5uIE(_dehires_t$u_tZ[UZZIL0A%U_t$uC !;CAh:?@%QELL0%@ < !#%%%$;'7':'@'@'DDKQ"%&m'0'='=q$B&O#QJCz&'eeCHQK]![9Ialno`p,6'?#Q
 v(55r1dBKMJCz,,SXu>O>OPM
 eAh0561gJ6CL38^5aEE.<Q).)5^|q "',,|"<C5:_UeeM>R+1]+UK_;!#%*\\+%>M"AFsA%%PeJf7q!67s"#a').6I)JC%&
U @% F$u A" 7^ ` tsG   M$<M4#M38M$"M)M.'M3 	M37M8	M8M= 	M=generate_idsinput_token_lenc                     [        U[        R                  5      (       a  UR                  5       n[	        U5      S:  a#  [        US   [
        [        45      (       a  US   nU R                  X5      $ )Nr   r   )r5   rj   rk   r`   r8   r   r   _get_generate_ids)r;  r  r  s      r.   get_generate_idsTemplate.get_generate_ids  s\    lELL11'..0L|!ja4-&P&P'?L$$\CCr0   c                 
    XS  $ Nrn   )r  r  s     r.   r  Template._get_generate_ids  s    ,--r0   cpc                     SU s=::  a  S::  dm  O  SU s=::  a  S::  d^  O  SU s=::  a  S::  dO  O  SU s=::  a  S::  d@  O  S	U s=::  a  S
::  d1  O  SU s=::  a  S::  d"  O  SU s=::  a  S::  d  O  SU s=::  a  S::  a   g  ggg)z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TFrn   )r  s    r.   _is_chinese_charTemplate._is_chinese_char  s     r#V#2)?)?WPRE]V]E]r,W,'R2J72JPW[]PhahPhb*F*20H0H 1I  r0   	print_idxis_finishedc                    U(       a  [        U5      $ UR                  S5      (       d1  [        U5      S:  a/  U R                  [        US   5      5      (       a  [        U5      nU$ [	        UR                  S5      S-   U5      nU$ )Nr   r   r4    r   )r8   endswithr  ordr}  rfind)r;  r   r  r  s       r.   _get_safe_print_idxTemplate._get_safe_print_idx  s{    x= T""c(ma&7C<P<PQTU]^`UaQb<c<cHI  HNN3/!3Y?Ir0   )rV   return_deltar  first_num_spacer  r  c                z   Uc  0 nU R                   n[        US5      (       a  UR                  5       n[        U R                  S   [
        5      (       aW  U(       a4  U(       aI  U[        U R                  S   5      * S  U R                  S   :X  a  US [        U R                  S   5      *  nU(       a%  U(       a#  USS  U R                   R                  /:X  a  US S nUR                  " U40 UD6nUb\  Un	US   n[        U5      [        UR                  S5      5      -
  n
U(       d  US:X  a  U
nXiS'   X:  a  SXj-
  -  U-   nOX:  a  XU-
  S  n[        U R                  S   [        5      (       a  U(       a4  U(       aq  U[        U R                  S   5      * S  U R                  S   :X  aD  [        [        U5      [        U R                  S   5      -
  S5      nUb  [        XS   5      nUS U nUb8  US   nU(       d  U R                  XS   5      US'   US US    nU(       a  XS  nU$ U(       a  U(       a   eU$ )Nr`   r4   r   r  )r    hasattrr`   r5   r~   r   r8   eos_token_idr^   lstripr_   r}  r  )rX   r  r  rV   r  r  r  r    r   res_fnscur_num_spacer<   old_print_idxs                r.   generate_ids_to_response!Template.generate_ids_to_response  s@    #!NN	<**'..0Ldkk"ot,,k[6BCTVDXCXCY6Z^b^i^ijl^m6m'(>#dkk"o*>)>?Lkl23.?DNND_D_C`.`',L##LE4DE&%G-a0OMC0D,EEM?b#8"/,
./"ABXM0#O$C$DEdkk"o $/;8SQUQ\Q\]_Q`MaLaLbCcgkgrgrsugvCvc(mc$++b/&::A>C$#|,~H %aLM#77A,O	!#MYq\2#N3  |33r0   c                     U$ r  rn   )rX   r   r   s      r.   post_process_generate_response'Template.post_process_generate_response  s    r0   )r   r   r}   r!   r   r   r   r"   r   r{   r|   r   r~   r   r   r    r   r   r#   r   )NNFreact_enNr   r   )NNr%   defaultr4   )r'   N)FF)NNNNTr  )F)r  r   )T)Cre   rf   rg   rh   ri   r  r   r   r)  r   rb  rc  r   r   r_   rl   r   rY   staticmethodr   r   r   r   r6   r+   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   floatr  r  r  r$  r*  r.  r5  classmethodr   r  rO  rV  r   rK  rL  rj   rk   r   r  r  r  r  r  r  dictr  rm   rn   r0   r.   r(   r(   c   s   , QN<L N"K!  2637&+%/15:AP\%1%1%1 $F+%1  	%1
 "*#%1 !) 0%1  $%1  #%1 'v.%1  '7%1 $++L#M%1 bf%1N R R6 R R 8F 8t 8 8 $; HVDT YabhYi  * 8<37S[)2,.'&"9'&'/}'& $,C='& -44O,P	'&
 $''& '*'& %)'&R s tCH~ T#s(^   cS#X  c4  cD@06'p)V0d38n 0 0TX 0otuyz}  @C  {C  vD  FJ  KN  PS  KS  FT  vT  pU 0.(tCH~ (E$sCx.RVWZ\_W_R`B`<a (( %)#'&*$(!%"'w-"' #7m"' "%[	"'
 SM"' C="' sm"' SM"' "' +/"'H#4= #SWX]S^ #,1$w-e2L,M#B #4= #.25k#>CDMSWX]S^D^>_# #&mg.G&H QT !#s(^04W*$C $$sCx. $T'] $$ tCH~ $w- 2 8.T$sCx.%9 8.49 8. '(E F8.KO8. 8.t##g ##e ##"'WtE{(B"C##P 6:?w-? &d5k2? ?DDItTWyZ^_dZegkloqtltguDu>v?8 
HS	 
HT#Y 
H4 
H 
H 38t('+DcN';t(25t( ,0t( +0S#XS#X0N*O	t(lS T#s(^ *c3h *`dehjmem`n *sw * ,.>E-U\\ 2 -$)-#*?#;-JO,,- -874S#X#7 7Xc] 7^bcfhkck^l 7r DELL D3 DSWX[S\ D D .S	 .C .DQTI . . S T   3 3 T ^a   !5
 6:")-/353i5 5
 #4S>25 5 DI&5 "$s),5ns T c r0   )NNr%   )z<img>(.+?)</img>)5r   rH   rG   r   typingr   r   r   r   r   r	   r
   rj   torch.nn.functionalnn
functionalr  
modelscoper   torch.nnr   torch.nn.utils.rnnr   transformersr   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   r   re  DEFAULT_SYSTEMr   r_   __annotations__r6   r/   r=   rP   rR   r(   rn   r0   r.   <module>r     s7    	  C C C    !  + B & * ] ] @ @	/.0 $sDcN*+ 0 %) $@H& SM 	
 !!<= c E#tCy.4I dSVi $ 0!h !#&!49#wS	:Q4R! ( 8n nr0   