
    9i                     R    S SK JrJr  S SKrS SKrS SKJr  SSKJ	r	   " S S\	5      r
g)    )AnyDictN)ModeKeys   )OfaBasePreprocessorc                      ^  \ rS rSrSr\R                  4U 4S jjrS\\	\
4   S\\	\
4   4S jrS\\	\
4   S\\	\
4   4S jrS\\	\
4   S\\	\
4   4S jrS	rU =r$ )
OfaSudokuPreprocessor   z#
OFA preprocessor for sudoku tasks
c                   > [         [        U ]
  " XU/UQ70 UD6  U R                  R                  R                  SS5      U l        U R                  R                  SS5      U l        U R                  R                  SS5      U l        U R                  (       Ga  / U l	        / U l
        [        S5       H  n[        S5       H  nU R                  R                  US-   5        U R                  R                  US-   5        US	:X  a  US	:X  a  MM  U R                  R                  S
5        U R                  R                  S
5        M     M     [        R                  " U R                  5      U l
        [        R                  " U R                  5      U l	        [        R                  " U R!                  U R                  5      5      n[        R"                  " U R                  U/5      n	[        R"                  " U R                  U/5      n
[        R"                  " U R$                  XR&                  /5      U l
        [        R"                  " U R$                  XR&                  /5      U l	        gg)zpreprocess the data

Args:
    cfg(modelscope.utils.config.ConfigDict) : model config
    model_dir (str): model path,
    mode: preprocessor mode (model mode)
promptz solve the sudoku .seg_embeddingFmax_struct_length   	   r      r   N)superr	   __init__cfgmodelgetinstruction_textr   r   input_puzzle_rowinput_puzzle_colrangeappendtorchtensor
zeros_liketokenize_textcatbos_itemeos_item)selfr   	model_dirmodeargskwargsidxjdxinstruct_segr   r   	__class__s              c/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/ofa/sudoku.pyr   OfaSudokuPreprocessor.__init__   s    	#T3CD 	E59	E=C	E !% 2 283H!J!XX\\/5A!%.A3!G$&D!$&D!Qx 8C))00q9))00q91H--44Q7--44Q7 $   %*LL1F1F$GD!$)LL1F1F$GD! ++""4#8#89;L$yy$*?*?)NO$yy$*?*?)NO$)II 0--@%BD!$)II 0--@%BD!'     datareturnc                     U R                   [        R                  :X  a  U R                  U5      $ U R	                  U5      $ )N)r%   r   TRAIN_build_train_sample_build_infer_sample)r#   r/   s     r,   __call__OfaSudokuPreprocessor.__call__:   s4    99&++D11++D11r.   c                 B   U R                  U5      nUS   nUR                  5       R                  5       R                  5       nSR	                  USU R
                   5      nU R                  USS9US'   [        R                  " U R                  US   SS /5      US'   U$ )	z
build sample for training tasks.

step 1. execute the `_build_infer_sample` function to get a batch sample
    for inference.
step 2. process the label data for training.
label NF)add_bostargetprev_output_tokens)
r4   lowerstripsplitjoinmax_tgt_lengthr   r   r    r!   )r#   r/   sampler;   target_token_lists        r,   r3   )OfaSudokuPreprocessor._build_train_sample@   s     ))$/"LLN00288:+,@T-@-@AB--fe-Dx',yy]]F8,Sb12(4#$r.   c                 X   SU R                   ;   a  SU;   d   S5       eXR                   S      nSR                  UR                  5       R                  5       R	                  5       SU R
                   5      nU R                  X R                  -   5      nUSU R                  U R
                  -    nSUS.nU R                  (       a  U R                  US'   U R                  US'   S	U R                   ;   a6  U R                   S	   U;   a#  S
R                  XR                   S	      5      US'   U$ )a  
build sample for inference tasks.

step 1. Get the input random masked sudoku text input, which shold be
    generated like below pseudo code.
    >>> sudo = np.random.randint(1, 9, size=(9, 9)) # a pseudo sudoku
    >>> sudo_text = " | ".join(" : ".join(str(c) for c in row) \
    >>>             for row in sudo)
step 2. Limit the length, tokenize the input text and add the bos token
    to the front of the input as source input.
step 3. Add a pseodo ids for every input.
textz;there must be `text` column in task key map and source datar9   Ng        )idsourceseg_row_tokensseg_col_tokenssolutionz {}r8   )
column_maprA   r>   r?   r@   r   r   r   max_src_lengthr   r   r   format)r#   r/   rG   src_itemrC   s        r,   r4   )OfaSudokuPreprocessor._build_infer_sampleQ   s    (Vt^ 	JI	J;OOF+,xx

**,2245Ld6L6LMN%%d-B-B&BCKd11D4J4JJLx0'+'<'<F#$'+'<'<F#$(T__.#.$#ll4
0K+LMF7Or.   )r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   	INFERENCEr   r   strr   r5   r3   r4   __static_attributes____classcell__)r+   s   @r,   r	   r	      s     (((BT2T#s(^ 2S#X 2S#X 4S> "S#X 4S>  r.   r	   )typingr   r   numpynpr   modelscope.utils.constantr   baser   r	    r.   r,   <module>ra      s#       . %c/ cr.   