
    i"                     D    S SK r S SKrS SKJrJr  S SKJr   " S S5      rg)    N)ListOptional)	Tokenizerc            	       ~    \ rS rSrSrSS\S\\   4S jjrS\S\S	\S
\	\
   4S jrS\	\
   S
\4S jrS\
S
\4S jrSrg)HuggingFaceTokenizer   zI
Tokenizing and encoding/decoding text using the Hugging face tokenizer.
N
model_pathconfig_pathc                    [         R                  R                  U5      (       d   U5       e[        R                  " U5      =U l        nUR                  5       U l        U(       a}  [        U5       n[        R                  " U5      nUS   (       a  U R
                  R                  US   5      OSU l        U R
                  R                  US   5      U l        SSS5        O@U R
                  R                  S5      U l        U R
                  R                  S5      U l        U R                  /U l        g! , (       d  f       N!= f)z
Initializes the Tokenizer with a tokenizer.json from HuggingFace.

Args:
    model_path (str): The path to the Tiktoken model file.
	bos_tokenN	eos_tokenz<|begin_of_text|>z<|endoftext|>)ospathisfiler   	from_filemodelget_vocab_sizen_wordsopenjsonloadtoken_to_idbos_ideos_idstop_tokens)selfr	   r
   	tokenizerftokenizer_configs         ^/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/pytorch_tokenizers/hf_tokenizer.py__init__HuggingFaceTokenizer.__init__   s    ww~~j))5:5)!*!4!4Z!@@
Y%446k"a#'99Q<  (4 JJ**+;K+HI 
 #jj445Ek5RS #" **001DEDK**00ADK KK
 #"s   0A)D55
Esboseosreturnc                t    [        U5      [        L d   eU R                  R                  U5      R                  $ N)typestrr   encodeids)r   r#   r$   r%   s       r    r+   HuggingFaceTokenizer.encode1   s-    Aw#~~zz  #'''    tc                 8    U R                   R                  U5      $ r(   r   decoder   r/   s     r    r2   HuggingFaceTokenizer.decode5   s    zz  ##r.   c                 :    U R                   R                  U/5      $ r(   r1   r3   s     r    decode_token!HuggingFaceTokenizer.decode_token8   s    zz  !%%r.   )r   r   r   r   r   r(   )__name__
__module____qualname____firstlineno____doc__r*   r   r!   boolr   intr+   r2   r6   __static_attributes__ r.   r    r   r      sm    
3 
Xc] 
:( (T ( (c ($S	 $c $&c &c &r.   r   )r   r   typingr   r   
tokenizersr   r   r@   r.   r    <module>rC      s     	 !  *& *&r.   