
    9iS                     f    S SK Jr   " S S\5      r " S S\5      r " S S\5      r " S S	5      rg
)    )normalize_chinese_numberc                       \ rS rSrS rSrg)TrieNode   c                      0 U l         SU l        g)&
Initialize your data structure here.
FNdatais_wordselfs    m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/ofa/utils/text2phone.py__init__TrieNode.__init__   s     	    r	   N)__name__
__module____qualname____firstlineno__r   __static_attributes__ r   r   r   r      s    r   r   c                   6    \ rS rSrSrS rS rS rS rS r	Sr
g	)
Trie   z
trie-tree
c                 "    [        5       U l        g)r   N)r   rootr   s    r   r   Trie.__init__   s     J	r   c                     U R                   nU HK  nUR                  R                  U5      nU(       d  [        5       UR                  U'   UR                  U   nMM     SUl        g)z<
Inserts a word into the trie.
:type word: str
:rtype: void
TN)r   r
   getr   r   )r   wordnodecharschilds        r   insertTrie.insert   sS     yyEIIMM%(E#+:		% 99U#D	 
 r   c                     U R                   nU H'  nUR                  R                  U5      nU(       a  M'    g   UR                  $ )zB
Returns if the word is in the trie.
:type word: str
:rtype: bool
F)r   r
   r   r   )r   r    r!   r"   s       r   searchTrie.search)   s=     yyE99=='D4  ||r   c                 v    U R                   nU H'  nUR                  R                  U5      nU(       a  M'    g   g)zl
Returns if there is any word in the trie that starts with the given prefix.
:type prefix: str
:rtype: bool
FT)r   r
   r   )r   prefixr!   r"   s       r   
startsWithTrie.startsWith6   s6     yyE99=='D4  r   c                   ^ U4S jm/ nU R                  U5      (       d  U$ U R                  U5      (       a  UR                  U5        U$ U R                  nU H  nUR                  R                  U5      nM      T" X5      $ )zH
Returns words started with prefix
:param prefix:
:return: words (list)
c           	        > / nUR                   (       a  UR                  U 5        UR                  R                  5        H@  nUR	                  T" U [        U5      -   UR                  R                  U5      5      5        MB     U$ N)r   appendr
   keysextendstrr   )prepre_node	word_listxget_keys       r   r8   Trie.get_start.<locals>.get_keyJ   sh    I  %]]'')  s1vx}}7H7H7K!LM *r   )r+   r'   r0   r   r
   r   )r   r*   wordsr!   r"   r8   s        @r   	get_startTrie.get_startC   ss    	 v&&L;;vLL LyyE99=='D v$$r   )r   N)r   r   r   r   __doc__r   r$   r'   r+   r;   r   r   r   r   r   r      s     %r   r   c                   J   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	rU =r$ )
TrieTokenizer^   z
word_split based on trie-tree
c                 V   > [         [        U ]  5         Xl        U R	                  5         g r/   )superr?   r   	dict_pathcreate_trie_tree)r   rC   	__class__s     r   r   TrieTokenizer.__init__c   s!    mT+-"r   c                    / n[        U R                  SSS9 nU HR  nUR                  UR                  5       R	                  S5      S   R                  S5      R                  S5      5        MT     S S S 5        U$ ! , (       d  f       U$ = f)Nrzutf-8)modeencoding	r   z	utf-8-sig)openrC   r0   stripsplitencodedecode)r   r:   filelines       r   	load_dictTrieTokenizer.load_dicth   s{    $..sW=TZZ\//5a8??#VK02  > 	 >= s   AA;;
B
c                 X    U R                  5       nU H  nU R                  U5        M     g r/   )rS   r$   )r   r:   r    s      r   rD   TrieTokenizer.create_trie_treep   s$     DKK r   c                     U[        U5      S-
  ::  a<  X#   UR                  ;   a*  US-   nU R                  UR                  X#S-
        UU5      nU$ )N   )lenr
   	mine_tree)r   treesentencetrace_indexs       r   rZ   TrieTokenizer.mine_treeu   sY    3x=1,-$		1)Ao"nnIIhQ78(! r   c                 8   / n[        U5      nUS:w  a  SnU R                  U R                  X5      nUS:X  a.  UR                  USS 5        US[        U5       n[        U5      nO,UR                  USU 5        X[        U5       n[        U5      nUS:w  a  M  U$ )Nr   rX   )rY   rZ   r   r0   )r   r\   tokenssentence_lenr]   s        r   tokenizeTrieTokenizer.tokenize~   s    8}aK..HJKahqm,#Ac(m4"8}hq56#H>"8} a r   c                 F   Sn/ n/ nU H  n[        U5      S:w  aV  US:X  a  UR                  US S  5        M.  UR                  SR                  U5      5        UR                  US S  5        / nSnMh  US:X  a  UR                  U5        SnM  UR                  U5        M     U$ )Nr   rX    )rY   r0   join)r   
token_listflagoutputtempis         r   combineTrieTokenizer.combine   s    A1v{19MM!B%(MM"''$-0MM!B%(DD19KKNDKKN  r   )rC   )r   r   r   r   r=   r   rS   rD   rZ   rb   rl   r   __classcell__)rE   s   @r   r?   r?   ^   s+     

$ r   r?   c                   &    \ rS rSrS rS rS rSrg)
Text2Phone   c                 P    [        U5      U l        U R                  U5      U l        g r/   )r?   trie_cwsget_phone_map	phone_map)r   phone_dict_paths     r   r   Text2Phone.__init__   s     %o6++O<r   c                     [        5       n[        US5       nU H/  nUR                  5       R                  S5      u  pVXR;  d  M+  XbU'   M1     S S S 5        U$ ! , (       d  f       U$ = f)NrH   rK   )dictrL   rM   rN   )r   rv   ru   phone_map_file_readerrR   keyphone_seriess          r   rt   Text2Phone.get_phone_map   sd    F	/3'+@-$(JJL$6$6t$<!'%1cN . (
  ('
 s   *AA
A&c                 v   [        U5      nU R                  R                  U5      n/ nU H{  nX@R                  ;   a   UR	                  U R                  U   5        M2  [        U5      S:  d  MC  U H2  nXPR                  ;   d  M  UR	                  U R                  U   5        M4     M}     SR                  U5      $ )NrX    )r   rs   rb   ru   r0   rY   rf   )r   textr`   phonesr    chars         r   transText2Phone.trans   s    '-''-D~~%dnnT23TQ D~~-dnnT&:; !	  xxr   )ru   rs   N)r   r   r   r   r   rt   r   r   r   r   r   rp   rp      s    = r   rp   N)modelscope.utils.chinese_utilsr   objectr   r   r?   rp   r   r   r   <module>r      s?    Dv K%6 K%\ED EP   r   