
    9i
                     |    S SK r S SKrSr\R                  rS\4S jrS\4S jrS\S\4S jr	S	\S\4S
 jr
S rS rg)    Nu   ＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､　、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·！？｡。decoded_strc                    U R                  S5      n/ nSn[        U5       H\  u  pE[        U5      (       a  US:X  a  UnM  M!  US:w  a$  UR                  SR	                  XU 5      5        SnUR                  U5        M^     US:w  a"  UR                  SR	                  XS  5      5        SR	                  U5      R                  5       $ )N  )split	enumerate_is_chinese_strappendjoinstrip)r   old_word_listnew_word_liststartiwords         ^/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/utils/chinese_utils.py"remove_space_between_chinese_charsr   
   s    %%c*MME]+4  {  {$$RWW]-C%DE  & , {RWW]6%:;<88M"((**    stringc           
          SR                  SR                  U  Vs/ s H%  n[        U5      (       d
  U[        ;   a  SU S3OUPM'     sn5      R                  5       5      $ s  snf )Nr   r   )r   _is_chinese_charCHINESE_PUNCTUATIONr   )r   chars     r   rebuild_chinese_strr      sq    88BGG  D D!!T-@%@ D6FJ	K  uw	  s   ,A returnc                 &    [        S U  5       5      $ )Nc              3   x   #    U  H0  n[        U5      =(       d    U[        ;   =(       d	    U[        ;   v   M2     g 7f)N)r   r   ENGLISH_PUNCTUATION).0cps     r   	<genexpr>"_is_chinese_str.<locals>.<genexpr>&   s<      7/5 	 	%&9 9 	%$$	%/5s   8:)all)r   s    r   r
   r
   %   s     7/57 7 7r   r!   c                     [        U 5      n U S:  a  U S::  dT  U S:  a  U S::  dH  U S:  a  U S::  d<  U S:  a  U S::  d0  U S	:  a  U S
::  d$  U S:  a  U S::  d  U S:  a  U S::  d  U S:  a  U S::  a  gg)z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TF)ord)r!   s    r   r   r   +   sx    	RB	v",B&LR6\g"-g"-g"-g"-fvg"-r   c                 `    SSK Jn  / SQnSnU  H  nUS;   a  US   nX4-  nM     U" US5      nU$ )Nr   )convert)
u   零u   一u   二u   三u   四u   五u   六u   七u   八u   九r   
0123456789zzh-hans)zhconvr(   )textr(   chinese_numbernew_textxs        r   normalize_chinese_numberr/   :   sE    [NHq!A  x+HOr   c                     U R                  5       R                  [        S5      R                  [        S5      n [        R
                  " SSU 5      n U R                  S5      n U R                  S5      S U n U $ )Nr   z\s{2,}
)lowerreplacer   r   resubrstripr   )r+   	max_wordss     r   pre_chineser8   G   sn    ::< 3 #%%,W-@#%F 	66D
 ;;tD::c?:I&DKr   )r4   r   r   punctuationr   strr   r   boolr
   r   r/   r8    r   r   <module>r=      sj    
  N (( +C +& 7C 7D 7  
r   