
    9i	                     ,    S SK Jr  S SKJr  S rSS jrg)    N)parse_label_mappingc                    SSK nUR                  R                  R                  U 5        [        R
                  " XS-   5      n[        R
                  " X5      n[        R                  " U5      n[        R                  " U5      (       dD  SSKnUR                  U5       nUR                  [        R
                  " U5      5        SSS5        gg! , (       d  f       g= f)zimport external nltk_data, and extract nltk zip package.

Args:
    nltk_data_dir (str): external nltk_data dir path, eg. /home/xx/nltk_data
    package_name (str): nltk package name, eg. tokenizers/punkt
r   Nz.zip)nltkdatapathappendospjoindirnameexistszipfileZipFile
extractall)nltk_data_dirpackage_namer   filepathzippathpackagepathr   zfs           Z/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/utils/nlp/utils.pyimport_external_nltk_datar      s     IINN-(xxf'<=Hhh}3G++g&K::g__X&"MM#((;/0 '& &&s   &C
Cc                 6   UR                  SS5      nUR                  SS5      nUR                  SS5      nUc'  Ub$  UR                  5        VVs0 s H  u  pgXv_M	     nnnUcS  UbB  UR                  S5       b/  [        UR                  S5      5       VVs0 s H  u  pXx_M	     nnnOU b  [	        U 5      nUc  Ub  [        U5      nUc'  Ub$  UR                  5        VVs0 s H  u  pvXg_M	     nnnUb  XRS'   Ub  X2S'   Ub  XBS'   U$ s  snnf s  snnf s  snnf )a>  Parse labels information in order.

This is a helper function, used to get labels information in the correct order.
1. The kw arguments listed in the method will in the first priority.
2. Information in the cfg.dataset.train.labels will be used in the second priority (Compatible with old logic).
3. Information in other files will be used then.

Args:
    model_dir: The model_dir used to call `parse_label_mapping`.
    cfg: An optional cfg parsed and modified from the configuration.json.
    **kwargs: The user inputs into the method.

Returns:
    The modified kwargs.
label2idNid2label
num_labelszdataset.train.labels)popitemssafe_get	enumerater   len)		model_dircfgkwargsr   r   r   idlabelidxs	            r   parse_labels_in_orderr'      sQ     zz*d+Hzz*d+HL$/JH0/7~~/?@/?)"EI/?@?s||& (/3 4
 #,LL!78#:#:JC 
#:  H
 "*95Hh2]
H0/7~~/?@/?)%BI/?@)|%z%zM/ A
 As   D	D!D)NN)os.pathr   r	   modelscope.utils.hubr   r   r'        r   <module>r,      s     41&+r+   