
    9i#                         S SK r S SKrS SKJr  S SKrS SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJrJr  S SKJrJrJrJr   " S	 S
\5      rg)    N)defaultdict)FileLock)HubApi)DatasetContextConfig)DataMetaConfig)get_dataset_filesget_target_dataset_structure)REPO_TYPE_DATASETDatasetFormationsDatasetPathNameDownloadModec                   `    \ rS rSrSrS\4S jrSS jrS rSS jr	S	 r
S
\S\S\S\4S jrSrg)DataMetaManager   zData-meta manager.dataset_context_configc                 .    Xl         [        5       U l        g )N)r   r   api)selfr   s     l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/msdatasets/meta/data_meta_manager.py__init__DataMetaManager.__init__   s    &<#8    Nc           
         U R                   R                  nU R                   R                  nU R                   R                  nU R                   R                  nU R                   R
                  nU R                   R                  nU R                   R                  n[        R                  R                  UX!U5      n[        R                  R                  U[        R                  5      n	U R                   R                  =(       d
    [        5       n
U(       d  [        R                  nOUnU(       d  [        R                  nOUn[        R                   [        R                    U [        R                    U [        R                    U [        R                    U S3
n[        R                  R                  UU5      n[        R"                  " USS9  U[$        R&                  :X  a  [        R                  R)                  U	5      (       a1  [        R*                  " U	5      (       a  U R-                  U	5      u  nnGO'[/        US9   [        R"                  " U	SS9  U R1                  XXI5      u  nnS S S 5        OU[$        R2                  :X  a  [        R                  R)                  U	5      (       a0  [        R*                  " U	5      (       a  [4        R6                  " U	SS9  [/        US9   [        R"                  " U	SS9  U R1                  XXI5      u  nnS S S 5        OE[9        S[$        R&                  R:                   S[$        R2                  R:                   SU S	35      eXl        WU
l        WU
l         S
U;   a  US
   n[C        U5      S:  a
  US   U
l"        XR                   l        XR                   l#        XR                   l$        g ! , (       d  f       Nw= f! , (       d  f       N= f)Nz.lockT)exist_ok)	lock_file)ignore_errorsz"Expected values of download_mode: z or z
, but got  ..pyr   )%r   dataset_name	namespacedownload_modeversioncache_root_dirsubset_namesplitospathjoinr   	META_NAMEdata_meta_configr   LOCK_FILE_NAME_ANYLOCK_FILE_NAME_DELIMITERmakedirsr   REUSE_DATASET_IF_EXISTSexistslistdir_fetch_meta_from_cacher   _fetch_meta_from_hubFORCE_REDOWNLOADshutilrmtree
ValueErrorvaluemeta_cache_dirdataset_scriptsdataset_formationlendataset_py_scriptdataset_version_cache_root_dirglobal_meta_lock_file_path)r   r   r    r!   r"   r#   r$   r%   r=   r8   r*   lock_subset_name
lock_splitlock_file_namelock_file_pathr9   r:   tmp_py_scriptss                     r   fetch_meta_files DataMetaManager.fetch_meta_files   sK    22??//99	33AA--5544CC11==++11)+n6?6=*?& &D&5&?&?A66GG 
> L
 .AA*(;;JJ+5567_7_6`am`n+DDEgY+DDE,-o.V.V-WXbWcchj &D&46
2TB L@@@ww~~n--"**^2L2L595P5P"6$2!2 7KK>9=9R9R$:J6O%6 87
 l;;;ww~~n--"**^2L2LnDAN3NT:595N5N W6F2!2 43
 477==>d00667z-PRT  +9'+:(->*O#,U3N>"Q&5CA5F 2 8H##4Ec##BAO##>C 87 43s   +O,+O
O
O-c                 ~   U R                   R                  nU R                   R                  nU R                   R                  nU R                   R                  nU R                   R
                  nU R                   R                  =(       d
    [        5       nS nS nUR                  n	U	(       a  [        U	5      S:X  a  [        S5      eSU	;   a  U	S   S   nU	S    HG  n
U
R                  U S35      (       d  M  [        U
SS9 n[        R                  " U5      nS S S 5          O   U(       d  U(       d  [        SU SU S	35      eU(       a  Xl        OJ[!        XrU5      u  p[#        XUU R                   U5      u  pnnXl        Xl        UUl        UUl        Xl        X`R                   l        g ! , (       d  f       N= f)
Nr   zFCannot find dataset meta-files, please fetch meta from modelscope hub.r   z.jsonzutf-8)encodingzFile z
.json and z4.py not found,please specify at least one meta-file.)r   r   r$   r%   r    r"   r*   r   r9   r;   FileNotFoundErrorendswithopenjsonloadr<   r	   r   meta_data_fileszip_data_filesmeta_args_mapmeta_type_maptarget_dataset_structure)r   r   r$   r%   r    r"   r*   dataset_jsonr<   r9   	json_pathdataset_json_filetarget_subset_namerQ   meta_mapfile_mapargs_maptype_maps                     r   parse_dataset_structure'DataMetaManager.parse_dataset_structurei   s   22??11==++11//99	--5566GG 
> L
  *::#o"6!";#X  O# / 6q 9(1I!!\N%"899)g6:K#'99->#?L 7	 2
 $5#~Z~ >9 9: :
 1B.;W5<285F(	++W662H( 08,.6+-5*-5*8P57G##41 76s   F..
F<	c                     U R                   R                  n[        U R                   R                  5      nU R                  R                  XS9nU R                   R                  R                  U5        g )N)
dataset_idr"   )r   r   intr"   r   get_virgo_metaconfig_kwargsupdate)r   virgo_dataset_idr"   meta_contents       r   fetch_virgo_meta DataMetaManager.fetch_virgo_meta   s`    66CCd1199:xx..' / :##1188Fr   c                 B   [        [        5      nS n[        R                  " U5       H  n[        R                  R                  U5      S   nU[        R                  R                  :X  a-  [        [        R                  R                  U5      S   5      nMp  X%   R                  [        R                  R                  X5      5        M     U(       dA  [        [        R                  R                   S[        R                  R                   S35      eU[        U5      4$ )Nr   z! file does not exist, please use r   )r   listr&   r0   r'   splitextr   formation_mark_extr7   r^   appendr(   rH   r   r3   )r   r8   local_pathsdataset_typemeta_file_namefile_exts         r   r1   &DataMetaManager._fetch_meta_from_cache   s    !$' jj8Nww''7;H,??EEE"277#3#3N#CA#FG!((^<> 9 #$77==> ?*;;AAB"FG G -l;;;r   r   r    revisionr8   c                    [        5       nUR                  US-   U-   [        S9nU R                  R	                  XU5      u  pxU R                  R                  XXs5      n	U R                  R                  XX4UU	5      u  pX4$ )N/)repo_id	repo_type)r   get_endpoint_for_readr
   r   get_dataset_id_and_typeget_dataset_meta_file_list"get_dataset_meta_files_local_paths)r   r   r    rq   r8   _apiendpointr]   rm   	file_listrl   r:   s               r   r2   $DataMetaManager._fetch_meta_from_hub   s     x--Ol2' . ) $(88#C#CX$/ 
 HH77Z;	 *.)T)TX|*& --r   )r   r   )returnN)__name__
__module____qualname____firstlineno____doc__r   r   rD   rZ   rd   r1   strr2   __static_attributes__ r   r   r   r      sO    /C LP\-H^G<". . .'*.<?.r   r   )r&   r4   collectionsr   rK   datasets.utils.filelockr   modelscope.hub.apir   4modelscope.msdatasets.context.dataset_context_configr   +modelscope.msdatasets.meta.data_meta_configr   )modelscope.msdatasets.utils.dataset_utilsr   r	   modelscope.utils.constantr
   r   r   r   objectr   r   r   r   <module>r      s@    
  #  , % F5F Fq.f q.r   