
    9i6                        S SK r S SKrS SKrS SKrS SKrS SKJrJrJr  S SK	J
r
  S SKJr  S SKJrJrJrJrJrJrJrJrJrJrJr  S SKJr  S SKrS SKJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,Jr-  S SK.J/r/  S S	K0J1r1  S S
K2J3r3J4r4J5r5J6r6J7r7J8r8J9r9  S SK:J;r;J<r<J=r=  S SK>J?r?J@r@  S SKAJBrB  S SKCJDrDJErEJFrFJGrGJHrHJIrIJJrJJKrKJLrLJMrMJNrNJOrOJPrPJQrQJRrRJSrSJTrTJUrUJVrVJWrW  S SKXJYrY  S SKZJ[r[J\r\J]r]  S SK^J_r_  S SK`JaraJbrbJcrcJdrdJere  S SKfJgrg  S SKhJiri  S SKjJkrk  S SKlJmrm  S SKnJoro  S SKpJqrq  S SKrJsrs  S SKtJuruJvrv  S SKwJxrx  S SKyJzr{  S SKyJ|r|J}r}J~r~  S SKJr  S SKJr  S S KJr  S S!KJr  S S"KJr  S S#KJrJr  S S$KJr  S S%KJr  \" 5       r\S&   r\" S'S(9 " S) S*\-5      5       r\\1S+'   S,\4S- jrS.\S/\!S0\4S1 jrSSS'SSS2.S3\S4\\   S5\\   S6\S7\\\\4      S8\\\      S0\{4S9 jjr SUS:S'SSSS;.S3\S<\\   S=\S8\S4\\   S>\\   S7\\\\4      S0\\\}\~4      4S? jjjrS'SSSS@.S3\SA\\\   \4   S8\S4\\   S>\\   S7\\\\4      S0\\\}\~4      4SB jjrS3\S<\S/\!S4\4SC jr  SVSD jr  SVSE\SF\SG\\\      S/\\!   S0\\   4
SH jjr SUSF\S/\\!   S0\\\\   4   4SI jjrS0\G4SJ jr SWSK\SL\SM\S4\SN\\\\\4   S/\\!   SO\\   S0\\\\4      4SP jjrS0\G4SQ jr " SR SS5      r\GRF                  ST 5       rg)X    N)	dataclassfieldfields)partial)Path)DictIterableListMappingOptionalSequenceUnionTupleLiteralAnyClassVar)	urlencode)BuilderConfigDatasetDatasetBuilderDatasetDictDownloadConfigDownloadManagerDownloadModeFeaturesIterableDatasetIterableDatasetDictSplitVerificationModeVersionconfig
data_files	LargeListr   )features)_FEATURE_TYPES)FILES_TO_IGNOREDataFilesDictEmptyDatasetError_get_data_files_patterns"_is_inside_unrequested_special_dir?_is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dirsanitize_patterns)!_prepare_path_and_storage_options	xbasenamexjoin)DataFilesNotFoundErrorDatasetNotFoundError)DatasetInfosDict)ALL_ALLOWED_EXTENSIONSBuilderConfigsParametersCachedDatasetModuleFactoryDatasetModule$HubDatasetModuleFactoryWithoutScript(HubDatasetModuleFactoryWithParquetExport!HubDatasetModuleFactoryWithScript&LocalDatasetModuleFactoryWithoutScript#LocalDatasetModuleFactoryWithScriptPackagedDatasetModuleFactory,create_builder_configs_from_metadata_configsget_dataset_builder_classimport_main_classinfer_module_for_data_filesfiles_to_hash_get_importable_file_pathresolve_trust_remote_code_create_importable_file_load_importable_fileinit_dynamic_modules)camelcase_to_snakecase)_EXTENSION_TO_MODULE_MODULE_TO_EXTENSIONS_PACKAGED_DATASETS_MODULES)
file_utils)!_raise_if_offline_mode_is_enabledcached_pathis_local_pathis_relative_pathrelative_to_absolute_path)is_small_dataset)MetadataConfigs)get_imports)tracked_str)
filesystem)	_un_chain)stringify_path)DatasetCardDatasetCardData)OfflineModeIsEnabled)DatasetInfo)HfApiRepoFile
RepoFolder)version)HubApi)get_endpoint)get_from_cache_ms)MS_DATASETS_CACHE)DEFAULT_DATASET_REVISIONREPO_TYPE_DATASET)has_attr_in_class)
get_logger)authorcardDatacitation	createdAtdisableddescription	downloadsdownloadsAllTimegatedlastModifiedlikespaperswithcode_idprivatesiblingsshatagsF)reprc                       \ rS rSr% Sr\\S'   Sr\\S'   \	" SSS9r
\\   \S	'   Sr\\   \S
'   \	" SSSS9r\\S'   S rSrg)ListMsa   a  Feature type for large list data composed of child feature data type.

It is backed by `pyarrow.ListType`, which uses 32-bit offsets or a fixed length.

Args:
    feature ([`FeatureType`]):
        Child feature data type of each item within the large list.
    length (optional `int`, default to -1):
        Length of the list if it is fixed.
        Defaults to -1 which means an arbitrary length.
featurelengthNF)defaultrx   idpa_typer
   )r   initrx   _typec                     U R                   S:w  a2  [        U 5      R                   SU R                   SU R                    S3$ [        U 5      R                   SU R                   S3$ )Nr}   (z	, length=))r~   type__name__r|   )selfs    l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/msdatasets/utils/hf_datasets_util.py__repr__ListMs.__repr__v   s\    ;;"4j))*!DLL>4;;-qQQ4j))*!DLL>;;     )r   
__module____qualname____firstlineno____doc__r   __annotations__r~   intr   r   r   strr   r   r   r   __static_attributes__r   r   r   rz   rz   a   sS    
 LFCd7B7!GXc]!vE>E3><r   rz   r
   objc           
          [        U [        5      (       a  U  Vs/ s H  n[        U5      PM     sn$ SU ;  d  [        U S   [        5      (       a/  U R	                  5        VVs0 s H  u  p!U[        U5      _M     snn$ [        U 5      n U R                  S5      n[        R                  " US5      =(       d    [        5       R                  US5      nUc-  [        SU S[        [        R                  " 5       5       35      eU[        :X  a&  U R                  S5      n[        [        U5      40 U D6$ U[        :X  a&  U R                  S5      n[        [        U5      40 U D6$ U[        :X  a'  U R                  S5      n[        SS[        U5      0U D6$ [        U5       Vs1 s H  ofR                  iM     nnU" S0 U R	                  5        VV	s0 s H  u  pX;   d  M  X_M     sn	nD6$ s  snf s  snnf s  snf s  sn	nf )a  Regenerate the nested feature object from a deserialized dict.
We use the '_type' fields to get the dataclass name to load.

generate_from_dict is the recursive helper for Features.from_dict, and allows for a convenient constructor syntax
to define features from deserialized JSON dictionaries. This function is used in particular when deserializing
a :class:`DatasetInfo` that was dumped to a JSON object. This acts as an analogue to
:meth:`Features.from_arrow_schema` and handles the recursive field-by-field instantiation, but doesn't require any
mapping to/from pyarrow, except for the fact that it takes advantage of the mapping of pyarrow primitive dtypes
that :class:`Value` automatically performs.
r   NzFeature type 'z&' not found. Available feature types: r|   r   )
isinstancelistgenerate_from_dict_msdictitemspopr%   getglobals
ValueErrorkeysr#   rz   
SequenceHfr   name)
r   valuekeyr   
class_typer|   ffield_nameskvs
             r   r   r      s    #t:=>#%e,#>>cZGd;;DGIIKPKjc*511KPP
s)CGGGE##E40NGIMM%4NJ>%0VW[\j\o\o\qWrVstuuY'')$.w7?3??V'')$+G4<<<Z'')$H"7"@HCHH#)*#56#5a66#5K6K#))+J+$!9I+JKK- ? Q$ 7Js   G*#G/!G5G:G:url_or_filenamedownload_configreturnc                 `   [        U5      nS nUR                  S5      (       a'  UR                  SS5      S   R                  SS5      u  p1[        U5      (       a.  U=(       d    [        nSX1S.n[        U5      nU R                  U-   n[        XS9n[        U5      nUR                  U5        U$ )	Nzhf://@   r}   /SDK)SourceRevisionFilePathr   )
r   
startswithsplitrO   rd   r   
_base_pathrM   rT   
set_origin)r   r   r   revisionparamsouts         r   _download_msr      s    /*OH!!'**$3$9$9#q$A"$E$K$KCQR$S!(( 77"'X['//F2
o
GC
c
CNN?#Jr   )r   timeoutfiles_metadatatokenexpandrepo_idr   r   r   r   r   c          
         U R                  USUSUU[        S9n[        0 5      nXS'   SUS'   U(       a  UR                  S5      S   OSUS'   X(S	'   SUS
'   SUS'   SUS'   SUS'   SUS'   / US'   / US'   SUS'   / n	U HR  n
[	        U
[
        5      (       d  M  U	R                  [        U
R                  U
R                  U
R                  S95        MT     XS'   [        S0 UD6$ )ak  
Get info on one specific dataset on huggingface.co.

Dataset can be private if you pass an acceptable token.

Args:
    repo_id (`str`):
        A namespace (user or an organization) and a repo name separated
        by a `/`.
    revision (`str`, *optional*):
        The revision of the dataset repository from which to get the
        information.
    timeout (`float`, *optional*):
        Whether to set a timeout for the request to the Hub.
    files_metadata (`bool`, *optional*):
        Whether or not to retrieve metadata for files in the repository
        (size, LFS metadata, etc). Defaults to `False`.
    token (`bool` or `str`, *optional*):
        A valid authentication token (see https://huggingface.co/settings/token).
        If `None` or `True` and machine is logged in (through `huggingface-cli login`
        or [`~huggingface_hub.login`]), token will be retrieved from the cache.
        If `False`, token is not sent in the request header.

Returns:
    [`hf_api.DatasetInfo`]: The dataset repository information.

<Tip>

Raises the following errors:

    - [`~utils.RepositoryNotFoundError`]
      If the repository to download from cannot be found. This may be because it doesn't exist,
      or because it is set to `private` and you do not have access.
    - [`~utils.RevisionNotFoundError`]
      If the revision to download from cannot be found.

</Tip>
r   F)r   path_in_repor   	recursiver   r   	repo_typer   rt   r   Nrh   rv   rq   rp   rl   rn   rr   rw   ri   rk   )	rfilenameblobIdsizeru   r   )list_repo_treere   r   r   r   r]   appendr   blob_idr   HfDatasetInfo)r   r   r   r   r   r   r   repo_info_iter	data_infodata_siblings	info_items              r   _dataset_infor      s$   b ((# ) N RIdO Ii3:'--,Q/Ihe $InIg!IjIkIgIfIj!Ik M#	i**  '11$,," $ *j%9%%r   T)r   r   r   r   r   r   r   r   c          
   #     #    [        SSS9nUR                  U[        S9n	Sn
Sn  UR                  UU=(       d    [        U=(       d    SUU
UU	S9nU HN  n0 nUS   S:X  a  SOSUS'   US   US'   US   US'   US   US'   US   S:X  a  [        S0 UD6O
[        S0 UD6v   MP     [        U5      U:  a  g U
S-  n
M  ! [
         a%  n[        R                  S	U S
U 35         S nAg S nAff = f7f)N      )r   max_retriesr   r   r   d   r   )r   r   	root_pathr   page_number	page_sizeendpointzGet dataset: z file list failed, message: Typetree	directoryfiler   r   pathSizer   Sha256oidr   )r`   get_endpoint_for_readre   get_dataset_filesrd   	Exceptionloggererrorr]   r^   len)r   r   r   r   r   r   r   r   _apir   r   r   dataset_filesefile_info_d	path_infos                   r   _list_repo_treer     sB     &a0D))#4 * 6H KI
	 22!=%=&-##'#! 3 M )KI/:6/Bf/LRXIf +F 3If +F 3If*84Ie+4V+<+F('Y'JLcYbLcc ) }	)q5   	LL=	1MaSQR	s/   $C/+B= A+C/=
C,C'"C/'C,,C/)r   r   r   r   pathsc          	      X    U R                  USUUUUS9nU Vs/ s H  oPM     sn$ s  snf )NF)r   r   r   r   r   r   )r   )	r   r   r   r   r   r   r   r   	item_infos	            r   _get_paths_infor   E  sE     (( ) N (66~)I~666s   'c           	      @   [        5       nU R                  S5      u  pVUR                  U [        S9nU(       a  UR                  c  SU S3Ul         UR                  UUUUSUS9n[        XS9n	U	$ ! [         a"  n
Sn	[        R                  U
5         S n
A
U	$ S n
A
ff = f)	Nr   r   zDownloading []F)	file_namedataset_name	namespacer   extension_filterr   )r   r    )
r`   r   r   re   download_descget_dataset_file_urlrM   FileNotFoundErrorr   r   )r   r   r   r   r   
_namespace_dataset_namer   r   repo_file_pathr   s              r   _download_repo_filer  ]  s    8D 'c 2J))#4 * 6H?88@*7~Q(G%33"& " 4 
 %+N 	  Q	s   A1 1
B;BBc                    [        U [        [        [        45      (       a*  U (       d  [	        S5      e[        [        U 5      S   5      nO[        U 5      nU=(       d    0 nU(       a  X!S'   [        X1=(       d    0 5      n0 n[        [        U5      5       HC  u  pgUu  pn
U[        U5      S-
  :X  a  [        S0 U
DUD6nM+  [        S0 U
DUD6US'   XS'   XS'   ME     US   u  pn[        U40 UD6nU$ )	Nzempty urlpath sequencer   protocolr   target_optionstarget_protocolfor   )r   r   tuplesetr   rW   rV   	enumeratereversedr   r   rU   )urlpathstorage_optionsr  urlpath0chaininkwargsichurlsnested_protocolkwr   _fss                 r   get_fs_token_pathsr  v  s    
 'D%-..566!$w-"23!'*%+O&.
#h 526EH8E?+$&!rE
Q-b-H-H%)%;B%;(%;!"&5"# , qEQ	H	)	)BIr   pattern	base_pathallowed_extensionsc                   ^ [        U 5      (       a  [        X5      n OF[        U 5      (       a4  [        R                  R                  U 5      S   [        R                  -   nOSn[        XS9u  p[        XS9nUR                  S5      S   R                  S5      S   =(       d    UR                  nU R                  S5      S   R                  S5      S   n[        [        5      [        U 5      1-
  n[        UR                  [         5      (       a  UR                  OUR                  S   n	U	S:w  a  U	S-   OSn
0 nU	S	:X  a-  ["        R$                  [&        R(                  " S
5      :  a  SUS'    UR*                  " U 4SS0UD6nUR1                  5        VVs/ s H  u  pUS   S:X  d  M  [        U5      U;  d  M!  [3        [        R                  R5                  X5      [        R                  R5                  Xv5      5      (       a  Mn  [7        [        R                  R5                  X5      [        R                  R5                  Xv5      5      (       a  M  UR9                  U
5      (       a  UOU
U-   PM     nnnTb  U Vs/ s H<  n[;        U4S j[        U5      R                  S5      SS  5       5      (       d  M:  UPM>     nn[=        U5      [=        U5      :  a;  [?        [        U5      [        U5      -
  5      n[@        RC                  SU  SU 35        OUnU(       d%  SU  S3nTb  US[?        T5       3-  n[-        U5      eU$ ! [,         a    [/        SU  S35      ef = fs  snnf s  snf )a  
Resolve the paths and URLs of the data files from the pattern passed by the user.

You can use patterns to resolve multiple local files. Here are a few examples:
- *.csv to match all the CSV files at the first level
- **.csv to match all the CSV files at any level
- data/* to match all the files inside "data"
- data/** to match all the files inside "data" and its subdirectories

The patterns are resolved using the fsspec glob.

glob.glob, Path.glob, Path.match or fnmatch do not support ** with a prefix/suffix other than a forward slash /.
For instance, this means **.json is the same as *.json. On the contrary, the fsspec glob has no limits regarding the ** prefix/suffix,  # noqa: E501
resulting in **.json being equivalent to **/*.json.

More generally:
- '*' matches any character except a forward-slash (to match just the file or directory name)
- '**' matches any character including a forward-slash /

Hidden files and directories (i.e. whose names start with a dot) are ignored, unless they are explicitly requested.
The same applies to special directories that start with a double underscore like "__pycache__".
You can still include one if the pattern explicitly mentions it:
- to include a hidden file: "*/.hidden.txt" or "*/.*"
- to include a hidden directory: ".hidden/*" or ".*/*"
- to include a special directory: "__special__/*" or "__*/*"

Example::

    >>> from datasets.data_files import resolve_pattern
    >>> base_path = "."
    >>> resolve_pattern("docs/**/*.py", base_path)
    [/Users/mariosasko/Desktop/projects/datasets/docs/source/_config.py']

Args:
    pattern (str): Unix pattern or paths or URLs of the data files to resolve.
        The paths can be absolute or relative to base_path.
        Remote filesystems using fsspec are supported, e.g. with the hf:// protocol.
    base_path (str): Base path to use when resolving relative paths.
    allowed_extensions (Optional[list], optional): White-list of file extensions to use. Defaults to None (all extensions).
        For example: allowed_extensions=[".csv", ".json", ".txt", ".parquet"]
Returns:
    List[str]: List of paths or URLs to the local or remote files that match the patterns.
r   r   r   )r  z::z://r}   r   hfz0.20.0Fexpand_infodetailTzUnable to find ''r   Nc              3   4   >#    U  H  nS U-   T;   v   M     g7f).Nr   ).0suffixr  s     r   	<genexpr>#_resolve_pattern.<locals>.<genexpr>  s#      E!Cv <#55!Cs   r$  r   z Some files matched the pattern 'z-' but don't have valid data file extensions: z with any supported extension )"rO   r/   rN   osr   
splitdrivesepr-   r  r   root_markerr  r&   r.   r   r  r   r!   HF_HUB_VERSIONr_   parseglobr   r0   r   r*   relpathr+   r   anyr   r   r   info)r  r  r  r   r  r  fs_base_path
fs_patternfiles_to_ignorer  protocol_prefixglob_kwargstmp_file_pathsfilepathr2  matched_pathsr   invalid_matched_files	error_msgs     `                r   _resolve_patternr=    s8   b   	+	w		GG&&w/2RVV;		@ 2G	G	EB??4(+11%8<NLt$Q'--e4R8J/*i.@-AAO(c::r{{AH*2f*<h&"OK4F11W]]85LL%*M"DEEE *8)=)=)?)?~x<6!	 h6	 3GGOOH3GGOOJ57		 LGGOOH3GGOOJ57	H''88o
? 	)?   %%2
%2 E!*8!4!:!:3!?!CE E ] 	 

 s8c-(($(]);c#h)F$G!KK27);h()+, &wiq1	)9$?Q:R9STTI	**JC  D$'7y%BCCD
s8   0M
 M'+M'<A	M'	A	M'!M'9M->M-
M$c                 r    [        [        XS9n [        U5      $ ! [         a    [	        SU  S35      Sef = f)u	  
Get the default pattern from a directory testing all the supported patterns.
The first patterns to return a non-empty list of data files is returned.

Some examples of supported patterns:

Input:

    my_dataset_repository/
    ├── README.md
    └── dataset.csv

Output:

    {"train": ["**"]}

Input:

    my_dataset_repository/
    ├── README.md
    ├── train.csv
    └── test.csv

    my_dataset_repository/
    ├── README.md
    └── data/
        ├── train.csv
        └── test.csv

    my_dataset_repository/
    ├── README.md
    ├── train_0.csv
    ├── train_1.csv
    ├── train_2.csv
    ├── train_3.csv
    ├── test_0.csv
    └── test_1.csv

Output:

    {'train': ['train[-._ 0-9/]**', '**/*[-._ 0-9/]train[-._ 0-9/]**',
                'training[-._ 0-9/]**', '**/*[-._ 0-9/]training[-._ 0-9/]**'],
     'test': ['test[-._ 0-9/]**', '**/*[-._ 0-9/]test[-._ 0-9/]**',
                'testing[-._ 0-9/]**', '**/*[-._ 0-9/]testing[-._ 0-9/]**', ...]}

Input:

    my_dataset_repository/
    ├── README.md
    └── data/
        ├── train/
        │   ├── shard_0.csv
        │   ├── shard_1.csv
        │   ├── shard_2.csv
        │   └── shard_3.csv
        └── test/
            ├── shard_0.csv
            └── shard_1.csv

Output:

    {'train': ['train[-._ 0-9/]**', '**/*[-._ 0-9/]train[-._ 0-9/]**',
            'training[-._ 0-9/]**', '**/*[-._ 0-9/]training[-._ 0-9/]**'],
     'test': ['test[-._ 0-9/]**', '**/*[-._ 0-9/]test[-._ 0-9/]**',
            'testing[-._ 0-9/]**', '**/*[-._ 0-9/]testing[-._ 0-9/]**', ...]}

Input:

    my_dataset_repository/
    ├── README.md
    └── data/
        ├── train-00000-of-00003.csv
        ├── train-00001-of-00003.csv
        ├── train-00002-of-00003.csv
        ├── test-00000-of-00001.csv
        ├── random-00000-of-00003.csv
        ├── random-00001-of-00003.csv
        └── random-00002-of-00003.csv

Output:

    {'train': ['data/train-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'],
     'test': ['data/test-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'],
     'random': ['data/random-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*']}

In order, it first tests if SPLIT_PATTERN_SHARDED works, otherwise it tests the patterns in ALL_DEFAULT_PATTERNS.
)r  r   zThe directory at z doesn't contain any data filesN)r   r=  r)   r   r(   )r  r   resolvers      r   _get_data_patternsr@     sQ    v IPH'11 	{*IJ
	s   
 6c                 J   U R                   R                  R                  SS 5      =(       d    [        nSU R                   SU SU R
                  =(       d    S 3R                  S5      nU R                  nU R                   R                  5       n[        USUUS9nU(       a)  [        R                  " [        U5      5      R                  O	[        5       nUR                  R                  SS 5      n[        R                  " U5      n[         R                  " U5      n	U R"                  b  [%        U R"                  5      n
O|U(       ab  S	['        [)        UR+                  5       5      5      ;   a<  Ub  X   S	   nO%['        [)        UR+                  5       5      5      S	   n[%        U5      n
O[-        X R                   S
9n
[.        R0                  " U
U[2        U R                   S9n[5        UU R                  U R                   S9u  p[7        US5      (       a  UR8                  " [:        U   S9nOUR<                  " [:        U   5      n[>        U   u  nnU(       aU  US;   n[@        RB                  " [D        5      nUUUUU R                   S.nSURF                  ;   a  UUS'   [E        S0 UD6u  nnO [I        U5      RJ                  " SS	U0UD6/nS n[M        5       nURO                  U[P        S9n[M        5       RS                  UUS9U R                  [U        [        U R                  5      R                  5      US.nU R                   R                  5       nURV                  c  SUl+        Uc#  [Y        U	5      S:X  a  ['        [)        U	5      5      nUn[[        UUUU	[]        UUUS9S9$ )Nr   zhf://datasets/r   r   r   	README.mdr   r   r   r   r   r"   r   )r  r  r   )r"   r   r   filter)
extensions>   audiofolderimagefolder)module_pathmetadata_configsr  default_builder_kwargsr   supports_metadatar   )r   r   )r  r   r   r"   zDownloading metadatar   )rI  builder_configsdefault_config_name)dataset_infosbuilder_configs_parametersr   )/r   r  r   rd   r   data_dirrstripcopyr  rX   loadr   datarY   rR   from_dataset_card_datar2   r"   r,   nextitervaluesr@  r'   from_patternsr3   r@   hasattrrD  rI   filter_extensionsrJ   inspect	signaturer=   
parametersr?   BUILDER_CONFIG_CLASSr`   r   re   get_file_base_pathrG   r   r   r6   r4   )r   r   r  r   r   dataset_readme_pathdataset_card_datasubset_namerI  rN  patternssubset_data_filesr"   module_namerJ  rH  r  rK  create_builder_signaturein_argsrL  rM  r   r   builder_kwargshashs                             r   get_module_without_scriptrk  e  s    ##3377
DIeMeH 1XJa8K7LMTTI 99G**//1O- '	 M`((.A)BCHHetev&66::64HK&==$;;<MNM "$T__5	ld!((*+/- - " 0 =l K $T*:*A*A*C%D El S$%67%';';= ,,1,,	J +FYY,,+'K z8$$&&2G2TU
112G2TU
/<NK'+II#*#4#45a#b & 0"&<#33
 ":"E"EE+<G'(/[/f^e/f,, k*?? %(0
 #8D))#4 * 6H 	##Gh#G		tDII334 	N **//1O$$,(>% "s='9Q'>"4#67D##;-+ 3$

 
r   r   r   r   importstrust_remote_codec                    / n/ n[        S U 5       5      n	U	(       a  U(       d  [        SU  S35      eUR                  5       nUR                  c  SUl        U H  u  ppU
S:X  a  UR	                  X45        M   X:X  a  [        SU  SU SU S	U S
3	5      eU
S:X  a"  [        5       nUS-   nUR                  UUUUS9nOU
S:X  a  UnO[        S5      e[        UUS9nUb   [        R                  R                  UU5      nUR	                  UU45        M     0 nU H  u  nn [        R                  " U5      nM     U(       a  [        U5      S:  a  SOSn[        U5      S:  a  SOSnSUR                  5       ;   a  SUS'   SUR                  5       ;   a  SUS'   [        SU  SU SSR                  U5       SU SS R                  UR!                  5       5       S!35      eU$ ! [         a    UU;  d  UU:w  a  UUU'    M   M  f = f)"a  
Download additional module for a module <name>.py at URL (or local path) <base_path>/<name>.py
The imports must have been parsed first using ``get_imports``.

If some modules need to be installed with pip, an error is raised showing how to install them.
This function return the list of downloaded modules as tuples (import_name, module_file_path).

The downloaded modules can then be moved into an importable directory
with ``_copy_script_and_other_resources_in_importable_dir``.
c              3   6   #    U  H  u  n    nUS ;   v   M     g7f))internalexternalNr   )r%  import_typer  s      r   r'  /_download_additional_modules.<locals>.<genexpr>  s&      $+ KAq 	//$+s   Loading z requires executing code from the repository. This is disabled by default for security reasons. If you trust the authors of this dataset, you can enable it with `trust_remote_code=True`.zDownloading extra moduleslibraryzError in the z script, importing relative z module but z: is the name of the script. Please change relative import zl to another name and add a '# From: URL_OR_PATH' comment pointing to the original relative import file path.rp  .py)r   r   r   r   rq  zWrong import_typer   r   dependencies
dependencythemitsklearnzscikit-learnBio	biopythonzTo be able to use z$, you need to install the following : z, z.
Please install z using 'pip install  z' for instance.)r1  r   rR  r   r   r`   r   rM   r)  r   join	importlibimport_moduleImportErrorr   r   rX  )r   r   r   r   rl  r   rm  local_importslibrary_importshas_remote_coderr  import_nameimport_pathsub_directoryr   r   r   local_import_pathneeds_to_be_installedlibrary_import_namelibrary_import_pathlib_dependencies_str	_them_strs                           r   _download_additional_modulesr    s   & MO  $+ O
 0tf ( (
 	
 &**,O$$,(C%@G<+)#""K#=>v%A+ O"m $11< >NO  *$8D#e+I"77)EQBKAI 8 LO J&)O011'+
 $ "-> Nk+<=>? AHD 4C00	Q))*=>C 5D .12G.H1.LNR^!"781<F$	-2244/=!),)..00+6!%( &JK\J]]_yy./00B9+Maxx-44678I
 	

   	Q"*??CVZmCm=P%&9: Dn	Qs   "G$$HHc                    U R                   nUR                  S5      u  p#U R                  R                  R	                  SS 5      =(       d    [
        nU S3n[        UUU R                  US9nU(       d  [        SU SU SU SU S	3	5      eS n[        US
U R                  US9n[        U5      n	[        UUUUU	U R                  U R                  S9n
/ nU(       a!  UR                  [        R                  U45        U(       a!  UR                  [        R                  U45        U R                  (       a  U R                  O	[!        5       n[#        U/U
 Vs/ s H  oS   PM	     sn-   5      n[%        USUUS9n[&        R(                  R+                  U5      (       dh  [-        U R                  U R                   S9nU(       a4  [.        R1                  SU S35        [3        UU
UUSUUU R4                  S9  O[7        SU S35      e[9        USUUS9u  nn[:        R<                  " 5         [?        5       RA                  US9US.n[C        UUU5      $ s  snf )Nr   r   rv  rC  zCannot find z in z at revision z. Please create z in the repo.rB  )r   r   r   r   rl  r   rm  r   datasets)dynamic_modules_pathmodule_namespacesubdirectory_namer   )rm  r   3Use trust_remote_code=True. Will invoke codes from 9. Please make sure that you can trust the external codes.)
local_pathr  additional_filesr  r  r  r   download_modert  z requires you to execute the dataset script in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set the option `trust_remote_code=True` to remove this error.)r   )r  r   )"r   r   r   r  r   rd   r  r   rS   r  rm  r   r!   DATASETDICT_INFOS_FILENAMEREPOCARD_FILENAMEr  rF   rA   rB   r)  r   existsrC   r   warningrD   r  r   rE   r  invalidate_cachesr`   r`  r6   )r   r   r  r  r   script_file_namelocal_script_pathdataset_infos_pathra  rl  r  r  r  locrj  importable_file_pathrm  rH  ri  s                      r   get_module_with_scriptr  A  s   99G 'c 2J##3377
DIeMeH',+%,,	 +,D	xj Q-.m=
 	

  . ,,	 +,G0",,00M !B!BDV WX!9!9;N OP8<8Q8Q444WkWm+,-/P-3A-/PPQD41#	 77>>.//5H^H^hlhqhqrNNPQXPY Z? ? @#,+!1%9!+"&"00	 7) $Q Q 
 .1#	K ! X000AN
 dN;;S 0Qs   I
c            /          \ rS rSr\                    SS\S\\   S\\   S\\\\\   \	\\\\\   4   4   4      S\\\\
4      S\\   S	\\   S
\\   S\\\\4      S\\\\4      S\\   S\S\\\\4      S\\\\4      S\S\\   S\\   S\S\\   S\\\\\\4   4(S jj5       r\             SS\S\\   S\\   S\\\\\   \	\\\\\   4   4   4      S\\   S	\\   S
\\   S\\\\4      S\\\\4      S\\\\4      S\\   S\\   S\4S jj5       r\          SS\S\\\\4      S
\\   S\\\\4      S\\   S\\   S\\\\\\4      S\\   S\\   S\4S jj5       rSrg)DatasetsWrapperHFi  Nr   r   rP  r"   r   	cache_dirr$   r   r  verification_modekeep_in_memory
save_infosr   r   	streamingnum_procr  rm  dataset_info_onlyr   c                    US:w  a  [         R                  " S[        5        UnUS:w  a  [         R                  " S[        5        OS nUb  U(       d  [        SU S35      e[	        U [
        R                  5      R                  5       (       a  [        S5      eU(       a  Ub  [        S5      e[        U=(       d    [        R                  5      n[        U(       d  U	=(       d    [        R                  O[        R                  5      n	U(       a  [        R                  SU  S	35        [         R"                  " SU UUUUUUUUUUUUS L S
.UD6nU(       Ga;  0 n[%        U [&        5      (       a]  U R)                  S5      (       aG  [*        R,                  R                  U 5      (       a#  SSKJn  U" U 5      nU Vs0 s H  nU/ _M     nnU$ Ub  [3        US5      (       d  [        R5                  SU  S35        U$ UR6                  nUR9                  5        Hv  u  nn['        U5      n[3        US5      (       aO  UR:                  bB  [=        UR:                  R?                  5       5       Vs/ s H  n['        U5      PM     snUU'   Mq  / UU'   Mx     U$ U(       a  URA                  US9$ URC                  UUU	UUS9  U
b  U
O[E        URF                  RH                  5      n
URK                  UU	U
S9nUbJ  [         RL                  " 5          [         RN                  " S[        5        URQ                  U5      nS S S 5        U(       a  URS                  5          [U        5       n [W        U 5      (       aN  U RY                  S5      S:X  a9  U R[                  S5      u  n!n"U R]                  U [^        S9n#U Ra                  U"U!U#S9  U$ s  snf s  snf ! , (       d  f       N= f! [b         a#  n$[        R                  SU$ 35         S n$A$U$ S n$A$ff = f)N
deprecated'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=<use_auth_token>' instead.zF'task' was deprecated in version 2.13.0 and will be removed in 3.0.0.
zEmpty 'data_files': 'z3'. It should be either non-empty or None (default).zjYou are trying to load a dataset that was saved using `save_to_disk`. Please use `load_from_disk` instead.zLoading a streaming dataset in parallel with `num_proc` is not implemented. To parallelize streaming, you can wrap the dataset with a PyTorch DataLoader using `num_workers` > 1 instead.r  r  )r   r   rP  r"   r  r$   r   r  r   r   r  rm  _require_default_config_namerv  r   )get_dataset_config_namesrL  zNo builder_configs found for z	 dataset.r"   )r   )r   r  r  r  r  )r   r  	in_memoryignorer   r   r   )r   r   r   z&Could not record download statistics: r   )2warningswarnFutureWarningr   r   r!   DATASET_STATE_JSON_FILENAMEr  NotImplementedErrorr   REUSE_DATASET_IF_EXISTSr   BASIC_CHECKS
ALL_CHECKSr   r  r  load_dataset_builderr   r   endswithr)  r   r  r  rZ  r   rL  r   r"   r   r   as_streaming_datasetdownload_and_preparerQ   r2  dataset_size
as_datasetcatch_warningssimplefilterprepare_for_task_save_infosr`   rO   countr   r   re   dataset_download_statisticsr   )%r   r   rP  r"   r   r  r$   r   r  r  r  r  r   r   use_auth_tokentaskr  r  r  rm  r  config_kwargsbuilder_instanceret_dictr  subset_list_subset_tmp_builder_configstmp_config_nametmp_builder_configitemdsr   r  r  r   r   s%                                        r   load_datasetDatasetsWrapperHF.load_dataset  s!   : \)MM[
 #E<MMY
 D!*'
|3fg  f889@@ 
 
78 8 -%34 4
 %] &M(4(L(LN, >!1!>!>!1!<!<> NNPQUPV WD D 
 -AA 
!+'+/)-
 
$ H$$$u)=)="''..QUBVBV=6t<7BC{GGRK{C'w7G7H0J 0J<TF)LM#3#C#C 7K7Q7Q7S3!3"%o"6-|<<ASA^A^AjGKL^LiLiLnLnLpGq0rGqtTGq0rH_-02H_- 8T O #88u8EE 	--+'/+ 	. 	
 -8N>N %%22?4 	 ((/$ ) &
 ((*%%h>((. + ((*		I8D%%$**S/Q*>,0JJsO)
M55 ,= 6 ?00mWalt0u 	A D 1sF +*  	INNCA3GHH		Is1   :N?*O-O	A(O 	
O
P$PPc                    U
S:w  a  [         R                  " S[        5        U
n	[        U=(       d    [        R                  5      nU	b'  U(       a  UR                  5       O	[        5       nXl        Ub<  U(       a  UR                  5       O	[        5       nUR                  R                  U5        U(       a  [        R                  SU  S35        [        R                  U UUUUUUUU[        U5      US9nUR                  nUR!                  SU5      nUR!                  SU5      nUR!                  SU=(       d    UR"                  R$                  5      nUR!                  S	S 5      nUR&                  (       a  UR&                  R)                  U5      OS nU [*        ;   ap  Ucm  UR"                  R,                  S
   R.                  cI  SU  S3n[0         Vs/ s H  n[0        U   U :X  d  M  UPM     nnU(       a  USUS
    S3-  n[3        U5      e[5        UUS9nU" SUUUUUUR6                  UUU	US.
UDUD6nUR9                  U5        U$ s  snf )Nr  r  r  r  )
r   r   r  rP  r"   r  rm  r  _require_custom_configsr   rP  r"   config_namer   r   z@Please specify the data files or data directory to load for the z dataset builder.z9
For example `data_files={"train": "path/to/data/train/*.z"}`)r   )
r  r   r  rP  r"   rj  r2  r$   r   r  r   )r  r  r  r   r  rR  r   r   r  updater   r  r  dataset_module_factoryboolri  r   rO  rM  rN  r   rJ   rL  r"   rH   r   r>   rj  !_use_legacy_cache_dir_if_possible)r   r   rP  r"   r  r$   r   r  r   r   r  r  rm  r  r  dataset_moduleri  r  r   r2  r<  	extensionexample_extensionsbuilder_clsr  s                            r   r  &DatasetsWrapperHF.load_dataset_builderJ  s   * \)MM[
 #E$] &M(4(L(LN  .22 &4&6 $)!&  .22 &4&6 ++22?CNNPQUPV WD D  +AA+'!/)E$($7 B 
 (66!%%j(;#''jA
$((4 M88LLN &)).$?*88 ++//>B 	 ..:3E"==##-:6Z[_Z``qrI+?"+?i'	2d: +?  " "Z[mno[pZqquvv	Y''/7 ,7 ,
%#!$$+,
 ,
 ,
 	::>J7"s   I
(I
r  c                    UR                  SS 5      nU=(       d    [        nUc  [        S,0 UD6nUR                  R	                  SU05        UR                  R	                  SU05        U(       a  UR
                  c  [        Ul        [        U=(       d    [        R                  5      nSUl	        SUl
        U[        R                  :H  Ul        [        [        S U R                  [         R"                  S5      R%                  S5      5      5      S   nUR'                  S5      (       d  US-   n[         R(                  R+                  X5      nU(       a  [,        R/                  SU  S	35        U [0        ;   a  [3        U UUUUS
9R5                  5       $ U R'                  U5      (       aU  [         R(                  R7                  U 5      (       a  [9        U UUUS9R5                  5       $ [;        S[=        U 5       35      e[         R(                  R7                  U5      (       a  [9        UUUUS9R5                  5       $ [         R(                  R?                  U 5      (       a  [A        U UUUS9R5                  5       $ [C        U 5      (       Ga  U RE                  S5      S:X  Ga   [G        5          [I        5       RK                  U UURL                  SS9n[e        U SUUS9n[         R(                  Rg                  [         R(                  Ri                  U5      5      nUURj                   Vs/ s H  nURl                  PM     sn;   a  Sn[n        Rp                  (       aO  U(       aH   [s        [t        S5      (       a  [u        U UUS9R5                  5       $ [u        U UUS9R5                  5       $ [s        [x        S5      (       a  [y        U UUUUUS 9R5                  5       $ [y        U UUUUUS!9R5                  5       $ [s        [z        S5      (       a  [{        U UUUUUS"9R5                  5       $ [{        U UUUUUS#9R5                  5       $ [;        S[=        U5       S+35      e! [N         a  n[Q        U[R        [T        RV                  RX                  [T        RV                  RZ                  45      (       a%  [[        SU  S[]        U5      R^                   S35      eS[a        U5      ;   a%  SU  S3n[c        U(       a  USU S3-   5      eU5      eS[a        U5      ;   a&  SU  S3nU(       a	  USU S3-   OUn[c        US-   5      eUeS nAff = fs  snf ! [N         a   n[,        Rw                  U5         S nAGNS nAff = f! [N         a  n[,        Rw                  S$U  S%U 35         [}        U UUS&9R5                  5       s S nA$ ! [N         a    [Q        U[R        5      (       a  [[        S'U  S(U 35      S e[Q        U[~        [b        [        45      (       a  US e[Q        U[:        5      (       a4  [;        S[=        U5       S)U  S*[]        U5      R^                   S%U 35      S eUS ef = fS nAff = f)-Nr   r   Tc                     U $ Nr   )xs    r   <lambda>:DatasetsWrapperHF.dataset_module_factory.<locals>.<lambda>  s    Qr   r   r}   rv  r  r  )rP  r"   r   r  )r  r  rm  z"Couldn't find a dataset script at )rP  r"   r  r   g      Y@)r   r   r   r   zCouldn't reach 'z' on the Hub (r   404z	Dataset 'z' doesn't exist on the Hubz at revision 'r"  401zT. If the repo is private or gated, make sure to log in with `huggingface-cli login`.rB  rC  F)r   r   )commit_hashr   )r   r   r  r  rm  )r  r   r  r  rm  )r   rP  r"   r   r  )r  rP  r"   r   r  z>> Error loading r~  )r  r  z1Couldn't reach the Hugging Face Hub for dataset 'z': z8 or any data file in the same directory. Couldn't find 'z"' on the Hugging Face Hub either: z( or any data file in the same directory.r   )Ar   rd   r   r  r  r  rc   r   r  extract_compressed_fileforce_extractFORCE_REDOWNLOADforce_downloadr   rD  replacer)  r+  r   r  r   r  r   r  rJ   r<   
get_moduleisfiler;   r   rP   isdirr:   rO   r  rL   r\   dataset_infor   r   r   rZ   requests
exceptionsConnectTimeoutConnectionErrorr   r   r   r1   r  basenamedirnameru   r   r!   USE_PARQUET_EXPORTrf   r8   r   r9   r7   r5   r0   r(   )r   r   r   r  r  rP  r"   r  rm  r  r  download_kwargsrc  filenamecombined_pathr  r   msgra  r  sibling#can_load_config_from_parquet_exporte1s                          r   r  (DatasetsWrapperHF.dataset_module_factory  s     +..vt<77",??O''../DE''..
H/EF88@(9O%$] &M(4(L(LN26/(,%)6,:W:W)W&;<<,22379::<>   ''%'HT4  NNPQUPV WD D 
 --/!% /+ jl ]]8$$ww~~d##:"/)=&7	
 *, (89RSW9X8YZ  WW^^M**6+%9"3	
 jl WW]]4  9!%+	- .8Z\	: d##

31(<Y'13" #(7#7#7 $!)-33 %	 $8 $LF ': !,$3%	'# !gg..rww?R/ST9E9N9N 9Ng))9N   ;@7 005X
,01Y[eff'O$(-54C(E FPZ\!R
 $L $,70?$A BLN ))JJWW@ %-,;*71E.?  %*,' =$/(7&3-A*; !jl# ))MzZZC %-%-'1,;*7  %*,' @$/!)#-(7&3 !jl#F $45N}5]4^ _7 89 9c !  !0$//*N$//??	  ..tfN47CSCSBTTUV  #a& )$/IJ2>F   .xj:; LO  #a& )$/IJDLcnXJa$@@RU2 #P P 
  7 J L  ) ,"LLOO,V  '0b=>'5-A"+- .8Z\: ! '!"&:;;-OPTvUXY[X\]#$ ""#9#79J#LM M !d*!"&788/@AZ[hAi@j k..2V3UVZ[]V^VgVgUhhjkmjnp  $	$
 $&!''s   %
U; 0$Q AU; +U	$U; &-U U -0U; U; :0U; +U; 
U)CUUU; 
U8U3-U; 3U88U; ;
Y(Y#!V?9Y(?B!Y  Y##Y(r   )NNNNNNNNNNFNNr  r  FNNFF)NNNNNNNNNr  NNT)
NNNNNNNNTF) r   r   r   r   staticmethodr   r   r   r   r   r   r   r   r   r   r  r    r   r   r   r   r   r   r   r  r   r  r
   r'   r6   r  r   r   r   r   r  r    s;    #"& KO-1#''+48<@DH)- 26,0#"&*."',1/^^sm^ 3-^ U3#*3c6>sm7D 1E ,E $F$F G H	^ c5j)*^ C=^ 8$^ ".1^  lC&7 89^ $E*:C*?$@A^ !^ ^ 5g./^  dCi()!^& '^( 3-)^* "$+^,  -^. $D>/^2 
{G%8/ 
3^ ^@  #"& KO#''+48<@26,0#*.,0%)!b b smb  3-b  U3#*3c6>sm7D 1E ,E $F$F G H	b  C=b  8$b  ".1b   lC&7 89b  5g./b  dCi()b  "$b  $D>b $ 
%b  b H  3748<@.2"&FJ#',0%) %{9{95g./{9 ".1{9  lC&7 89	{9
 'sm{9 3-{9 U4sM#ABC{9 C={9 $D>{9 
{9 {9r   r  c               /   ,  #    [         R                  n[        R                  n[	        [
        S5      (       a  [
        R                  O[
        R                  n[        R                  n[        R                  n[        R                  n[        R                  n[        R                  n	[         R                  n
["        R$                  n['        5       [         l        [(        [        l        [	        [
        S5      (       a  [*        [
        l        O[*        [
        l        [,        [        l	        [.        [        l
        [0        [        l        [2        [        l        [4        [        l        [6        [         l        [8        ["        l        UR;                  SS5      n [<        R>                  " U 0 UD6nUv   U[         l        U[        l        U["        l        U(       d  U[         l        U[        l        [	        [
        S5      (       a  U[
        l        OU[
        l        U[        l	        U[        l
        U[        l        U[        l        U	[        l        U
[         l        g g ! U[         l        U[        l        U["        l        U(       d  U[         l        U[        l        [	        [
        S5      (       a  U[
        l        OU[
        l        U[        l	        U[        l
        U[        l        U[        l        U	[        l        U
[         l        f f = f7f)N	_downloadr  F) r!   HF_ENDPOINTrK   get_from_cacherZ  r   r  _download_singler\   r  r   get_paths_infor"   resolve_patternr7   r  r9   r$   generate_from_dictra   rb   r   r   r   r   r=  rk  r  r   r   r  r  )argskwargshf_endpoint_originget_from_cache_origin_download_origindataset_info_originlist_repo_tree_originget_paths_info_originresolve_pattern_origin get_module_without_script_originget_module_with_script_origingenerate_from_dict_originr  dataset_ress                 r   load_dataset_with_ctxr    s=      ++&55 5<O[4Y4Y00--   ,,!00!00'77'K'V'V$$E$P$P! ( ; ; &F 1J,,$0!+7(&E*E*E!1J6O(33I%0"7H

;.IY'44dEfE 0$9
!&?#!3F(=J% 44,<)3C0!4E#8E #8E )?J&>^0;;X-8 	 0$9
!&?#!3F(=J% 44,<)3C0!4E#8E #8E )?J&>^0;;X-8 s    FLI" 4B.L"B/LLr  )NN)F)r  
contextlibr\  r)  r  dataclassesr   r   r   	functoolsr   pathlibr   typingr   r	   r
   r   r   r   r   r   r   r   r   urllib.parser   r  r  r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r   datasets.featuresr$   datasets.features.featuresr%   datasets.data_filesr&   r'   r(   r)   r*   r+   r,   ,datasets.download.streaming_download_managerr-   r.   r/   datasets.exceptionsr0   r1   datasets.infor2   datasets.loadr3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   datasets.namingrG   datasets.packaged_modulesrH   rI   rJ   datasets.utilsrK   datasets.utils.file_utilsrL   rM   rN   rO   rP   datasets.utils.info_utilsrQ   datasets.utils.metadatarR   datasets.utils.py_utilsrS   datasets.utils.trackrT   fsspecrU   fsspec.corerV   fsspec.utilsrW   huggingface_hubrX   rY   huggingface_hub.errorsrZ   huggingface_hub.hf_apir[   r   r\   r]   r^   	packagingr_   
modelscoper`   modelscope.hub.utils.utilsra   )modelscope.msdatasets.utils.hf_file_utilsrb   modelscope.utils.config_dsrc   modelscope.utils.constantrd   re   modelscope.utils.import_utilsrf   modelscope.utils.loggerrg   r   ExpandDatasetProperty_Trz   r   r   r   floatr  r   r   r   r  r  r=  r@  rk  r  r  r  contextmanagerr  r   r   r   <module>rC     s      	  0 0   j j j j " h h h h h ' 5X X X9 9 L *
 
 
 
 
 
 3C C &B B 7 3 / ,  ! ' : 7 ? > >   3 G 8 Q ; .	 " * <Z < <6  v #Ls #LL n QT 2 ## (,6:W&W& sm	W&
 e_W& W& E$)$%W& T123W& W&z #'-
 "#(,-- 3--
 - - sm- }- E$)$%- eHj()*-j "#(,77 cC 7
 7 sm7 }7 E$)$%7 
%*$
%&70 C . dg 6 D /304	hhh !c+h n-	h
 
#YhZ 59bb!.1b=A#BFs)CL >MbJz} zH -2\\\ \ 	\
 sCc)*\ ".1\ $D>\ 
%S/\~c<M c<LD9 D9N <Y <Yr   