
    9i|3                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J
r
  S SKJr  S SKJrJr  S SKJrJr  S SKrS SKJr  S SKJr  S S	KJrJrJrJr  S S
KJr  S SKJ r   S SK!J"r"  S SK#J$r$  S SK%J&r&  \"" 5       r'SS\\\(\)4      S\(4S jjr*    SS\(S\(S\+S\,S\,S\,S\RZ                  4S jjr. SS\RZ                  4S jjr/ SS\\RZ                     4S jjr0               S S\(4S jjr1g)!    N)contextmanager)partial)Path)OptionalUnion)urljoinurlparse)tqdm)config)hash_url_to_filename"get_authentication_headers_for_urlfsspec_head
fsspec_get)FileLock)MS_DATASETS_CACHE)
get_logger)ModelScopeConfig)__version__
user_agentreturnc                 2   S[          3nUS[        R                   3-  nUS[        R                   3-  n[        R                  (       a  US[        R
                   3-  n[        R                  (       a  US[        R                   3-  n[        R                  (       a  US[        R                   3-  n[        U [        5      (       a.  USSR                  S U R                  5        5       5       3-  nU$ [        U [        5      (       a  USU -   -  nU$ )	Nz	datasets/z	; python/z
; pyarrow/z; torch/z; tensorflow/z; jax/z; c              3   4   #    U  H  u  pU S U 3v   M     g7f)/N ).0kvs      i/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/msdatasets/utils/hf_file_utils.py	<genexpr>-get_datasets_user_agent_ms.<locals>.<genexpr>-   s     G4FDAs!A3Z4Fs   )r   r   
PY_VERSIONPYARROW_VERSIONTORCH_AVAILABLETORCH_VERSIONTF_AVAILABLE
TF_VERSIONJAX_AVAILABLEJAX_VERSION
isinstancedictjoinitemsstr)r   uas     r   get_datasets_user_agent_msr/   "   s   [M	"BIf''(
))BJv--.
//B
--.//
f//011
v))*++*d##
499GJ4D4D4FGGHII I 
J	$	$
dZI    methodurlmax_retriesbase_wait_timemax_wait_timetimeoutc           	         Su  pxSn	U(       d8  US-  n [         R                  " S
U R                  5       XS.UD6n	SnU(       d  M8  U	$ ! [         R                  R                  [         R                  R
                  4 a[  n
Xr:  a  U
e[        R                  U  SU SXr-   S35        [        XCS	US-
  -  -  5      n[        R                  " U5         Sn
A
NSn
A
ff = f)a  Wrapper around requests to retry in case it fails with a ConnectTimeout, with exponential backoff.

Note that if the environment variable HF_DATASETS_OFFLINE is set to 1, then a OfflineModeIsEnabled error is raised.

Args:
    method (str): HTTP method, such as 'GET' or 'HEAD'.
    url (str): The URL of the resource to fetch.
    max_retries (int): Maximum number of retries, defaults to 0 (no retries).
    base_wait_time (float): Duration (in seconds) to wait before retrying the first time. Wait time between
        retries then grows exponentially, capped by max_wait_time.
    max_wait_time (float): Maximum amount of time between two retries, in seconds.
    **params (additional keyword arguments): Params to pass to :obj:`requests.request`.
)r   FN   )r1   r2   r6   Tz request to z timed out, retrying... []   r   )requestsrequestupper
exceptionsConnectTimeoutConnectionErrorloggerinfomintimesleep)r1   r2   r3   r4   r5   r6   paramstriessuccessresponseerr
sleep_times               r   _request_with_retry_msrL   3   s    , NEH
		'''bv||~3b[abHG	 g O ##22H4G4G4W4WX 	'"	vhl3%7PQVQbPccdef uqyAQ0QR


:&&	's   )A 8C ACCc                     [         R                  " U5      =(       d    0 n[        UR                  S5      S9US'   [	        SU UUUUUUS9nU$ )N
user-agentr   HEAD)r1   r2   proxiesheaderscookiesallow_redirectsr6   r3   )copydeepcopyr/   getrL   )r2   rQ   rR   rS   rT   r6   r3   rI   s           r   http_head_msrX   Z   sX     mmG$*G6'++lB[\GL%'	H Or0   c	                    Ub  [        U5      O0 n[        UR                  S5      S9US'   US:  a
  SUS S3US'   [        SU S	UUUUUS
9n	Uc  U	$ U	R                  S:X  a  g U	R
                  R                  S5      n
U
b  U[        U
5      -   OS n[        XS	SU=(       d    SS9nU	R                  SS9 H.  nUR                  [        U5      5        UR                  U5        M0     UR                  5         g )NrN   rO   r   zbytes=d-RangeGETT)r1   r2   streamrQ   rR   rS   r3   r6   i  zContent-LengthBDownloading)totalinitial
unit_scaleunitdesci   )
chunk_size)r*   r/   rW   rL   status_coderR   intr
   iter_contentupdatelenwriteclose)r2   	temp_filerQ   resume_sizerR   rS   r6   r3   re   rI   content_lengthra   progresschunks                 r   http_get_msrs   l   s     '2d7mG6'++lB[\GLQ#K?!4%	H s"%%))*:;N1?1KK#n--QUE%CVZVk^klH&&$&7E
# 8 NNr0   c                   ^% US:w  a!  [         R                  " SU S3[        5        Un
Uc  [        n[	        U[
        5      (       a  [        U5      n[        R                  " USS9  U(       a   [        U [        U 5      R                  5      nOU nSnSnSnSnSnSn[        USS9n[        R                  R                  UU5      nUc  S	U-   S
-   n[        R                  R                  U5      (       a  U(       d	  U(       d  U$ [        X
S9nUb  UUS'   U(       Gd  [        U 5      R                   nUS;  a?  [#        XS9nU(       a+  UR%                  SS5      =(       d    UR%                  SS5      OSnSn [&        R(                  " 5       n[+        U SUUU	UUS9nUR,                  S:X  a  U(       a  UR.                  R%                  S5      OSnUR0                  R3                  5        H:  u  nnUR5                  S5      (       d  M  SU ;   d  M&  U SU-   -  n UR0                  nM<     SnSU ;   a  SU ;  a  U S-  n OUR,                  S:X  a  SU ;   d~  UR,                  S:X  a  SU ;   dh  UR,                  S:X  aB  [6        R8                  " SU 5      (       d<  [6        R8                  " SUR:                  5      (       d  UR,                  S:X  a!  SU ;   a  Sn[<        R?                  SU  35        O6UR,                  S :X  a&  [@        RB                  U ;   a  U
c  [E        S!U  S"35      eU(       d  [        R                  R                  U5      (       a	  U(       d  U$ U(       a  [O        S#U S$35      eUb  UR,                  S%:X  a  [O        S&U  35      eUb  [E        S'U  S([Q        U5       S)35      eUb  [E        S'U  S*UR,                   S)35      e[E        S'U  35      e[        UU5      n[        R                  R                  UU5      n[        R                  R                  U5      (       a	  U(       d  U$ US+-   n[S        U5         [        R                  R                  U5      (       a  U(       d  UsSSS5        $ US,-   m%[T        S<U%4S- jj5       nS.nU(       aN  [W        US/S09n[        R                  R                  T%5      (       a   [        RX                  " T%5      RZ                  nU" 5        n US;  a  []        U U XS19  O[_        U U UUUUU	US29  SSS5        [<        R?                  S3U  S4U 35        [`        Rb                  " W Rd                  U5        [        Rf                  " S55      n![        Rf                  " U!5        [        Rh                  " US5U!) -  5        [<        R?                  S6U 35        U US7.n"US8-   n#[k        U#S9S:S;9 n$[l        Rn                  " U"U$5        SSS5        SSS5        U$ ! [F        [H        RJ                  RL                  4 a  nUn SnAGNSnAff = f! , (       d  f       GN= f! , (       d  f       N^= f! , (       d  f       U$ = f)=ad  
Given a URL, look for the corresponding file in the local cache.
If it's not there, download it. Then return the path to the cached file.

Return:
    Local path (string)

Raises:
    FileNotFoundError: in case of non-recoverable file
        (non-existent or no cache on disk)
    ConnectionError: in case of unreachable url
        and no cache on disk

deprecatedz'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=z
' instead.NT)exist_okF)etagzDownloading [r9   )tokenrN   )httphttps)storage_optionsETagrw   )rT   rQ   r6   r3   rR   rS      download_warningzdrive.google.comz	&confirm=zconfirm=z
&confirm=ti  zfirebasestorage.googleapis.comi  i  z7^https?://github.com/.*?/.*?/releases/download/.*?/.*?$z#^https://.*?s3.*?amazonaws.com/.*?$zndownloader.figstatic.comz"Couldn't get ETag version for url i  zUnauthorized for URL zU. Please use the parameter `token=True` after logging in with `huggingface-cli login`z6Cannot find the requested files in the cached path at zi and outgoing traffic has been disabled. To enable file online look-ups, set 'local_files_only' to False.i  zCouldn't find file at zCouldn't reach z ()z (error z.lockz.incompletec              3   b   >#    [        TU 5       nUv   S S S 5        g ! , (       d  f       g = f7fN)open)modefincomplete_paths     r   temp_file_manager,get_from_cache_ms.<locals>.temp_file_manager0  s"     ot, -,,s   /	/
,/r   za+b)r   )r{   re   )rn   rQ   ro   rR   rS   r3   re   zstoring z in cache at i  zcreating metadata file for )r2   rw   z.jsonwzutf-8)encoding)zw+b)8warningswarnFutureWarningr   r)   r   r-   osmakedirsr   r	   pathr   r+   existsr   schemer   rW   r   get_cookiesrX   rg   rR   rS   r,   
startswithrematchr2   rA   rB   r   HF_ENDPOINTr@   OSErrorr;   r>   TimeoutFileNotFoundErrorreprr   r   r   statst_sizer   rs   shutilmovenameumaskchmodr   jsondump)&r2   	cache_dirforce_downloadrQ   etag_timeoutresume_downloadr   local_files_onlyuse_etagr3   rx   use_auth_tokenignore_url_paramsr{   download_descdisable_tqdm
cached_url	connectedrI   rS   rw   
head_errorr   filename
cache_pathrR   r   r   e	lock_pathr   ro   rn   r   meta	meta_path	meta_filer   s&                                        @r   get_from_cache_msr      s   > %==K<LJX	

 %	)T""	N	KK	D)S(3-"4"45

IHGDJF $JT:Hi2J'(2S8	ww~~j!!. 1BG * #%%**"3HHQYHLL.L(,,vt2L_cDI-	&224G# $$'H ##s*7?x''++F3T$,,224DAq||$677<NRU<U{Q."*"2"2	 5
 !	%,31F<'C %%,1QUX1X((C/4F#4M((C/![]`aa88$JHLLYY ((C/4OSV4V 	@FG%%,1C1Cs1Ju}%+C5 1j k  77>>*%%n#H U^ ^  !h&:&:c&A#&<SE$BCC!!OC54
;K:LA"NOO!!OC5AUAU@VVW"XYY!OC5"9:: $J5Hi2J	ww~~j!!. W$I	)	77>>*%%n 
	
 %}4		 
	  '(9 Fww~~o.. ggo6>>  I ..3	?_'# +## +&	 !" 	hse==>INNJ/

UeV^,1*>?D)(	)S73yIIdI& 4] 
b g ,,445 	J	l ! 4 43] 
	b sv   9BW1 	W1 +W1 =B.W1 ,6W1 -Y8A4Y,#X%B8YX7Y1$X"XX"%
X4	/Y7
Y	Y
Yr   )r:   g      ?r:         $@)NNNTr   r   )Nr   NNg      Y@r   N)NFNd   FNFTr   Nru   FNNN)2r   r   r   rU   r   rD   r   
contextlibr   	functoolsr   pathlibr   typingr   r   urllib.parser   r	   r;   	tqdm.autor
   datasetsr   datasets.utils.file_utilsr   r   r   r   filelockr   modelscope.utils.config_dsr   modelscope.utils.loggerr   modelscope.hub.apir   
modelscoper   rA   r-   r*   r/   rh   floatResponserL   rX   rs   r   r   r0   r   <module>r      sN  
  	 	     %   " *   @ @  8 . / "	8E#t)4D+E QT ( $$	$ $ 	$
 $ $ $P de& quh D 
!N" 	#Nr0   