
    <>i06                     ^   S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKr	 S SK
Jr  S SKJr   S SKrS SKJr  S SKJr  \R,                  R                  r\R,                  R                   S 5       r\R,                  R                   S 5       r\R,                  R                   S 5       r\R,                  R                   S 5       r\R,                  R                   S	 5       r\R,                  R                   S
 5       rS rS r\R,                  R                   \R,                  RA                  S/ SQ5      S 5       5       r!\R,                  R                   \R,                  RA                  SS/5      S 5       5       r"\R,                  R                   \R,                  RA                  SSS/5      S 5       5       r#S r$\R,                  RJ                  \R,                  RA                  SS5      S 5       5       r&S r'S r(S r)S r*S r+S  r,S! r-g! \ a    Sr GNf = f! \ a    S=rr GNf = f)"    N)mock)_write_table)alltypes_samplec                     [        SS9n [        R                  R                  U 5      n[        R
                  " 5       n[        XSSS9  UR                  S5        [        R                  " U5      nUR                  S5        [        R                  " X#S9n[        R                  " XR                  5       R                  5       5        g )N'  sizesnappy2.6)compressionversionr   )metadata)r   paTablefrom_pandasioBytesIOr   seekpqread_metadataParquetFiletmassert_frame_equalread	to_pandas)dfa_tablebufr   filehs        f/var/www/html/land-tabula/venv/lib/python3.13/site-packages/pyarrow/tests/parquet/test_parquet_file.pytest_pass_separate_metadatar!   1   s     
e	$Bhh""2&G
**,C8UCHHQK$HHHQKNN32E"jjl4467    c                     Su  p[        U S9n[        R                  R                  U5      n[        R
                  " 5       n[        X4X-  SSS9  UR                  S5        [        R                  " U5      nUR                  U:X  d   e[        U5       Vs/ s H  oeR                  U5      PM     nn[        R                  " U5      n[        R                  " X(R!                  5       5        g s  snf Nr      r   r
   r   row_group_sizer   r   r   )r   r   r   r   r   r   r   r   r   r   num_row_groupsrangeread_row_groupconcat_tablesr   r   r   )	NKr   r   r   pfi
row_groupsresults	            r    test_read_single_row_groupr3   E   s     DA	a	 Bhh""2&G
**,Cae%u6 HHQK		B!!!05a91##A&J9j)F"..01 :s   C-c                     Su  p[        U S9n[        R                  R                  U5      n[        R
                  " 5       n[        X4X-  SSS9  UR                  S5        [        R                  " U5      n[        UR                  S S 5      n[        U5       Vs/ s H  ouR                  XvS9PM     nn[        R                  " U5      n	[        R                   " X&   U	R#                  5       5        [        U5       Vs/ s H  ouR                  XvU-   S9PM     nn[        R                  " U5      n	[        R                   " X&   U	R#                  5       5        g s  snf s  snf 	Nr%   r   r
   r   r'   r      )columns)r   r   r   r   r   r   r   r   r   r   listr7   r*   r+   r,   r   r   r   )
r-   r.   r   r   r   r/   colsr0   r1   r2   s
             r    -test_read_single_row_group_with_column_subsetr:   \   s&   DA	a	 Bhh""2&G
**,Cae%u6 HHQK		B

2AD>CAhGh##A#4hJGj)F"(F$4$4$67 FK1XNX##Ad{#;XJNj)F"(F$4$4$67 H Os   E E c                     Su  p[        U S9n[        R                  R                  U5      n[        R
                  " 5       n[        X4X-  SSS9  UR                  S5        [        R                  " U5      nUR                  U:X  d   eUR                  [        U5      5      n[        R                  " X&R                  5       5        g r$   )r   r   r   r   r   r   r   r   r   r   r)   read_row_groupsr*   r   r   r   )r-   r.   r   r   r   r/   r2   s          r    test_read_multiple_row_groupsr=   u   s    DA	a	 Bhh""2&G
**,Cae%u6 HHQK		B!!!a)F"..01r"   c                  $   Su  p[        U S9n[        R                  R                  U5      n[        R
                  " 5       n[        X4X-  SSS9  UR                  S5        [        R                  " U5      n[        UR                  S S 5      nUR                  [        U5      US9n[        R                  " X&   UR!                  5       5        UR                  [        U5      Xf-   S9n[        R                  " X&   UR!                  5       5        g r5   )r   r   r   r   r   r   r   r   r   r   r8   r7   r<   r*   r   r   r   )r-   r.   r   r   r   r/   r9   r2   s           r    0test_read_multiple_row_groups_with_column_subsetr?      s    DA	a	 Bhh""2&G
**,Cae%u6 HHQK		B

2ADa$7F"(F$4$4$67 a$+>F"(F$4$4$67r"   c                  d   Su  p[        U S9n[        R                  R                  U5      n[        R
                  " 5       n[        X4X-  SSS9  UR                  S5        [        R                  " U5      nUR                  5       S:X  d   eUR                  UR                  S S 5      S:X  d   eg )	Nr%   r   r
   r   r'   r   r   r&   )r   r   r   r   r   r   r   r   r   r   scan_contentsr7   )r-   r.   r   r   r   r/   s         r    test_scan_contentsrB      s    DA	a	 Bhh""2&G
**,Cae%u6 HHQK		B&&&BJJrN+u444r"   c                 ~   U S-  n[         R                  " [        U5      5        S[        U5       S3n[        R                  " [
        5       n[        R                  " U5        S S S 5        WR                  [        5      (       a  [        R                  S:X  a  g UR                  U5        g ! , (       d  f       NO= f)N	directoryzCannot open for reading: path 'z' is a directorywin32)osmkdirstrpytestraisesIOErrorr   r   errisinstancePermissionErrorsysplatformmatch)tempdirpathmsgexcs       r    0test_parquet_file_pass_directory_instead_of_filerU      s    [ DHHSY+CI;6F
GC	w	3
t 
 
))cllg.EIIcN	 
 	s   B..
B<c                     [         R                  " [         R                  " SS/5      [         R                  " SS/5      /SS/S9n [         R                  " 5       n[        R
                  " X5        [        R                  " UR                  5       5      nUR                  R                  S5      R                  5       SS/:X  d   eUR                  R                  S	5      R                  5       SS/:X  d   eS
 HG  n[        R                  " [        [        45         UR                  R                  U5        S S S 5        MI     g ! , (       d  f       M[  = f)Nr&      foobarintsstrs)namesr      )r6   )r   tablearrayBufferOutputStreamr   write_tabler   getvaluereaderread_column	to_pylistrI   rJ   
ValueError
IndexError)r_   biofindexs       r    test_read_column_invalid_indexrl      s    HHbhh1v&%(@A"F+-E



!CNN5
s||~&A88",,.1a&88888",,.5%.@@@]]J
34HH  ' 54 44s   E
E	
batch_size)i,    i  c           	         SnSn[        US9nU S-  n[        R                  R                  U5      n[	        XeSUS9  [
        R                  " U5      nUR                  S S UR                  SS  4 H  nUR                  XS9n	[        S	X!-   U5      n
[        X5       Hh  u  p[        X,U-   5      n[        R                  " UR                  5       UR                  X2S S 24   R                   S S 2U4   R#                  S
S95        Mj     M     g )Ni  rn   r   pandas_roundtrip.parquetr   r   
chunk_size
   )rm   r7   r   Tdrop)r   r   r   r   r   r   r   r7   iter_batchesr*   zipminr   r   r   iloclocreset_index)rQ   rm   
total_sizerr   r   filenamearrow_tablefile_r7   batchesbatch_startsbatchstartends                 r     test_iter_batches_columns_readerr      s    JJ	j	)B33H((&&r*K&( NN8$EJJsORZZ_5$$
$LQ
 5zB6LEj*"45C!!!	1%))!W*5AAtAL 7 6r"   rr   rn   c                    [        SSS9nU S-  n[        R                  R                  U5      nUR                  R
                  c   e[        XCSUS9  [        R                  " U5      nS n[        U" U5      5      nSn[        UR                  5       H  n	[        R                  " Xx   R                  5       UR                  U	/5      R                  5       R!                  S	5      5        US
-  n[        R                  " Xx   R                  5       R#                  SS9UR                  U	/5      R                  5       R$                  S	S  R#                  SS95        US
-  nM     g )Nr   T)r	   categoricalrp   r   rq   c              3      #    [        U R                  5       H!  nU R                  SU/S9nU H  nUv   M	     M#     g 7f)N  )rm   r1   )r*   r)   rv   )rj   	row_groupr   r   s       r    get_all_batches1test_iter_batches_reader.<locals>.get_all_batches   sF     q//0Inn%; % G
 ! ! 1s   <>r   r   r]   rt   )r   r   r   r   schemapandas_metadatar   r   r   r8   r*   r)   r   r   r   r<   headr{   ry   )
rQ   rr   r   r}   r~   r   r   r   batch_nor0   s
             r    test_iter_batches_readerr      sR    
e	6B33H((&&r*K--999&( NN8$E ?5)*GH5''(
'')!!1#&00277<	

 	A
'')5545@!!1#&00277=II J 	
 	A )r"   
pre_bufferFTc                 ,   Su  p[        US9n[        R                  R                  U5      n[        R
                  " 5       n[        XEX-  SSS9  UR                  S5        [        R                  " XPS9nUR                  5       R                  U:X  d   eg )Nr%   r   r
   r   r'   r   )r   )r   r   r   r   r   r   r   r   r   r   r   num_rows)r   r-   r.   r   r   r   r/   s          r    test_pre_bufferr     s|     DA	a	 Bhh""2&G
**,Cae%u6 HHQK		3B779"""r"   c                    U R                  S5      n[        R                  " SS/SS/S.5      n[        R                  " X!5        [        US5       n[        R                  " U5       nUR                  5         UR                  (       a   eUR                  (       a   e SSS5        UR                  (       a   eWR                  (       a   e SSS5        WR                  (       d   eWR                  (       d   e[        R                  " U5       nUR                  5         UR                  (       a   e SSS5        UR                  (       d   eg! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       ND= f)z
Unopened files should be closed explicitly after use,
and previously opened files should be left open.
Applies to read_table, ParquetDataset, and ParquetFile
zfile.parquetr   r]   )col1col2rbN)	joinpathr   r_   r   rb   openr   r   closed)rQ   fnr_   rj   ps        r    #test_parquet_file_explicitly_closedr   "  s
    
		.	)BHHq!fq!f56ENN5 
b$1^^A!FFHxx<xx<x  88|88|8 
 88O888O8 
	q	88|8 
 88O8  
 
	s0   E%(7E .E%$E6
E"	E%%
E36
Fuse_uri)TFc                 T   U u  p#nU(       a  UOU4nU(       a  0 O[        US9n[        R                  " S[        S5      05      n[        R
                  " XtUS9  [        R                  " U0 UD6nUR                  5       U:X  d   eUR                  (       a   eUR                  5         UR                  (       d   e[        R                  " U0 UD6 n	U	R                  5       U:X  d   eU	R                  (       a   e S S S 5        W	R                  (       d   eg ! , (       d  f       N"= f)N
filesystemars   )
dictr   r_   r*   r   rb   r   r   r   close)
s3_example_fsr   s3_fss3_uris3_pathargskwargsr_   parquet_filerj   s
             r    !test_parquet_file_with_filesystemr   ?  s     +E7F7,DR 6FHHc59%&ENN5e4>>4262L%'''""""		(	(Avvx5   88|8 
) 88O8 
)	(s   *D
D'c                     [         R                  " S[         R                  " / SQ5      05      n [        R                  " 5       n[        X5        UR                  S5        [        R                  " U5      R                  5       R                  S   R                  S   R                  nUR                  SL d   eUR                  S:X  d   eUR                  b   eUR                   SL d   eUR"                  S:X  d   eUR$                  (       d   eUR&                  S:X  d   eUR(                  (       d   e[+        U5      S	:X  d   eg )
Nvalue)r^   N   r   Tr]   Fr^   r   zmarrow.ArrayStatistics<null_count=1, distinct_count=None, min=-1, is_min_exact=True, max=3, is_max_exact=True>)r   r_   r`   r   r   r   r   r   r   r   r7   chunks
statisticsis_null_count_exact
null_countdistinct_countis_distinct_count_exactrx   is_min_exactmaxis_max_exactrepr)r_   r   r   s      r    test_read_statisticsr   V  s   HHgrxx678E
**,CHHQK$))+33A6==a@KKJ))T111  A%%%$$,,, --666>>R"""">>Q""""
 !< = = =r"   c                     U  S3n[         R                  " U5      R                  5       nUR                  SS/:X  d   eUS   R	                  5       / SQ:X  d   eg )Nz/unknown-logical-type.parquetzcolumn with known typezcolumn with unknown type)s   unknown string 1s   unknown string 2s   unknown string 3)r   r   r   column_namesrf   )parquet_test_datadir	test_filer_   s      r     test_read_undefined_logical_typer   m  sh    '((EFINN9%**,E":<V!WWWW+,668 =   r"   c                     [         R                  " S5        [        R                  " S[	        S5      05      n [
        R                  " U S5        [
        R                  " S5      nU R                  U5      (       d   eSn[         R                  " [        R                  US9   [
        R                  " S5        S S S 5        g ! , (       d  f       g = f)Nfsspecr   rs   fsspec+memory://example.parquetz#Unrecognized filesystem type in URIrP   znon-existing://example.parquet)rI   importorskipr   r_   r*   r   rb   
read_tableequalsrJ   ArrowInvalid)r_   table2rS   s      r     test_parquet_file_fsspec_supportr   y  s    
!HHc59%&ENN5;<]]<=F<<
/C	rc	2
67 
3	2	2s   "C
Cc                      SSK Jn   [
        R                  " S[        S5      05      nW " 5       nUR                  SSS9  UR                  S5      (       d   eS	n[        R                  " US
US9  [        R                  " S5      nUR                  U5      (       d   eg ! [         a    [        R                  " S5         Nf = f)Nr   MemoryFileSystemz&fsspec is not installed, skipping testbrs   z/path/to/prefixT)create_parentszfsspec+memory://path/to/prefixz	b.parquetr   z(fsspec+memory://path/to/prefix/b.parquet)fsspec.implementations.memoryr   ImportErrorrI   skipr   r_   r*   rG   existsr   rb   r   r   )r   r_   fsfs_strr   s        r    <test_parquet_file_fsspec_support_through_filesystem_argumentr     s    >B HHc59%&E		BHHtH499&''''-FNN5+&9]]EFF<<  ><=>s   B  CCc                      SSK Jn   [
        R                  " S5      nW Ul        [        R                  R                  SSU05         Sn[        R                  " S[        S5      05      n[        R                  " X25        [        R                   " U5      nUR#                  U5      (       d   e S S S 5        g ! [         a    [        R                  " S5         Nf = f! , (       d  f       g = f)	Nr   r   z3fsspec is not installed, skipping Hugging Face testhuggingface_hubzsys.modulesz'hf://datasets/apache/arrow/test.parquetr   rs   )r   r   r   rI   r   types
ModuleTypeHfFileSystemr   patchr   r   r_   r*   r   rb   r   r   )r   fake_hf_moduleurir_   r   s        r    $test_parquet_file_hugginface_supportr     s    KB %%&78N"2N	):N(K	L7#uRy)*
u"s#||F#### 
M	L  KIJK
 
M	Ls   B9 A(C9 CC
C-c                      SS K n [        R                  " S5        [        R
                  " S5      n[        R                  " [        US9   [        R                  " S5        S S S 5        g ! [         a     N[f = f! , (       d  f       g = f)Nr   z"fsspec is available, skipping testzI`fsspec` is required to handle `fsspec+<filesystem>://` and `hf://` URIs.r   r   )	r   rI   r   r   reescaperJ   r   r   )r   rS   s     r    1test_fsspec_uri_raises_if_fsspec_is_not_availabler     sk    : 	89
))SUC	{#	.
78 
/	.   
/	.s   A+ A;+
A87A8;
B	c                 T   [         R                  " / 5      n[         R                  R                  / US9nU S-  n[        R
                  " X#5        [        R                  " U5      n[        R                  " [        5         UR                  SS9  S S S 5        g ! , (       d  f       g = f)N)r   zempty_file.parquetr   )rm   )r   r   r   from_batchesr   rb   r   rI   rJ   rg   rv   )rQ   r   empty_tableparquet_file_pathr   s        r    (test_iter_batches_raises_batch_size_zeror     sx    YYr]F((''6':K"66NN;2>>"34L	z	"!!Q!/ 
#	"	"s    B
B').r   rF   r   rN   r   rI   unittestr   pyarrowr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   pandaspdpandas.testingtestingr   r   mark
pytestmarkr!   r3   r:   r=   r?   rB   rU   rl   parametrizer   r   r   r   s3r   r   r   r   r   r   r   r    r"   r    <module>r      sA  $ 
 	 	 
     9< [[  
 8 8& 2 2, 8 80 2 2( 8 8, 5 5 

( '89 : . v.( / (V t}5# 6 #: M2 3 *=.	
8 $$ 90o  	B  NBs"   H H HH
H,+H,