
    A>i(                        S r SSKJrJr  SSKrSSKrSSKrSSKrSSK	J
r
Jr  SSKJr  \R                  R!                  S5      r\R                  R%                  S5      rS r\S 5       r\\R                  R-                  S	S
S/5      \R                  R-                  S/ SQ5      S 5       5       5       rS rS r\R                  R-                  SS0 S/4SSS0S/4SSS/0SS/4SS/SS.S/4SS/SS.\R4                  S/4/5      S  5       rS! r\R                  R-                  S"/ S#Q5      S$ 5       r\R                  R-                  S%SS/5      S& 5       rS' r\R                  R-                  S/ S(Q5      S) 5       r \R                  R-                  S/ S*Q5      S+ 5       r!S, r"S- r#\R                  R%                  S.5      \R                  R-                  S/S0S1/5      S2 5       5       r$g)3zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_skipc                     SnU n[        SR                  U5      5      nUR                  USUS9n[        SS//SS/S	9n[        R
                  " XE5        g )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpecteds         c/var/www/html/land-tabula/venv/lib/python3.13/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_inputr      s[    HF+228<=D__TsX_>F3*
F/CDH&+    c                     U n[        SR                  5       5      nUR                  USSS S9n[        SS//5      n[        R
                  " X45        g )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   s        r   test_read_csv_unicoder"   (   sR    F&--/0D__TsWT_JF,a012H&+r   r   ,	r   )utf-16zutf-16lezutf-16bec                    U nSR                  SU5      nUSS.nSnUR                  U5      n[        US5       n	U	R                  U5        S S S 5        [	        [        UR                  U5      5      US9 n
UR                  " U4SU0UD6nUR                  " U
4SU0UD6nS S S 5        [        R                  " WW5        g ! , (       d  f       N{= f! , (       d  f       N7= f)	Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r#      )r   skiprowsr   wbr   r   )	replacer   openwriter   r   r   r   r   )r   r   r   	temp_filer   r   kwargsutf8
bytes_datafbytes_bufferr   r   s                r   test_utf16_bom_skiprowsr4   2   s    
 F	 
c		 	
 a(FDX&J	i	!	
 
 
wt{{401D	A\HXHH??<I$I&I 
B &(+ 
	 
B	As   C 5+C 
C
Cc                     [         R                  R                  US5      nU nUR                  USSS9n[	        U5      S:X  d   eg )Nzutf16_ex.txtr%   r$   )r   r   2   )ospathjoinr   len)r   csv_dir_pathr8   r   r   s        r   test_utf16_exampler<   K   sA    77<<n5DF__TH$_?Fv;"r   c                     [         R                  R                  US5      nU nUR                  US SS9nUR	                  S5      nUS   S   nSnXV:X  d   eg )Nunicode_series.csvlatin-1)r    r   r   r!   i`  u$   Á köldum klaka (Cold Fever) (1994))r7   r8   r9   r   	set_index)r   r;   r8   r   r   gotr   s          r   test_unicode_encodingrB   R   s\    77<<&:;DF__T$_CFa F
)D/C9H??r   zdata,kwargs,expectedza
1r!   z"a"
1	quotechar"zb
1namesab1
1T)rE   skip_blank_linesFc                    ^^ U nSmSmUU4S jnUR                   S:X  a1  US:X  a+  UR                  SS5      (       a  [        R                  " SS	9  UR                  " U" U5      4S
T0UD6n[        SU05      n[        R                  " Xc5        g )Nu   ﻿r   c                 B   > TU -   R                  T5      n[        U5      $ )N)r   r   )_databom_databomr0   s     r   _encode_data_with_bom,test_utf8_bom.<locals>._encode_data_with_bomw   s"    %K''-x  r   pyarrowrI   rJ   Tz,https://github.com/apache/arrow/issues/38676)reasonr   rF   )enginegetpytestskipr   r   r   r   )	r   r   r/   r   r   rP   r   rO   r0   s	          @@r   test_utf8_bomrX   ^   s    * F
CD!
 	"EMJJ)400 	IJ__248R4R6RF#x)H&+r   c                     [        S/S/S.5      nU nUR                  U5      nSR                  U5      nUR                  [	        U5      US9n[
        R                  " Xs5        g )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr*   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   s           r   test_read_csv_utf_aliasesr`      s^    SEABHF""9-H'..x8D__WT]X_>F&+r   zfile_path,encoding)))ior   csvz	test1.csvr   ))ra   r   r   r>   r?   ))ra   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 d   U nU" U6 nUR                  XRS9n[        XRS9 nUR                  U5      nUR                  (       a   e S S S 5        [        R                  " UW5        [        USS9 n	UR                  XS9nU	R                  (       a   e S S S 5        [        R                  " Xh5        [        USSS9 n	UR                  XS9nU	R                  (       a   e S S S 5        [        R                  " Xh5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       NG= f)Nr*   rb)moder   )rf   	buffering)r   r,   closedr   r   )
r   	file_pathr   datapathr   fpathr   far   fbs
             r   test_binary_mode_file_buffersrn      s     Fi Eu8H	e	'2$99}9 
( (F+	e$	2799}9 
  (+	e$!	,799}9 
- (+ 
(	'
 
 	
 
-	,s#   %C?/#D<#D!?
D
D!
D/pass_encodingc                    U nUR                  U5      nUR                  S:X  a!  USL a  US;   a  [        R                  " S5        [	        SS/05      nUR                  SUS9 nUR                  S	5        UR                  S
5        UR                  X(       a  UOS S9n	[        R                  " X5        S S S 5        g ! , (       d  f       g = f)NrR   T)       zThese cases freezefoobarzw+rf   r   zfoo
barr   r*   )r]   rT   rV   rW   r   r,   r-   seekr   r   r   )
r   r^   r_   ro   r.   r   r   r   r2   r   s
             r   test_encoding_temp_filerw      s    
 F""9-H}}	!mt&;	X@U()%%)*H	TH	5	
	q	XDQ
f/ 
6	5	5s   #AB==
Cc                 j   U nSnSnSn[        X4/05      n[        R                  " 5        nUR                  U SU 3R	                  U5      5        UR                  S5        UR                  XbS9n[        R                  " Xu5        UR                  (       a   e S S S 5        g ! , (       d  f       g = f)Nz	shift-jisu	   てすとu   こむ
r   r*   )
r   tempfileNamedTemporaryFiler-   r   rv   r   r   r   rh   )r   r   r   titler   r   r2   r   s           r   test_encoding_named_temp_filer}      s    FHED%)H		$	$	&!	5'D6"))(34	q	6
f/88|8 
'	&	&s   A/B$$
B2)r   r%   z	utf-16-bez	utf-16-lezutf-32c                     Sn[        UR                  U 5      5      n[        USU S9n[        SS/SS/SS	//S
S/S9n[        R
                  " X45        g )Nu   a	b
：foo	0
bar	1
baz	2r$   )	delimiterr   u   ：foor   rt   r!   bazr'   rF   rG   )r   r   r   )r   r   encoded_datar   r   s        r   %test_parse_encoded_special_charactersr      sa     -D4;;x01LldXFFmeQZ%4c
H &+r   )r   Nr%   r	   r?   c                 L   U n[        / SQ/ SQ/ SQS.5      nUR                  USUS9  UR                  S:X  a5  Sn[        R                  " [
        US	9   UR                  X!S
S9  S S S 5        g UR                  X!S
S9n[        R                  " Xd5        g ! , (       d  f       g = f)N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   rR   BThe 'memory_map' option is not supported with the 'pyarrow' enginematchT)r   
memory_map)	r   to_csvrT   rV   raises
ValueErrorr   r   r   )r   r   r.   r   r   msgdfs          r   test_encoding_memory_mapr      s     FH7?	
H OOIUXO>}}	!R]]:S1OOITOJ 2	$	GB"' 21s   B
B#c                 `   U n[        S/S-  S9nSUR                  S'   UR                  USSSS9  UR                  S	:X  a6  S
n[        R
                  " [        US9   UR                  USSS9  SSS5        gUR                  USSS9n[        R                  " XS5        g! , (       d  f       g= f)zC
Chunk splits a multibyte character with memory_map=True

GH 43540
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  Fr   r   r    r   rR   r   r   NT)r    r   )
r   ilocr   rT   rV   r   r   r   r   r   )r   r.   r   r   r   dfrs         r    test_chunk_splits_multibyte_charr     s     F	d*	+B %BGGDMIIiuUWIE}}	!R]]:S1OOIdtOD 2
//)DT/
BC#" 21s   B
B-c           	         / nSnSnSn[        [        U5      [        U5      U5       H`  nSR                  [        XfS-   5       Vs/ s H  n[        U5      PM     sn5      S-   n UR	                  S5        UR                  U5        Mb     U n	[        U5      n
U
R                  USSSS9  U	R                  S	:X  a7  S
n[        R                  " [        US9   U	R                  USSSS9  SSS5        gU	R                  USSSS9n[        R                  " X5        gs  snf ! [
         a     GM  f = f! , (       d  f       g= f)z[
GH 43787

Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
    u   𐂀 ry   r   Fr   rR   r   r   NT)r    r   r   )rangeordr9   chrr   UnicodeEncodeErrorappendr   r   rT   rV   r   r   r   r   r   )r   r.   linesline_length
start_charend_charlnumcliner   r   r   r   s                r   test_readcsv_memmap_utf8r     s0    EKJH c*os8}kBwwd4K(@A(@1A(@ABTI	KK  	T C F	5	BIIiuUWIE}}	!R]]:S1OOIdtgOV 2
//)DTG/
TC""# B " 		 21s$   D
&D D2 
D/.D/2
E pyarrow_xfailrf   zw+bzw+tc                 $   U nSnSU;   a  Sn[         R                  " USS9 nUR                  U5        UR                  S5        UR	                  U5      nS S S 5        [        / S/S9n[        R                  " WU5        g ! , (       d  f       N1= f)Ns   abcdtabcdr   ru   r   r   )rz   SpooledTemporaryFiler-   rv   r   r   r   r   )r   rf   r   contenthandler   r   s          r   test_not_readabler   @  s     FG
d{		&	&D7	CvWA__V$ 
D fX.H"h' 
D	Cs   4B
B)%__doc__ra   r   r   r7   rz   numpynprV   pandasr   r   pandas._testing_testingr   markfilterwarnings
pytestmarkusefixturesskip_pyarrowr   r"   parametrizer4   r<   rB   nanrX   r`   rn   rw   r}   r   r   r   r   r    r   r   <module>r      sM  
 
    [[''C
 {{&&~6, , , d,%GH, I - ,,	  
aS	K%s+	'C5!C:.	3%T:QC@ e7VVQK	
&,'&,.	, ,,0 4-80 90*( G,, %ST( U(,#2#B )%0( 1 *(r   