
    A>i:              	      $   S r SSKJr  SSKJr  SSKrSSKJr  SSKrSSKr	SSK
Jr  SSKrSSKJr  SSKJr  \(       a  SS	KJrJrJrJr  S
rSrSrSr/ SQrSrSrSrSrS\ S\ S\ S\ S3	r S\ S\ S3r!SS jr"SS jr#S r$S r% " S S\5      r&g) a-  
Read a SAS XPort format file into a Pandas DataFrame.

Based on code from Jack Cushman (github.com/jcushman/xport).

The file format is defined here:

https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
    )annotations)datetimeN)TYPE_CHECKING)find_stack_level)
get_handle)	SASReader)CompressionOptionsDatetimeNaTTypeFilePath
ReadBufferzPHEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  zKHEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000zPHEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  zPHEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  )ntypenhfunfield_lengthnvar0namelabelnformnflnum_decimalsnfjnfillniformniflnifdnpos_zParameters
----------
filepath_or_buffer : str or file-like object
    Path to SAS file or object implementing binary read method.zindex : identifier of index column
    Identifier of column that should be used as index of the DataFrame.
encoding : str
    Encoding for text data.
chunksize : int
    Read file `chunksize` lines at a time, returns iterator.zBformat : str
    File format, only `xport` is currently supported.z\iterator : bool, default False
    Return XportReader object for reading file incrementally.z#Read a SAS file into a DataFrame.


a  

Returns
-------
DataFrame or XportReader

Examples
--------
Read a SAS Xport file:

>>> df = pd.read_sas('filename.XPT')

Read a Xport file in 10,000 line chunks:

>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
>>>     do_something(chunk)

z$Class for reading SAS Xport files.

z

Attributes
----------
member_info : list
    Contains information about the file
fields : list
    Contains information about the variables in the file
c                r     [         R                  " U S5      $ ! [         a    [        R                  s $ f = f)z1Given a date in xport format, return Python date.z%d%b%y:%H:%M:%S)r   strptime
ValueErrorpdNaT)datestrs    V/var/www/html/land-tabula/venv/lib/python3.13/site-packages/pandas/io/sas/sas_xport.py_parse_dater%      s3      *;<< vvs    66c                ^    0 nSnU H  u  pEXX5-    R                  5       X$'   X5-  nM!     US	 U$ )z
Parameters
----------
s: str
    Fixed-length string to split
parts: list of (name, length) pairs
    Used to break up string, name '_' will be filtered from output.

Returns
-------
Dict of name:contents of string at given location.
r   r   )strip)spartsoutstartr   lengths         r$   _split_liner-      sF     CEen-335	  	CJ    c                    US:w  ah  [         R                  " [        U 5      [         R                  " S5      5      n[         R                  " SU SSU-
   35      nUR	                  US9nXS'   U$ U $ )N   S8Sz,Sdtypef0)npzeroslenr4   view)vecnbytesvec1r4   vec2s        r$   _handle_truncated_float_vecr>      sg     {xxC"((4.11VHBq6zl34yyuy%T
Jr.   c                r   [         R                  " S5      nU R                  US9nUS   nUS   nUS-  n[         R                  " [	        U 5      [         R
                  S9nSU[         R                  " US-  5      '   SU[         R                  " US	-  5      '   S
U[         R                  " US-  5      '   XV-  nXF-	  US-  SS
U-
  -   -  -  nUS-  nXSS-	  S-  S-
  S-  U-   S-   S-  US-  -  -  n[         R                  " [	        U5      4SS9nXXS'   XxS'   UR                  SS9nUR                  S5      nU$ )zZ
Parse a vector of float values representing IBM 8 byte floats into
native 8 byte floats.
z>u4,>u4r3   r5   f1i    i       i  @    i         l          A   i     l        z>f8f8)	r6   r4   r9   r7   r8   uint8whereemptyastype)	r:   r4   r<   xport1xport2ieee1shiftieee2ieees	            r$   _parse_float_vecrU      sZ   
 HHYE88%8 D$ZF$ZF ZE HHSXRXX.E+,E"((6J&
'(+,E"((6J&
'(+,E"((6J&
'( 
OE_&:"52U;K!LME 
ZE 
R<4'2-!3u<tCJ E 88SZM3DJJ9959!D;;tDKr.   c                      \ rS rSr\r    S         SS jjrSS jrS rSS jr	SS jr
SS jrSSS	 jjrS
 rSSS jjrSrg)XportReader   Nc                    X0l         SU l        X l        X@l        [	        USUSUS9U l        U R
                  R                  U l         U R                  5         g ! [         a    U R                  5         e f = f)Nr   rbF)encodingis_textcompression)	_encoding_lines_read_index
_chunksizer   handleshandlefilepath_or_buffer_read_header	Exceptionclose)selfrd   indexr[   	chunksizer]   s         r$   __init__XportReader.__init__   sv     "#!#
 #',,"5"5	 	JJL	s   A A5c                8    U R                   R                  5         g N)rb   rg   rh   s    r$   rg   XportReader.close  s    r.   c                T    U R                   R                  S5      R                  5       $ )NP   )rd   readdecodero   s    r$   _get_rowXportReader._get_row  s"    &&++B/6688r.   c           
     &   U R                   R                  S5        U R                  5       nU[        :w  a  SU;   a  [	        S5      e[	        S5      eU R                  5       nSS/SS/S	S/S
S/SS//n[        X#5      nUS   S:w  a  [	        S5      e[        US   5      US'   X@l        U R                  5       n[        US S 5      US'   U R                  5       nU R                  5       nUR                  [        5      nU[        :H  n	U(       a  U	(       d  [	        S5      e[        USS 5      n
SS/SS/SS/SS/S	S/S
S/SS//n[        U R                  5       U5      nSS/S
S/SS/SS//nUR                  [        U R                  5       U5      5        [        US   5      US'   [        US   5      US'   Xl        SSS.n[        U R                  5       SS 5      nX-  nUS-  (       a  USUS-  -
  -  nU R                   R                  U5      n/ nSn[        U5      U
:  a  US U
 UU
S  nnUR!                  S5      n["        R$                  " SU5      n['        [)        [*        US S!95      nUS
	 UUS"      US"'   US#   nUS"   S:X  a  US$:  d  US:  a  S%U S&3n[-        U5      eUR/                  5        H  u  nn UR1                  5       UU'   M     UUS#   -  nUU/-  n[        U5      U
:  a  M  U R                  5       nU[4        :X  d  [	        S'5      eUU l        UU l        U R                   R;                  5       U l        U R?                  5       U l         U R6                   Vs/ s H  nUS(   RC                  5       PM     snU l"        [G        U R6                  5       VVs/ s H%  u  nnS)[I        U5      -   S*[I        US#   5      -   4PM'     nnn[J        RL                  " U5      nUU l'        g ! [2         a     GMJ  f = fs  snf s  snnf )+Nr   z**COMPRESSED**z<Header record indicates a CPORT file, which is not readable.z#Header record is not an XPORT file.prefixrF   versionr0   OSr   created   zSAS     SAS     SASLIBz!Header record has invalid prefix.modifiedzMember header not foundset_namesasdatar   (   typenumericchar)rA   rB   6   :   rr      z>hhhh8s40s8shhh2s8shhl52sT)strictr   r   rB   zFloating field width z is not between 2 and 8.zObservation header not found.r   r(   r2   )(rd   seekru   _correct_line1r    r-   r%   	file_info
startswith_correct_header1_correct_header2intupdatemember_infors   r8   ljuststructunpackdictzip
_fieldkeys	TypeErroritemsr'   AttributeError_correct_obs_headerfieldsrecord_lengthtellrecord_start_record_countnobsrt   columns	enumeratestrr6   r4   _dtype)rh   line1line2fifr   line3header1header2	headflag1	headflag2fieldnamelengthmemr   types
fieldcount
datalength	fielddatar   
obs_length
fieldbytesfieldstructfieldflmsgkvheaderxidtypelr4   s                                  r$   re   XportReader._read_header  sW   $$Q' N"5( !R  BCC"~	1~ay3)iQS_U+	X"::@AA*9Y+?@	)" +E#2J 7	* --/--/&&'78	//	i677gbn- qMONN1I"IO
 "$--/37B#rWbMFA;G;t}}<="-k*.E"FJ!,[-C!DI& &)B/0
$1
?"zB..J++00<	
)n/ *?+/*+ "J $))#.J --(CZPKZTBCEc
"5>2E'N~&BW~*aR!V-bT1IJn$1 wwyE!H & %//JugF7 )n/: ,,<==' 3388:&&(	48KK@Kq&	((*K@
 &dkk2
25 3q6\3U>%:!;;<2 	 
  / &  A
s   O6,P),P6
PPc                F    U R                  U R                  =(       d    SS9$ )NrA   nrows)rs   ra   ro   s    r$   __next__XportReader.__next__  s    yyt3!y44r.   c                   U R                   R                  SS5        U R                   R                  5       U R                  -
  nUS-  S:w  a  [        R
                  " S[        5       S9  U R                  S:  a3  U R                   R                  U R                  5        XR                  -  $ U R                   R                  SS5        U R                   R                  S5      n[        R                  " U[        R                  S9n[        R                  " US:H  5      n[        U5      S:X  a  SnOS	[        U5      -  nU R                   R                  U R                  5        X-
  U R                  -  $ )
z
Get number of records in file.

This is maybe suboptimal because we have to seek to the end of
the file.

Side effect: returns file position to record_start.
r   rB   rr   zxport file may be corrupted.)
stacklevelir3   l     @@  r0   )rd   r   r   r   warningswarnr   r   rs   r6   
frombufferuint64flatnonzeror8   )rh   total_records_lengthlast_card_bytes	last_cardixtail_pads         r$   r   XportReader._record_count  s6    	$$Q*#66;;=@Q@QQ"$)MM.+-
 "##(():):;'+=+===$$S!,1166r:MM/C	 ^^I)<<=r7a<H3r7{H$$T%6%67$/D4F4FFFr.   c                >    Uc  U R                   nU R                  US9$ )z
Reads lines from Xport file and returns as dataframe

Parameters
----------
size : int, defaults to None
    Number of lines to read.  If None, reads whole file.

Returns
-------
DataFrame
r   )ra   rs   )rh   sizes     r$   	get_chunkXportReader.get_chunk  s#     <??Dyyty$$r.   c                    UR                  SS9nUS   S:H  US   S:H  -  US   S:H  -  nUS   S:  US   S	:*  -  US   S
:H  -  US   S:H  -  nX4-  nU$ )Nzu1,u1,u2,u4r3   r@   r   f2f3r5   rH   Z   _   .   )r9   )rh   r:   r   missmiss1s        r$   _missing_doubleXportReader._missing_double  s    HH=H)$14A.!D'Q,?go!D'T/2w$ w$  	
 	r.   c                   Uc  U R                   n[        XR                   U R                  -
  5      nX R                  -  nUS::  a  U R	                  5         [
        eU R                  R                  U5      n[        R                  " X@R                  US9n0 n[        U R                  5       H  u  pxUS[        U5      -      n	U R                  U   S   n
U
S:X  aJ  [        XR                  U   S   5      n	U R!                  U	5      n[#        U	5      n[        R$                  X'   OlU R                  U   S   S:X  aV  U	 Vs/ s H  oR'                  5       PM     nnU R(                  b*  U Vs/ s H  oR+                  U R(                  5      PM     nnUR-                  UW05        M     [.        R0                  " U5      nU R2                  c=  [.        R4                  " [7        U R                  U R                  U-   5      5      Ul        OUR;                  U R2                  5      nU =R                  U-  sl        U$ s  snf s  snf )zRead observations from SAS Xport file, returning as data frame.

Parameters
----------
nrows : int
    Number of rows to read from data file; if None, read whole
    file.

Returns
-------
A DataFrame.
r   )r4   countr(   r   r   r   r   )r   minr_   r   rg   StopIterationrd   rs   r6   r   r   r   r   r   r   r>   r   rU   nanrstripr^   rt   r   r!   	DataFramer`   Indexrangeri   	set_index)rh   r   
read_linesread_lenrawdatadf_datajr   r:   r   r   r   ydfs                  r$   rs   XportReader.read  s    =IIE		D,<,< <=
 2 22q=JJL%%**84}}S:Fdll+DAsSV|$CKKN7+E	!1#{{1~n7UV++C0$S)&&Q(F2),-AXXZ->>-;<=1a$..11A=NNAq6" , \\'";;xxd&6&68H8H:8U VWBHdkk*BJ&	 . >s   ?H:+$H?)ra   r   r^   r`   r_   r   r   r   rd   rb   r   r   r   r   )Nz
ISO-8859-1Ninfer)
rd   zFilePath | ReadBuffer[bytes]r[   z
str | Nonerj   
int | Noner]   r	   returnNone)r   r   )r   pd.DataFrame)r   r   rn   )r   r   r   r   )r   r   r   r   )__name__
__module____qualname____firstlineno___xport_reader_doc__doc__rk   rg   ru   re   r   r   r   r   rs   __static_attributes__ r.   r$   rW   rW      s{    G
 + $*18 	
  ( 
89l\5$GL%"	1 1r.   rW   )r#   r   r   r
   )r(   r   )'r   
__future__r   r   r   typingr   r   numpyr6   pandas.util._exceptionsr   pandasr!   pandas.io.commonr   pandas.io.sas.sasreaderr   pandas._typingr	   r
   r   r   r   r   r   r   r   _base_params_doc_params2_doc_format_params_doc_iterator_doc_read_sas_docr   r%   r-   r>   rU   rW   r  r.   r$   <module>r     s   #       4  ' -  W  R  W  W 
(C @9 A
      2    	 ,&6rI) Ir.   