
    9i                     (    S SK r S SKr " S S5      rg)    Nc                       \ rS rSrSrS rS rS\S\S\R                  4S jr
S\S\S	\SS
4S jr\S 5       r\S\S\S\S\S\4
S j5       r\S\S\S\S\S\S\4S j5       rSS\S\4S jjrSrg
)MaxComputeUtil   z
MaxCompute util class.

Args:
    access_id: your access id of MaxCompute
    access_key: access key of MaxCompute
    project_name: your project name of MaxCompute
    endpoint: endpoint of MaxCompute

Attributes:
    _odps: ODPS object

c                 ,    SSK Jn  U" XX45      U l        g )Nr   )ODPS)odpsr   _odps)self	access_id
access_keyproject_nameendpointr   s         l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/msdatasets/utils/maxcompute_utils.py__init__MaxComputeUtil.__init__   s    )H
    c                 8    U R                   R                  U5      $ )z
Get MaxCompute table object.
)r	   	get_table)r
   
table_names     r   
_get_tableMaxComputeUtil._get_table   s     zz##J//r   r   pt_conditionreturnc                     U R                  U5      nUR                  USS9 nUR                  5       nSSS5        U$ ! , (       d  f       W$ = f)z
Read data from MaxCompute table.
:param table_name: table name
:param pt_condition: partition condition,
    Example: pt_condition = 'dt=20230331'
:return: pandas dataframe with all data
F)	partitionlimitN)r   open_reader	to_pandas)r
   r   r   treaderpd_dfs         r   
_read_dataMaxComputeUtil._read_data!   sP     OOJ']]\]?6$$&E @  @? s	   =
Aoutput_pathNc                 d    U R                  X5      nUR                  USS9  [        SU S35        g)z
Fetch data from MaxCompute table to local file.
:param table_name: table name
:param pt_condition: partition condition,
    Example: pt_condition = 'dt=20230331'
:param output_path: output path
:return: None
F)indexzFetch data to z successfully.N)r"   to_csvprint)r
   r   r   r$   r!   s        r   fetch_data_to_csv MaxComputeUtil.fetch_data_to_csv0   s3     
9[.{m>:;r   c                 d    U(       d  U R                   nUS::  a  [        SU 35      eX:  a  UnX4$ )Nr   z%batch_size must be positive, but got )count
ValueError)r    
batch_sizer   s      r   _check_batch_args MaxComputeUtil._check_batch_args>   sB    LLE?7
|DF FJ  r   batch_size_inlimit_indrop_last_in
partitionscolumnsc              #     #    [         R                  XU5      u  p[        R                  " X!-  5      n[	        US-   5       H  nXv:X  a  U(       d  X!-  S:  a  XU-  Xq-  X!-  -    nOXU-  US-   U-   n/ n	U HM  n
[        U
5       VVs/ s H  u  pUPM	     nnnUS[        U5      [        U5      -
   nU	R                  U5        MO     [        R                  " XS9v   M     gs  snnf 7f)aE  
Generate batch data from MaxCompute table.

Args:
    reader: MaxCompute table reader
    batch_size_in: batch size
    limit_in: limit of data, None means fetch all data
    drop_last_in: whether drop last incomplete batch data
    partitions: table partitions
    columns: table columns

Returns:
    batch data generator
   r   Nr5   )
r   r/   mathfloorrangelistlenappendpd	DataFrame)r    r1   r2   r3   r4   r5   	batch_numibatch_recordsbatch_data_listrecord_valtmp_valss                 r   gen_reader_batchMaxComputeUtil.gen_reader_batchI   s     $ #1"B"B8#- JJx78	y1}%A~lx7ORS7S &='8%)AB!E !'='8!a%)6:7 !8 O'.26l;lFACl;#$Fc(mc*o&EG&&x0 ( ,,@@ & <s   BC*	C$
AC*r&   c                    [         R                  XU5      u  p#U(       a  [        R                  " X2-  5      nO[        R                  " X2-  5      nUS:  a  [        SU 35      eX:  a  [        SU SU 35      eX-  nUS-   U-  n	X:  a  Un	XU	 n
/ nU
 HM  n[        U5       VVs/ s H  u  pUPM	     nnnUS[        U5      [        U5      -
   nUR                  U5        MO     [        R                  " XS9$ s  snnf )a{  
Get single batch data from MaxCompute table by indexing.

Args:
    reader: MaxCompute table reader
    index: index of batch data
    batch_size_in: batch size
    limit_in: limit of data, None means fetch all data
    drop_last_in: whether drop last incomplete batch data
    partitions: table partitions
    columns: table columns

Returns:
    single batch data (dataframe)
r   z$index must be non-negative, but got z1index must be less than batch_num, but got index=z, batch_num=r7   Nr8   )r   r/   r9   r:   ceilr-   r<   r=   r>   r?   r@   )r    r&   r1   r2   r3   r4   r5   rA   startend
batch_itemrD   rE   rF   rG   rH   s                   r   gen_reader_itemMaxComputeUtil.gen_reader_itemm   s   $ #1"B"B8#- 

8#;<I		(":;I19CE7KLLCE7,W`Vab  %qyM)>C#&
 F*.v,7,,H7 B#h-#j/"ACH""8, !
 ||O==	 8s   'C;c                 ~    U R                  U5      nUR                  US9 nX44sS S S 5        $ ! , (       d  f       g = f)N)r   )r   r   )r
   r   r   	table_insr    s        r   get_table_reader_ins#MaxComputeUtil.get_table_reader_ins   s5    OOJ/	""\":f$ ;::s   .
<)r	   )N)__name__
__module____qualname____firstlineno____doc__r   r   strr?   r@   r"   r)   staticmethodr/   intboolr<   rI   rP   rT   __static_attributes__ r   r   r   r      s   I0S   <C <s <'*</3< ! ! !A !As !A'+!A9=!AHL!A !AF ,>s ,>3 ,># ,>&*,>8<,>GK,> ,>\%s %# % %r   r   )r9   pandasr?   r   r`   r   r   <module>rb      s     X% X%r   