
    ёi5                        S SK Jr  S SKJr  \(       a
  S SKrS SKJr  S SKrS SK	r	S SKrS SK
Jr  S SKJr  / rSrSrSrS	rS
rSrSrSrSrSrS r " S S\5      rg)    )annotations)TYPE_CHECKINGN)_check_exists_and_download)DatasetzBhttp://paddlemodels.bj.bcebos.com/conll05st/conll05st-tests.tar.gz 387719152ae52d60422c016e92a742fcz:http://paddlemodels.bj.bcebos.com/conll05st%2FwordDict.txt ea7fb7d4c75cc6254716f0177a506baaz:http://paddlemodels.bj.bcebos.com/conll05st%2FverbDict.txt 0d2977293bbb6cbefab5b0f97db1e77cz<http://paddlemodels.bj.bcebos.com/conll05st%2FtargetDict.txt d8c7f03ceb5fc2e5a0fa7503a4353751z1http://paddlemodels.bj.bcebos.com/conll05st%2Femb bf436eb0faa1f6f9103017f8be57cdb7c                     \ rS rSr% SrS\S'   S\S'   S\S'   S\S'   S\S'   S	\S
'   S	\S'   S	\S'   S\S'   S\S'   S\S'         S           SS jjrSS jrSS jrSS jr	    SS jr
S S jrS!S jrS"S jrSrg)#	Conll05st.   a  
Implementation of `Conll05st <https://www.cs.upc.edu/~srlconll/soft.html>`_
test dataset.

Note: only support download test dataset automatically for that
      only test dataset of Conll05st is public.

Args:
    data_file(str|None): path to data tar file, can be set None if
        :attr:`download` is True. Default None
    word_dict_file(str|None): path to word dictionary file, can be set None if
        :attr:`download` is True. Default None
    verb_dict_file(str|None): path to verb dictionary file, can be set None if
        :attr:`download` is True. Default None
    target_dict_file(str|None): path to target dictionary file, can be set None if
        :attr:`download` is True. Default None
    emb_file(str|None): path to embedding dictionary file, only used for
        :code:`get_embedding` can be set None if :attr:`download` is
        True. Default None
    download(bool): whether to download dataset automatically if
        :attr:`data_file` :attr:`word_dict_file` :attr:`verb_dict_file`
        :attr:`target_dict_file` is not set. Default True

Returns:
    Dataset: instance of conll05st dataset

Examples:

    .. code-block:: python

        >>> import paddle
        >>> from paddle.text.datasets import Conll05st

        >>> class SimpleNet(paddle.nn.Layer):
        ...     def __init__(self):
        ...         super().__init__()
        ...
        ...     def forward(self, pred_idx, mark, label):
        ...         return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)


        >>> conll05st = Conll05st()

        >>> for i in range(10):
        ...     pred_idx, mark, label= conll05st[i][-3:]
        ...     pred_idx = paddle.to_tensor(pred_idx)
        ...     mark = paddle.to_tensor(mark)
        ...     label = paddle.to_tensor(label)
        ...
        ...     model = SimpleNet()
        ...     pred_idx, mark, label= model(pred_idx, mark, label)
        ...     print(pred_idx.item(), mark.item(), label.item())
        >>> # doctest: +SKIP('label will change')
        65840 5 1991
        92560 5 3686
        99120 5 457
        121960 5 3945
        4774 5 2378
        14973 5 1938
        36921 5 1090
        26908 5 2329
        62965 5 2968
        97755 5 2674


str | None	data_fileword_dict_fileverb_dict_filetarget_dict_fileemb_filedict[str, int]	word_dictpredicate_dict
label_dictlist	sentences
predicateslabelsNc                F   Xl         U R                   c*  U(       d   S5       e[        U[        [        SU5      U l         X l        U R                  c*  U(       d   S5       e[        U[
        [        SU5      U l        X0l        U R                  c*  U(       d   S5       e[        U[        [        SU5      U l        X@l
        U R                  c*  U(       d   S5       e[        U[        [        SU5      U l
        XPl        U R                  c*  U(       d   S5       e[        U[        [        SU5      U l        U R!                  U R                  5      U l        U R!                  U R                  5      U l        U R'                  U R                  5      U l        U R+                  5         g )Nz>data_file is not set and downloading automatically is disabled	conll05stzCword_dict_file is not set and downloading automatically is disabledzCverb_dict_file is not set and downloading automatically is disabledzEtarget_dict_file is not set and downloading automatically is disabledz=emb_file is not set and downloading automatically is disabled)r   r   DATA_URLDATA_MD5r   WORDDICT_URLWORDDICT_MD5r   VERBDICT_URLVERBDICT_MD5r   TRGDICT_URLTRGDICT_MD5r   EMB_URLEMB_MD5
_load_dictr   r   _load_label_dictr   
_load_anno)selfr   r   r   r   r   downloads          \/var/www/html/banglarbhumi/venv/lib/python3.13/site-packages/paddle/text/datasets/conll05.py__init__Conll05st.__init__}   s    #>>! P8 88X{HDN -& U8 #=#D -& U8 #=#D !1  ( W8 %? %D! !==  O8 7'7KDM )<)<="ood.A.AB//0E0EF 	    c                   0 n[        5       n[        US5       n[        U5       Hm  u  pVUR                  5       nUR	                  S5      (       a  UR                  USS  5        MA  UR	                  S5      (       d  MY  UR                  USS  5        Mo     SnU H  nXrSU-   '   US-  nXrSU-   '   US-  nM     XrS'   S S S 5        U$ ! , (       d  f       U$ = f)NrB-   I-r      O)setopen	enumeratestrip
startswithadd)	r,   filenamedtag_dictfilineindextags	            r.   r*   Conll05st._load_label_dict   s    5(C A$Q<zz|??4((LLab*__T**LLab* ( E %$*
 %$*
	  
 cF !  !  s   A!C>?C
Cc                    0 n[        US5       n[        U5       H  u  pEXBUR                  5       '   M     S S S 5        U$ ! , (       d  f       U$ = f)Nr3   )r:   r;   r<   )r,   r?   r@   rB   rC   rD   s         r.   r)   Conll05st._load_dict   sL    (C A$Q<"#$**, ( !  !  s   'A  
Ac           
     :   [         R                  " U R                  5      nUR                  S5      nUR                  S5      n/ U l        / U l        / U l        [        R                  " US9 n[        R                  " US9 n/ n/ n/ n[        XE5       GH  u  pU	R                  5       R                  5       n	U
R                  5       R                  5       R                  5       n
[        U
5      S:X  Ga  [        [        US   5      5       H(  nU Vs/ s H  oU   PM	     nnUR                  U5        M*     [        U5      S:  Ga  / nUS    H  nUS:w  d  M  UR                  U5        M     [!        USS  5       GH  u  pSnSn/ nS	nU GH  nUS
:X  a  U(       d  UR                  S5        M$  US
:X  a  U(       a  UR                  SU-   5        MG  US:X  a  UR                  SU-   5        SnMe  UR#                  S5      S:w  aA  UR#                  S5      S:w  a,  USUR#                  S
5       nUR                  SU-   5        SnM  UR#                  S5      S:w  aB  UR#                  S5      S:X  a-  USUR#                  S
5       nUR                  SU-   5        SnGM  [%        SU 35      e   U R                  R                  U5        U R
                  R                  X   5        U R                  R                  U5        GM     / n/ n/ nGM{  UR                  U	5        UR                  U
5        GM     S S S 5        S S S 5        UR'                  5         UR'                  5         UR'                  5         g s  snf ! , (       d  f       NL= f! , (       d  f       NU= f)Nz2conll05st-release/test.wsj/words/test.wsj.words.gzz2conll05st-release/test.wsj/props/test.wsj.props.gz)fileobjr   r7   -r8   F *r6   z*)()r4   TzUnexpected label: )tarfiler:   r   extractfiler   r   r   gzipGzipFilezipr<   decodesplitlenrangeappendr;   findRuntimeErrorclose)r,   tfwfpf
words_file
props_filer   r   one_segwordlabelrC   xa_kind_label	verb_listlblcur_tagis_in_bracketlbl_seq	verb_wordls                        r.   r+   Conll05st._load_anno   s    \\$..)^^@
 ^^@
 MM"%MM"%IFG"::zz|**,,,.446u:?"3wqz?36='>g!g'>l3 4 6{a'$&	!'A Cx ) 0 0 3 "+ '0qr
&;FA&)G,1M&(G(*I%(#$8M$+NN3$7%&#X-$+NN4'>$B%&$Y$+NN4'>$B49M%&VVC[B%6166#;";L./AFF3K.@G$+NN4'>$B49M%&VVC[B%6166#;";L./AFF3K.@G$+NN4'>$B48M*69KA37O*P$P# &)& !NN11)< OO229<@ KK..w75 '<8 !#IF G$$T*NN5)a  ; & &p 	




] (? &% &%s>   ,NBM;M65M;GM;5N6M;;
N		N
Nc                   U R                   U   nU R                  U   nU R                  U   n[        U5      nUR	                  S5      nS/[        U5      -  nUS:  a  SXvS-
  '   X&S-
     nOSnUS:  a  SXvS-
  '   X&S-
     n	OSn	SXv'   X&   n
U[        U5      S-
  :  a  SXvS-   '   X&S-      nOSnU[        U5      S-
  :  a  SXvS-   '   X&S-      nOSnU Vs/ s H"  oR
                  R                  U[        5      PM$     nnU R
                  R                  U	[        5      /U-  nU R
                  R                  U[        5      /U-  nU R
                  R                  U
[        5      /U-  nU R
                  R                  U[        5      /U-  nU R
                  R                  U[        5      /U-  nU R                  R                  U5      /U-  nU Vs/ s H  oR                  R                  U5      PM     nn[        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      [        R                  " U5      4	$ s  snf s  snf )NzB-Vr   r7   bosr5   eos)r   r   r   rY   rE   r   getUNK_IDXr   r   nparray)r,   idxsentence	predicater   sen_len
verb_indexmarkctx_n1ctx_n2ctx_0ctx_p1ctx_p2wword_idx
ctx_n2_idx
ctx_n1_idx	ctx_0_idx
ctx_p1_idx
ctx_p2_idxpred_idx	label_idxs                         r.   __getitem__Conll05st.__getitem__(  s    >>#&OOC(	S!h-\\%(
sS[ >#$Da 1n-FF>#$Da 1n-FF$Fa'#$Da 1n-FFFa'#$Da 1n-FF<DEHqNN&&q'2HEnn((9:WD
nn((9:WD
^^''w787B	nn((9:WD
nn((9:WD
''++I67'A5;<V__((+V	< HHXHHZ HHZ HHYHHZ HHZ HHXHHTNHHY

 
	
 F =s   )K $Kc                ,    [        U R                  5      $ )N)rY   r   r,   s    r.   __len__Conll05st.__len__o  s    4>>""r1   c                H    U R                   U R                  U R                  4$ )a  
Get the word, verb and label dictionary of Wikipedia corpus.

Examples:

    .. code-block:: python

        >>> from paddle.text.datasets import Conll05st

        >>> conll05st = Conll05st()
        >>> word_dict, predicate_dict, label_dict = conll05st.get_dict()

)r   r   r   r   s    r.   get_dictConll05st.get_dictr  s     ~~t22DOOCCr1   c                    U R                   $ )z
Get the embedding dictionary file.

Examples:

    .. code-block:: python

        >>> from paddle.text.datasets import Conll05st

        >>> conll05st = Conll05st()
        >>> emb_file = conll05st.get_embedding()

)r   r   s    r.   get_embeddingConll05st.get_embedding  s     }}r1   )r   r   r   r   r   r   r   r   r   r   r   )NNNNNT)r   r   r   r   r   r   r   r   r   r   r-   bool)r?   strreturnr   )r   None)rx   intr   ztuple[npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_], npt.NDArray[np.int_]])r   r   )r   z5tuple[dict[str, int], dict[str, int], dict[str, int]])r   r   )__name__
__module____qualname____firstlineno____doc____annotations__r/   r*   r)   r+   r   r   r   r   __static_attributes__ r1   r.   r   r   .   s    @D   ""OL !%%)%)'+#GG #G #	G
 %G G GR&FPE
E


E
N#D r1   r   )
__future__r   typingr   numpyrv   numpy.typingnptrT   rR   paddle.dataset.commonr   	paddle.ior   __all__r   r    r!   r"   r#   r$   r%   r&   r'   r(   ru   r   r   r1   r.   <module>r      so    #      < 
O-K1K1L0
=
,
b br1   