
    JKi>	                    x    S r SSKJr  SSKJr  SSKJr  SSKJr   SSK	r	Sr
 " S	 S
\5      rg! \ a    Sr
 Nf = f)zNLTK text splitter.    )annotations)Any)override)TextSplitterNTFc                  f   ^  \ rS rSrSr  SSS.         S	U 4S jjjjr\S
S j5       rSrU =r	$ )NLTKTextSplitter   z"Splitting text using NLTK package.F)use_span_tokenizec                 > [         TU ]  " S0 UD6  Xl        X l        X0l        U R                  (       a  U R                  (       a  Sn[        U5      e[        (       d  Sn[        U5      eU R                  (       a/  [        R                  R                  U R                  5      U l        g[        R                  R                  U l        g)a]  Initialize the NLTK splitter.

Args:
    separator: The separator to use when combining splits.
    language: The language to use.
    use_span_tokenize: Whether to use `span_tokenize` instead of
        `sent_tokenize`.

Raises:
    ImportError: If NLTK is not installed.
    ValueError: If `use_span_tokenize` is `True` and separator is not `''`.
z6When use_span_tokenize is True, separator should be ''zANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueError	_HAS_NLTKImportErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenize)self	separatorlanguager
   kwargsmsg	__class__s         _/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/langchain_text_splitters/nltk.pyr   NLTKTextSplitter.__init__   s    ( 	"6"#!"3""tJCS/!yUCc"""""mm@@PDO"mm99DO    c                f   U R                   (       am  [        U R                  R                  U5      5      n/ n[	        U5       H7  u  nu  pVUS:  a  X$S-
     S   nXU XU -   nOXU nUR                  U5        M9     OU R                  XR                  S9nU R                  X0R                  5      $ )Nr      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r    
split_textNLTKTextSplitter.split_text9   s     ""66t<=EF#,U#3<Eq5$U|AH#U3doEH##Hh' $4 __TNN_CF!!&//::r"   )r   r   r   r   )z

english)
r   strr   r5   r
   boolr   r   returnNone)r*   r5   r7   z	list[str])
__name__
__module____qualname____firstlineno____doc__r   r   r2   __static_attributes____classcell__)r   s   @r    r   r      sg    ,  !!:
 #(!:!: !:
  !: !: 
!: !:F ; ;r"   r   )r=   
__future__r   typingr   typing_extensionsr   langchain_text_splitters.baser   r   r   r   r   r   r"   r    <module>rD      sC     "  & 6I
5;| 5;	  Is   . 99