
    JKi	                        S r SSKJr  SSKJrJr  SSKJr  SSKJ	r	   SSK
r
SSKJr  \(       a  SSKJr  S	r " S S\	5      rSS.     SS jjrg! \ a    S
r N$f = f)zSpacy text splitter.    )annotations)TYPE_CHECKINGAny)override)TextSplitterN)English)LanguageTFc                  l   ^  \ rS rSrSr   SSS.           S	U 4S jjjjr\S
S j5       rSrU =r	$ )SpacyTextSplitter   a=  Splitting text using Spacy package.

Per default, Spacy's `en_core_web_sm` model is used and
its default max_length is 1000000 (it is the length of maximum character
this model takes which can be increased for large files). For a faster, but
potentially less accurate splitting, you can use `pipeline='sentencizer'`.
T)strip_whitespacec               Z   > [         TU ]  " S0 UD6  [        X#S9U l        Xl        X@l        g)z#Initialize the spacy text splitter.
max_lengthN )super__init__"_make_spacy_pipeline_for_splitting
_tokenizer
_separator_strip_whitespace)self	separatorpipeliner   r   kwargs	__class__s         `/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/langchain_text_splitters/spacy.pyr   SpacyTextSplitter.__init__#   s1     	"6"<
 $!1    c                   ^  U 4S jT R                  U5      R                   5       nT R                  UT R                  5      $ )Nc              3  v   >#    U  H.  nTR                   (       a  UR                  OUR                  v   M0     g 7f)N)r   texttext_with_ws).0sr   s     r   	<genexpr>/SpacyTextSplitter.split_text.<locals>.<genexpr>6   s-      
0 ,,AFF!..@0s   69)r   sents_merge_splitsr   )r   r"   splitss   `  r   
split_textSpacyTextSplitter.split_text4   s:    
__T*00
 !!&$//::r   )r   r   r   )z

en_core_web_sm@B )r   strr   r/   r   intr   boolr   r   returnNone)r"   r/   r2   z	list[str])
__name__
__module____qualname____firstlineno____doc__r   r   r+   __static_attributes____classcell__)r   s   @r   r   r      su      (#	2 "&22 2 	2 2 2 
2 2" ; ;r   r   r.   r   c                   [         (       d  Sn[        U5      eU S:X  a  [        5       nUR                  S5        U$ [        R
                  " U SS/S9nXl        U$ )NzCSpacy is not installed, please install it with `pip install spacy`.sentencizernertagger)exclude)
_HAS_SPACYImportErrorr   add_pipespacyloadr   )r   r   msgr<   s       r   r   r   =   sa     :S#=  '	]+  jjE83DE!+r   )r   r/   r   r0   r2   r	   )r8   
__future__r   typingr   r   typing_extensionsr   langchain_text_splitters.baser   rC   spacy.lang.enr   spacy.languager	   r@   rA   r   r   r   r   r   <module>rL      st     " % & 6%	
 J
 ;  ;H )2"%O  Js   A AA