
    JKi                    P    S r SSKJr  SSKrSSKrSSKJr  SSKJr   " S S5      r	g)zJSON text splitter.    )annotationsN)Any)Documentc                  ,  ^  \ rS rSr% SrSrS\S'    SrS\S'     S     SU 4S jjjr\	SS	 j5       r
\	        SS
 j5       r    SS jr  S       SS jjr S     SS jjr  S       SS jjr   S         SS jjrSrU =r$ )RecursiveJsonSplitter   a  Splits JSON data into smaller, structured chunks while preserving hierarchy.

This class provides methods to split JSON data into smaller dictionaries or
JSON-formatted strings based on configurable maximum and minimum chunk sizes.
It supports nested JSON structures, optionally converts lists into dictionaries
for better chunking, and allows the creation of document objects for further use.
  intmax_chunk_sizei  min_chunk_sizec                l   > [         TU ]  5         Xl        Ub  UU l        g[        US-
  S5      U l        g)a  Initialize the chunk size configuration for text processing.

This constructor sets up the maximum and minimum chunk sizes, ensuring that
the `min_chunk_size` defaults to a value slightly smaller than the
`max_chunk_size` if not explicitly provided.

Args:
    max_chunk_size: The maximum size for a chunk.
    min_chunk_size: The minimum size for a chunk.

        If `None`, defaults to the maximum chunk size minus 200, with a lower
        bound of 50.
N   2   )super__init__r   maxr   )selfr   r   	__class__s      _/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/langchain_text_splitters/json.pyr   RecursiveJsonSplitter.__init__   sD      	, )  	 ^c)2. 	    c                @    [        [        R                  " U 5      5      $ )z1Calculate the size of the serialized JSON object.)lenjsondumps)datas    r   
_json_size RecursiveJsonSplitter._json_size5   s     4::d#$$r   c                N    USS  H  nU R                  U0 5      n M     X US   '   g)z;Set a value in a nested dictionary based on the given path.N)
setdefault)dpathvaluekeys       r   _set_nested_dict&RecursiveJsonSplitter._set_nested_dict:   s/     9CS"%A $r(r   c                L   [        U[        5      (       a4  UR                  5        VVs0 s H  u  p#X R                  U5      _M     snn$ [        U[        5      (       a9  [        U5       VVs0 s H   u  pE[        U5      U R                  U5      _M"     snn$ U$ s  snnf s  snnf )N)
isinstancedictitems_list_to_dict_preprocessinglist	enumeratestr)r   r   kviitems         r   r,   1RecursiveJsonSplitter._list_to_dict_preprocessingE   s     dD!!GKzz|T|tqA77::|TTdD!!  ).GA A88>>. 
  Us   B-'B c                   U=(       d    / nUb  UO0 /n[        U[        5      (       a  UR                  5        H  u  pE/ UQUPnU R                  US   5      nU R                  XE05      nU R                  U-
  n	X:  a  U R                  US   Xe5        M\  XpR                  :  a  UR                  0 5        U R                  XVU5        M     U$ U R                  US   X!5        U$ )zESplit json into maximum size dictionaries while preserving structure.r    )	r)   r*   r+   r   r   r&   r   append_json_split)
r   r   current_pathchunksr%   r$   new_path
chunk_sizesize	remainings
             r   r7   !RecursiveJsonSplitter._json_splitU   s     $)r!-B4dD!!"jjl
/\/3/!__VBZ8
|4 //*<	#))&*hF!%8%88b) $$Uf= +&  !!&*lAr   c                    U(       a!  U R                  U R                  U5      5      nOU R                  U5      nUS   (       d  UR                  5         U$ )zSplits JSON into a list of JSON chunks.

Args:
    json_data: The JSON data to be split.
    convert_lists: Whether to convert lists in the JSON to dictionaries
        before splitting.

Returns:
    A list of JSON chunks.
r    )r7   r,   pop)r   	json_dataconvert_listsr9   s       r   
split_json RecursiveJsonSplitter.split_jsont   sI     %%d&F&Fy&QRF%%i0F bzJJLr   c                t    U R                  XS9nU Vs/ s H  n[        R                  " XSS9PM     sn$ s  snf )a>  Splits JSON into a list of JSON formatted strings.

Args:
    json_data: The JSON data to be split.
    convert_lists: Whether to convert lists in the JSON to dictionaries
        before splitting.
    ensure_ascii: Whether to ensure ASCII encoding in the JSON strings.

Returns:
    A list of JSON formatted strings.
)rA   rB   )ensure_ascii)rC   r   r   )r   rA   rB   rF   r9   chunks         r   
split_text RecursiveJsonSplitter.split_text   s9    " 9R KQQ&

5<&QQQs   5c                    U=(       d    0 /[        U5      -  n/ n[        U5       HN  u  pxU R                  XUS9 H5  n	[        R                  " XW   5      n
[        XS9nUR                  U5        M7     MP     U$ )a  Create a list of `Document` objects from a list of json objects (`dict`).

Args:
    texts: A list of JSON data to be split and converted into documents.
    convert_lists: Whether to convert lists to dictionaries before splitting.
    ensure_ascii: Whether to ensure ASCII encoding in the JSON strings.
    metadatas: Optional list of metadata to associate with each document.

Returns:
    A list of `Document` objects.
)rA   rB   rF   )page_contentmetadata)r   r.   rH   copydeepcopyr   r6   )r   textsrB   rF   	metadatas
metadatas_	documentsr2   textrG   rL   new_docs               r   create_documents&RecursiveJsonSplitter.create_documents   s~    $ 32$U"3
	 'GA, )   ==7"I  ) ( r   )r   r   )r	   N)r   r
   r   z
int | NonereturnNone)r   dict[str, Any]rW   r
   )r"   rY   r#   	list[str]r$   r   rW   rX   )r   r   rW   r   )NN)r   r   r8   zlist[str] | Noner9   zlist[dict[str, Any]] | NonerW   list[dict[str, Any]])F)rA   rY   rB   boolrW   r[   )FT)rA   rY   rB   r\   rF   r\   rW   rZ   )FTN)
rO   r[   rB   r\   rF   r\   rP   zlist[dict[Any, Any]] | NonerW   zlist[Document])__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r   staticmethodr   r&   r,   r7   rC   rH   rU   __static_attributes____classcell__)r   s   @r   r   r      s    NC*NC
 HL
!
:D
	
 
0 % %   
	  
& *..2	 ' ,	
 
D $!  
	8 $!	R!R R 	R
 
R2 $!15#  	
 / 
 r   r   )
ra   
__future__r   rM   r   typingr   langchain_core.documentsr   r    r   r   <module>rj      s#     "    -r rr   