
    9i                         S SK r S SKrS SKJrJr  S SKJr  S SKJr  SSK	J
r
Jr  SSKJr  \" S5      (       a  S S	KJr  S S
KJr  \" S5      (       a  S SKJr  S SKJr  \
" SS5       " S S\\S95       rg)    N)ABCabstractmethod)List)logging   )class_requires_depsis_dep_available)AutoRegisterABCMetaClass	langchain)Document)RecursiveCharacterTextSplitterzlangchain-community)vectorstores)FAISSc                      ^  \ rS rSrSrSrSrU 4S jr\S 5       r	\S 5       r
S\4S	 jrS
\S\4S jrS\S\4S jrS\S\4S jrS/ SQ4S\\   S\S\\   SS4S jjr	SSS\4S jrS\SS4S jr   S S\\   SSS\S\S\S\4S jjr
SrU =r$ )!BaseRetriever    zBase RetrieverTPADDLEX_VECTOR_STOREc                 >   > [         TU ]  5         SU l        SU l        g)z*Initializes an instance of base retriever.N)super__init__
model_name	embedding)self	__class__s    u/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/paddlex/inference/pipelines/components/retriever/base.pyr   BaseRetriever.__init__(   s        c                     [        S5      e)z~
Declaration of an abstract method. Subclasses are expected to
provide a concrete implementation of generate_vector_database.
zCThe method `generate_vector_database` has not been implemented yet.NotImplementedErrorr   s    r   generate_vector_database&BaseRetriever.generate_vector_database.   s     "Q
 	
r   c                     [        S5      e)zz
Declaration of an abstract method. Subclasses are expected to
provide a concrete implementation of similarity_retrieval.
z?The method `similarity_retrieval` has not been implemented yet.r   r!   s    r   similarity_retrieval"BaseRetriever.similarity_retrieval8   s     "M
 	
r   returnc                     U R                   $ )zT
Get the model name used for generating vectors.

Returns:
    str: The model name.
)r   r!   s    r   get_model_nameBaseRetriever.get_model_nameB   s     r   sc                 8    UR                  U R                  5      $ )z
Check if the given string starts with the vector store prefix.

Args:
    s (str): The input string to check.

Returns:
    bool: True if the string starts with the vector store prefix, False otherwise.
)
startswithVECTOR_STORE_PREFIX)r   r+   s     r   is_vector_storeBaseRetriever.is_vector_storeK   s     ||D4455r   vector_store_bytesc                 f    U R                   [        R                  " U5      R                  S5      -   $ )z
Encode the vector store bytes into a base64 string prefixed with a specific prefix.

Args:
    vector_store_bytes (bytes): The bytes to encode.

Returns:
    str: The encoded string with the prefix.
ascii)r.   base64	b64encodedecode)r   r1   s     r   encode_vector_store!BaseRetriever.encode_vector_storeW   s3     ''&*:*:;M*N*U*U+
 
 	
r   vector_store_strc                 Z    [         R                  " U[        U R                  5      S 5      $ )z
Decodes the vector store string by removing the prefix and decoding the base64 encoded string.

Args:
    vector_store_str (str): The vector store string with a prefix.

Returns:
    bytes: The decoded vector store data.
N)r4   	b64decodelenr.   )r   r9   s     r   decode_vector_store!BaseRetriever.decode_vector_storee   s)      0T5M5M1N1P QRRr   i,  )	
u   。z

 	text_list
block_size
separatorsr   c                     [        USUS9nUR                  SR                  U5      5      nU Vs/ s H  n[        US9PM     nn [        R
                  " XpR                  S9nU$ s  snf ! [         a    Sn U$ f = f)a  
Generates a vector database from a list of texts.

Args:
    text_list (list[str]): A list of texts to generate the vector database from.
    block_size (int): The size of each chunk to split the text into.
    separators (list[str]): A list of separators to use when splitting the text.

Returns:
    FAISS: The generated vector database.

Raises:
    ValueError: If an unsupported API type is configured.
   )
chunk_sizechunk_overlaprD   r?   )page_content)	documentsr   N)r   
split_textjoinr   r   from_documentsr   
ValueError)	r   rB   rC   rD   text_splittertextstext
all_splitsvectorstores	            r   r"   r#   q   s    ( 7!

 ((9)=>>CDedhD1e
D	..$K  E  	K	s   A&A+ +A;:A;rS   c                 f    Uc  U R                   nU$ U R                  UR                  5       5      nU$ )z
Encode the vector store serialized to bytes.

Args:
    vectorstore (FAISS): The vector store to be serialized and encoded.

Returns:
    str: The encoded vector store.
)r.   r7   serialize_to_bytes)r   rS   s     r   encode_vector_store_to_bytes*BaseRetriever.encode_vector_store_to_bytes   s>     22K  22;3Q3Q3STKr   c                     U R                  U5      (       d  [        S5      eU R                  U5      nUS:X  a  [        R                  " S5        g[
        R                  R                  UU R                  SS9nU$ )a:  
Decode a vector store from bytes according to the specified API type.

Args:
    vectorstore (str): The serialized vector store string.

Returns:
    FAISS: Deserialized vector store object.

Raises:
    ValueError: If the retrieved vector store is not for PaddleX
    or if an unsupported API type is specified.
z-The retrieved vectorstore is not for PaddleX.r   z5The retrieved vectorstore is empty,will empty vector.NT)
embeddingsallow_dangerous_deserialization)	r/   rN   r=   r   warningr   r   deserialize_from_bytesr   )r   rS   vectors      r   decode_vector_store_from_bytes,BaseRetriever.decode_vector_store_from_bytes   sx     ##K00LMM..{;#OOST##::~~,0 ; 

 r   query_text_list
sleep_timetopkmin_charactersc                 P   SnUc  U$ U H  nUn[         R                  " U5        UR                  XS9n	U	 V
Vs/ s H  u  pU
R                  U4PM     nn
n[	        US S9nUSSS2    H/  u  pUS:  d  M  [        U5      [        U5      -   U:  a    M  Xm-  nM1     M     U$ s  snn
f )aA  
Retrieve similar contexts based on a list of query texts.

Args:
    query_text_list (list[str]): A list of query texts to search for similar contexts.
    vectorstore (FAISS): The vector store where to perform the similarity search.
    sleep_time (float): The time to sleep between each query, in seconds. Default is 0.5.
    topk (int): The number of results to retrieve per query. Default is 2.
    min_characters (int): The minimum number of characters required for text processing, defaults to 3500.
Returns:
    str: A concatenated string of all unique contexts found.
rA   N)kc                     U S   $ )N    )xs    r   <lambda>4BaseRetriever.similarity_retrieval.<locals>.<lambda>   s    AaDr   )keyg)timesleep'similarity_search_with_relevance_scoresrI   sortedr<   )r   r`   rS   ra   rb   rc   all_C
query_textQUESTIONdocsdocumentscorecontextrQ   s                 r   r%   r&      s    ( L)J!HJJz"FFxFXDMQRT/(--u5TGRW.9G&tt}D=5zCI->ME	  - *  Ss   B")r   r   )g      ?   i  )__name__
__module____qualname____firstlineno____doc___BaseRetriever__is_baser.   r   r   r"   r%   strr)   boolr/   bytesr7   r=   r   intrV   r^   float__static_attributes____classcell__)r   s   @r   r   r       s<   I0 
 
 
 
 
6 
6 
6
e 
 

SC 
SE 
S  ?	!9! ! I	!
 
!F C  # ' D  ""c" " 	"
 " " 
" "r   r   )	metaclass)r4   rn   abcr   r   typingr   paddlex.utilsr   
utils.depsr   r	   utils.subclass_registerr
   langchain.docstore.documentr   langchain.text_splitterr   langchain_communityr    langchain_community.vectorstoresr   r   rh   r   r   <module>r      sh      #  ! A AK  4F)**06 ["78CC#; C 9Cr   