
    9iw                     \    S SK JrJr  SSKJrJr  SSKJr   " S S\5      r " S S\5      r	g	)
   )get_subcommand_argsstr2bool   )PaddleXPipelineWrapperPipelineCLISubcommandExecutor)create_config_from_structurec                     ^  \ rS rSr                                             SU 4S jjr\S 5       rSS jrSS jrS r	S r
SSSSSSSSSSSSSSSSSSSSSS	.S
 jrSSSSSSSSSSSSSSSSSSSSSS	.S jrSSSSS.S jrSS.S jrSSSSSSSSSSSSSSSSSS.S jr\S 5       rS rSrU =r$ )PPChatOCRv4Doc   Nc.                    > [        5       R                  5       n/U/R                  S5        U/R                  S5        U/U l        [        T0U ]  " S0 U.D6  g )Nselfkwargs )localscopypop_paramssuper__init__)1r   layout_detection_model_namelayout_detection_model_dir#doc_orientation_classify_model_name"doc_orientation_classify_model_dirdoc_unwarping_model_namedoc_unwarping_model_dirtext_detection_model_nametext_detection_model_dirtextline_orientation_model_nametextline_orientation_model_dirtextline_orientation_batch_sizetext_recognition_model_nametext_recognition_model_dirtext_recognition_batch_size&table_structure_recognition_model_name%table_structure_recognition_model_dirseal_text_detection_model_nameseal_text_detection_model_dir seal_text_recognition_model_nameseal_text_recognition_model_dir seal_text_recognition_batch_sizeuse_doc_orientation_classifyuse_doc_unwarpinguse_textline_orientationuse_seal_recognitionuse_table_recognitionlayout_threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modetext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotext_rec_score_threshseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratioseal_rec_score_threshretriever_configmllm_chat_bot_configchat_bot_configr   params	__class__s1                                                   e/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/paddleocr/_pipelines/pp_chatocrv4_doc.pyr   PPChatOCRv4Doc.__init__   sB    b 

6

8"6"    c                     g)NzPP-ChatOCRv4-docr   r   s    rE   _paddlex_pipeline_name%PPChatOCRv4Doc._paddlex_pipeline_nameP       !rG   c                 8    U R                   R                  UUUS9$ )N)vector_info	save_pathr@   )paddlex_pipelinesave_vector)r   rN   rO   r@   s       rE   rQ   PPChatOCRv4Doc.save_vectorT   s)    $$00#- 1 
 	
rG   c                 4    U R                   R                  XS9$ )N)	data_pathr@   )rP   load_vector)r   rT   r@   s      rE   rU   PPChatOCRv4Doc.load_vector[   s#    $$00 1 
 	
rG   c                 4    U R                   R                  US9$ )N)rT   )rP   load_visual_info_list)r   rT   s     rE   rX   $PPChatOCRv4Doc.load_visual_info_list`   s    $$::Y:OOrG   c                 4    U R                   R                  XS9$ )N)visual_inforO   )rP   save_visual_info_list)r   r[   rO   s      rE   r\   $PPChatOCRv4Doc.save_visual_info_listc   s#    $$::# ; 
 	
rG   )r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   c                    U R                   R                  " U40 SU_SU_SU_SU_SU_SU_SU_SU	_S	U
_S
U_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_UD6$ Nr+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   )rP   visual_predictr   inputr+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r   s                           rE   visual_predict_iter"PPChatOCRv4Doc.visual_predict_iterh   s    6 $$33
)E
 0
 &>	

 "6
 #8
 .
 "
 !4
 &>
 %<
 !4
 ,
 !4
 #8
  #8!
" %<#
$ !4%
& ,'
( !4)
* #8+
, #8/
 	
rG   c                    [        U R                  " U40 SU_SU_SU_SU_SU_SU_SU_SU	_S	U
_S
U_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_UD65      $ r_   )listrc   ra   s                           rE   r`   PPChatOCRv4Doc.visual_predict   s    6 $$-I #4 *B	
 &: '< "2 & %8 *B )@ %8 !0 %8 '<  '<!" )@#$ %8%& !0'( %8)* '<+, '</
 	
rG   i  i,  Fmin_characters
block_sizeflag_save_bytes_vectorr@   c                <    U R                   R                  UUUUUS9$ )Nrh   )rP   build_vector)r   r[   ri   rj   rk   r@   s         rE   rm   PPChatOCRv4Doc.build_vector   s1     $$11)!#9- 2 
 	
rG   rA   c                8    U R                   R                  UUUS9$ )Nro   )rP   	mllm_pred)r   rb   key_listrA   s       rE   rq   PPChatOCRv4Doc.mllm_pred   s)    $$..!5 / 
 	
rG   Tintegrationuse_vector_retrievalrN   ri   text_task_descriptiontext_output_formattext_rules_strtext_few_shot_demo_text_content!text_few_shot_demo_key_value_listtable_task_descriptiontable_output_formattable_rules_str table_few_shot_demo_text_content"table_few_shot_demo_key_value_listmllm_predict_infomllm_integration_strategyrB   r@   c                X    U R                   R                  UUUUUUUUU	U
UUUUUUUUUS9$ )Nru   )rP   chat)r   rr   r[   rv   rN   ri   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   rB   r@   s                       rE   r   PPChatOCRv4Doc.chat   s[    . $$))!5#)"71),K.O#9 3+-M/Q/&?+-' * 
 	
rG   c                     [        5       $ N)#PPChatOCRv4DocCLISubcommandExecutor)clss    rE   get_cli_subcommand_executor*PPChatOCRv4Doc.get_cli_subcommand_executor  s    244rG   c                    0 SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _S	U R                   S
   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S   _SU R                   S    _S!U R                   S"   _0 S#U R                   S$   _S%U R                   S&   _S'U R                   S(   _S)U R                   S*   _S+U R                   S,   _S-U R                   S.   _S/U R                   S0   _S1U R                   S,   =(       d    U R                   S.   _S2U R                   S3   _S4U R                   S5   _S6U R                   S7   _S8U R                   S9   _S:U R                   S;   _S<U R                   S=   _S>U R                   S?   _S@U R                   SA   _SBU R                   SC   _EU R                   SD   U R                   SE   U R                   SF   U R                   S?   U R                   SG   U R                   SH   U R                   SI   U R                   SJ   U R                   SK   U R                   SL   U R                   SM   U R                   SN   SO.En[        U5      $ )PNz?SubPipelines.LayoutParser.SubModules.LayoutDetection.model_namer   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.model_dirr   zcSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_namer   zbSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_dirr   zYSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_namer   zXSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_dirr   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_namer   zTSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_dirr   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_namer   zZSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_dirr   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.batch_sizer    zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_namer!   zVSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_dirr"   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.batch_sizer#   zgSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableStructureRecognition.model_namer$   zfSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableStructureRecognition.model_dirr%   zoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_namer&   znSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_dirr'   zqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_namer(   zpSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_dirr)   zqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.batch_sizer*   zSSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_orientation_classifyr+   zHSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_unwarpingr,   zJSubPipelines.LayoutParser.SubPipelines.GeneralOCR.use_textline_orientationr-   z.SubPipelines.LayoutParser.use_doc_preprocessorz.SubPipelines.LayoutParser.use_seal_recognitionr.   z/SubPipelines.LayoutParser.use_table_recognitionr/   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.thresholdr0   z8SubPipelines.LayoutParser.SubModules.LayoutDetection.nmsr1   zASubPipelines.LayoutParser.SubModules.LayoutDetection.unclip_ratior2   zFSubPipelines.LayoutParser.SubModules.LayoutDetection.merge_bboxes_moder3   zYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_side_lenr4   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_typer5   zQSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.threshr6   r7   r8   r9   r;   r<   r=   r>   r?   r@   rA   rB   )zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.box_threshzWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.unclip_ratiozYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.score_threshzsSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_side_lenzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_typezkSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.threshzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.box_threshzqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.unclip_ratiozsSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.score_threshzSubModules.LLM_RetrieverzSubModules.MLLM_ChatzSubModules.LLM_Chat)r   r   )r   	STRUCTUREs     rE   _get_paddlex_config_overrides,PPChatOCRv4Doc._get_paddlex_config_overrides  s   F
Mt||-PF
 Mdll,O	F
 rswss5tF
 qrvr~r~4sF
 himiuiu*jF
  ghlhtht)i!F
& deieqeq+f'F
, cdhdpdp*e-F
2 jkokwkw1l3F
8 ijnjvjv0k9F
> jkokwkw1l?F
D fgkgsgs-hEF
J efjfrfr,gKF
P fgkgsgs-hQF
V vw{  xD  xD8xWF
\ uvz  wC  wC7w]F
b ~  @D  @L  @L0@cF
h }  C  K  K/iF
n @  BF  BN  BN2BoF
t   AE  AM  AM1AuF
z @  BF  BN  BN2B{F
@ bcgcoco.dAF
F WX\XdXd#YGF
L YZ^ZfZf*[MF
R =dll.? ?1 ||/0YF
Z =dll&?[F
` >t||'@aF
f Mdll"OgF
l GImF
r PQUQ]Q]%RsF
x UVZVbVb*WyF
~ himiuiu)jF
D deieqeq%fEF
J `aeamam!bKF
P fjeqeq%f hlgsgs'h jniuiu'j DH  DP  DP)D @D  @L  @L%@ |@  |H  |H!| @D  @L  @L%@ BF  BN  BN'B DH  DP  DP'D )-5G(H$(LL1G$H#'<<0A#BKF
	N ,I66rG   )r   )-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNr   )__name__
__module____qualname____firstlineno__r   propertyrJ   rQ   rU   rX   r\   rc   r`   rm   rq   r   classmethodr   r   __static_attributes____classcell__)rD   s   @rE   r
   r
      s    %)#',0+/!% $"&!%(,'+(,$(#'$(/3.2'+&*)-(,)-%)!%!" !% $  "" $  ""!]6#p " "


P
 &*!%!" !% $  "" $  ""13
r &*!%!" !% $  "" $  ""15
v $
" BF 
 ""(,*.# )-+/"/++
Z 5 5H7 H7rG   r
   c                   0    \ rS rSr\S 5       rS rS rSrg)r   i  c                     g)Npp_chatocrv4_docr   rI   s    rE   subparser_name2PPChatOCRv4DocCLISubcommandExecutor.subparser_name  rL   rG   c           
         UR                  SS[        SSS9  UR                  SS[        SSS	S
S9  UR                  S[        SS9  UR                  S[        SSS9  UR                  S[        SS9  UR                  S[        SS9  UR                  S[        SS9  UR                  S[        SS9  UR                  S[        SS9  UR                  S[        SS9  UR                  S[        S S9  UR                  S![        S"S9  UR                  S#[        S$S9  UR                  S%[        S&S9  UR                  S'[        S(S9  UR                  S)[        S*S9  UR                  S+[        S,S9  UR                  S-[        S.S9  UR                  S/[        S0S9  UR                  S1[        S2S9  UR                  S3[        S4S9  UR                  S5[        S6S9  UR                  S7[        S8S9  UR                  S9[        S:S9  UR                  S;[        S<S9  UR                  S=[        S>S9  UR                  S?[        S@S9  UR                  SA[        SBS9  UR                  SC[        SDS9  UR                  SE[        SFS9  UR                  SG[        SHS9  UR                  SI[        SJS9  UR                  SK[        SLS9  UR                  SM[        SNS9  UR                  SO[        SPS9  UR                  SQ[        SRS9  UR                  SS[        STS9  UR                  SU[        SVS9  UR                  SW[        SXS9  UR                  SY[        SZS9  UR                  S[[        S\S9  UR                  S][        S^S9  UR                  S_[        S`S9  UR                  Sa[        SbS9  UR                  Sc[        SdS9  UR                  Se[        SfS9  UR                  Sg[        ShS9  UR                  Si[        SjS9  g )kNz-iz--inputTzInput path or URL.)typerequiredhelpz-kz--keys+KEYz$Keys use for information extraction.)r   nargsr   metavarr   z--save_pathzPath to the output directory.)r   r   z--invoke_mllmFz6Whether to invoke the multimodal large language model.)r   defaultr   z--layout_detection_model_namez#Name of the layout detection model.z--layout_detection_model_dirz-Path to the layout detection model directory.z%--doc_orientation_classify_model_namez<Name of the document image orientation classification model.z$--doc_orientation_classify_model_dirzFPath to the document image orientation classification model directory.z--doc_unwarping_model_namez'Name of the text image unwarping model.z--doc_unwarping_model_dirz,Path to the image unwarping model directory.z--text_detection_model_namez!Name of the text detection model.z--text_detection_model_dirz+Path to the text detection model directory.z!--textline_orientation_model_namez7Name of the text line orientation classification model.z --textline_orientation_model_dirzAPath to the text line orientation classification model directory.z!--textline_orientation_batch_sizez>Batch size for the text line orientation classification model.z--text_recognition_model_namez#Name of the text recognition model.z--text_recognition_model_dirz-Path to the text recognition model directory.z--text_recognition_batch_sizez*Batch size for the text recognition model.z(--table_structure_recognition_model_namez.Name of the table structure recognition model.z'--table_structure_recognition_model_dirz8Path to the table structure recognition model directory.z --seal_text_detection_model_namez&Name of the seal text detection model.z--seal_text_detection_model_dirz0Path to the seal text detection model directory.z"--seal_text_recognition_model_namez(Name of the seal text recognition model.z!--seal_text_recognition_model_dirz2Path to the seal text recognition model directory.z"--seal_text_recognition_batch_sizez/Batch size for the seal text recognition model.z--use_doc_orientation_classifyz9Whether to use document image orientation classification.z--use_doc_unwarpingz$Whether to use text image unwarping.z--use_textline_orientationz4Whether to use text line orientation classification.z--use_seal_recognitionz Whether to use seal recognition.z--use_table_recognitionz!Whether to use table recognition.z--layout_thresholdz/Score threshold for the layout detection model.z--layout_nmsz'Whether to use NMS in layout detection.z--layout_unclip_ratioz+Expansion coefficient for layout detection.z--layout_merge_bboxes_modez!Overlapping box filtering method.z--text_det_limit_side_lenzUThis sets a limit on the side length of the input image for the text detection model.z--text_det_limit_typezxThis determines how the side length limit is applied to the input image before feeding it into the text deteciton model.z--text_det_threshzDetection pixel threshold for the text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--text_det_box_threshzDetection box threshold for the text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--text_det_unclip_ratiozText detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z--text_rec_score_threshzrText recognition threshold used in general OCR. Text results with scores greater than this threshold are retained.z--seal_det_limit_side_lenzZThis sets a limit on the side length of the input image for the seal text detection model.z--seal_det_limit_typez}This determines how the side length limit is applied to the input image before feeding it into the seal text deteciton model.z--seal_det_threshzDetection pixel threshold for the seal text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--seal_det_box_threshzDetection box threshold for the seal text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--seal_det_unclip_ratiozSeal text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z--seal_rec_score_threshzcSeal text recognition threshold. Text results with scores greater than this threshold are retained.z--qianfan_api_keyz&Configuration for the embedding model.z--pp_docbee_base_urlz6Configuration for the multimodal large language model.)add_argumentstrr   intfloat)r   	subparsers     rE   _update_subparser5PPChatOCRv4DocCLISubcommandExecutor._update_subparser  s   % 	 	
 	7 	 	
 	0 	 	
 	I	 	 	
 	+6 	 	

 	*@ 	 	

 	3O 	 	

 	2Y 	 	

 	(: 	 	

 	'? 	 	

 	)4 	 	

 	(> 	 	

 	/J 	 	

 	.T 	 	

 	/Q 	 	

 	+6 	 	

 	*@ 	 	

 	+= 	 	

 	6A 	 	

 	5K 	 	

 	.9 	 	

 	-C 	 	

 	0; 	 	

 	/E 	 	

 	0B 	 	

 	,L 	 	

 	!7 	 	

 	(G 	 	

 	$3 	 	

 	%4 	 	
 	 B 	 	

 	: 	 	

 	#> 	 	

 	(4 	 	

 	'h 	 	

 	# L 	 	

 	 t 	 	

 	# [ 	 	

 	% _ 	 	

 	% F 	 	

 	'm 	 	

 	# Q 	 	

 	 y 	 	

 	# ` 	 	

 	% d 	 	

 	%v 	 	
 	9 	 	

 	"I 	 	
rG   c                    [        U5      nUR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nUb  SSSS	US
.US'   SSSSUS
.US'   UR                  S5      nUb  SSUSSS
.US'   [        S0 UD6n	U	R                  U5      n
/ nU
 H4  nUR	                  US   5        U(       d  M   US   R                  U5        M6     U	R                  U5      nU(       a  U	R                  X45      nUS   nOS nU	R                  UUUUS9nUS   R                  5        H  u  nn[        U SU 35        M     g )Nrb   keysrO   invoke_mllmqianfan_api_key	retrieverzembedding-v1zhttps://qianfan.baidubce.com/v2qianfan)module_name
model_namebase_urlapi_typeapi_keyr@   chat_botzernie-3.5-8kopenairB   pp_docbee_base_urlz	PP-DocBeefake_keyrA   r[   layout_parsing_resultmllm_res)rN   r   chat_res r   )r   r   r
   rc   appendsave_allrm   rq   r   itemsprint)r   argsrC   rb   r   rO   r   r   r   chatocrresult_visualvisual_info_listresrN   result_mllmr   result_chatkvs                      rE   execute_with_args5PPChatOCRv4DocCLISubcommandExecutor.execute_with_args  s   $T*

7#zz&!JJ{+	jj/ **%67&*,=%**F%&  *,=$*)F$% $ZZ(<=))) /$%.F)* !*6*33E: C##C$67y+,55i@ !
 **+;<!++E8K +J 7 $ll#/	 # 
  
+113DAqQCq* 4rG   r   N)	r   r   r   r   r   r   r   r   r   r   rG   rE   r   r     s     " "}
~=rG   r   N)

_utils.clir   r   baser   r   utilsr   r
   r   r   rG   rE   <module>r      s2    H /N7+ N7bA*G ArG   