
    9i                     n   S SK r S SKJr  S SKJrJrJrJrJrJ	r	J
r
  S SKrS SKrS SKJr  S SKJrJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  S S
KJr  S SKJr  S SKJr  S SK J!r!  S SK"J#r#  S SK$J%r%  S SK&J'r'  \'" 5       r(\ " S S\5      5       r)\!RT                  " \RV                  S9 " S S\#5      5       r,g)    N)	dataclass)AnyCallableDictListOptionalTupleUnion)nn)
DataLoaderDataset)tqdm)DataCollatorWithPadding)Trainers)Model
TorchModel)BertForTextRanking)	MsDataset)Preprocessor)TRAINERS)NlpEpochBasedTrainer)DEFAULT_MODEL_REVISION)
get_loggerc                   &    \ rS rSrSrSrSrS rSrg)SentenceEmbeddingCollator   z
Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg]
and pass batch separately to the actual collator.
Abstract out data detail for the model.
   Nc                 V   U Vs/ s H  o"S   PM	     nnU Vs/ s H  o"S   PM	     nnUS   R                  5       nU VVs0 s H  ofU Vs/ s H  owU   PM	     sn_M     nnnU R                  R                  R                  USU R                  SS9nUS   R                  5       nU VVs0 s H"  of[        U Vs/ s H  owU   PM	     sn/ 5      _M$     nnnU R                  R                  R                  USU R                  SS9n	XS.$ s  snf s  snf s  snf s  snnf s  snf s  snnf )Nquerydocsr   
max_lengthpt)paddingr!   return_tensors)r   r    )keys	tokenizer
_tokenizerpadr!   sum)
selffeaturesfqqddr%   kele
q_collated
d_collateds
             r/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/nlp/sentence_embedding_trainer.py__call__"SentenceEmbeddingCollator.__call__%   s$   "*+(Qj(+!)*Ai*!uzz|156AB'BSa&B''6^^..22 	 3 !

 !uzz|:>?$Q+!f+R00$?^^..22 	 3 !

 $88! ,*'6 ,?s:   DD
DDD+D%:D D%D D% )	__name__
__module____qualname____firstlineno____doc__r!   r&   r4   __static_attributes__r6       r3   r   r      s    
 JI9r=   r   )module_namec                   V  ^  \ rS rSrSSSSSSSSS\4
S\\\\R                  \
4      S\\
   S\\   S\\   S\\   S	\\\\4      S
\\\\4      S\\   S\\R"                  R$                  \R"                  R&                  R(                  4   S\\
   4U 4S jjjrU 4S jrS rSrU =r$ )SentenceEmbeddingTrainer9   N)NNmodelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionc                 <   > [         TU ]  " SUUUUUUU	UUU
S.
UD6  g )N)
rB   rC   rD   rE   rF   rI   rJ   rG   rH   rK   r6   )super__init__)r*   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   kwargs	__class__s               r3   rN   !SentenceEmbeddingTrainer.__init__<   s?      	 	'%'%!'%)	 	r=   c                    > Uc2  [        U R                  R                  U R                  R                  S9n[        TU ]  " U40 UD6$ )zGet the data collator for both training and evaluating.

Args:
    data_collator: The input data_collator param.

Returns:
    The train_data_collator and eval_data_collator, can be None.
)r&   r!   )r   train_preprocessornlp_tokenizerr!   rM   get_data_collator)r*   rF   rO   rP   s      r3   rU   *SentenceEmbeddingTrainer.get_data_collatorY   sJ      511??22==?M w(A&AAr=   c                     0 $ )Nr6   )r*   s    r3   evauate SentenceEmbeddingTrainer.evauateh   s    	r=   r6   )r7   r8   r9   r:   r   r   r
   r   r   Modulestrr   r   r   r   r	   torchoptim	Optimizerlr_scheduler_LRSchedulerrN   rU   rX   r<   __classcell__)rP   s   @r3   r@   r@   9   s   
 BF&*04/304AE@D37HN,BE*bii"<=> sm $H-	
 #8, $H- $E)W*<$=> #5G);#<= #<0 ekk33#kk66CCD E %SM :B r=   r@   )-timedataclassesr   typingr   r   r   r   r   r	   r
   numpynpr\   r   torch.utils.datar   r   r   transformersr   modelscope.metainfor   modelscope.models.baser   r   modelscope.models.nlpr    modelscope.msdatasets.ms_datasetr   modelscope.preprocessors.baser   modelscope.trainers.builderr   modelscope.trainers.nlp_trainerr   modelscope.utils.constantr   modelscope.utils.loggerr   loggerr   register_modulenlp_sentence_embedding_trainerr@   r6   r=   r3   <module>ru      s     ! D D D    0  0 ( 4 4 6 6 0 @ < .	 9 7 9 9: 
h&M&MN/3 / O/r=   