
    9iZ                     `   S SK r S SKJr  S SKJrJrJrJrJrJ	r	J
r
  S SKrS SKrS SKJr  S SKJrJr  S SKJr  S SKJr  S SKJrJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SK J!r!  S SK"J#r#  S SK$J%r%  \%" 5       r&\ " S S5      5       r'\RP                  " \RR                  S9 " S S\!5      5       r*g)    N)	dataclass)AnyCallableDictListOptionalTupleUnion)nn)
DataLoaderDataset)tqdm)Trainers)Model
TorchModel)BertForTextRanking)	MsDataset)Preprocessor)TRAINERS)NlpEpochBasedTrainer)DEFAULT_MODEL_REVISION)
get_loggerc                   D    \ rS rSrSrS\\\\4      S\\\4   4S jr	Sr
g)GroupCollator   z
Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg]
and pass batch separately to the actual collator.
Abstract out data detail for the model.
featuresreturnc           
         [        US   [        5      (       a  [        U/ 5      nUS   R                  5       nU Vs0 s H  o3[        5       _M     nnU H/  nUR	                  5        H  u  p6XC   R                  U5        M     M1     UR	                  5        VVs0 s H  u  p6U[        R                  " USS9_M     nnnU$ s  snf s  snnf )Nr   )dim)
isinstancelistsumkeysitemsappendtorchcat)selfr   r#   kbatchelevs          l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/nlp/text_ranking_trainer.py__call__GroupCollator.__call__"   s    hqk4((8R(H{!$()DqDFD)C		" $  5:KKMBMDAEIIaQ''MB * Cs   C"C N)__name__
__module____qualname____firstlineno____doc__r   r   strr   r.   __static_attributes__r0       r-   r   r      s/    	d38n!5 	$sCx. 	r8   r   )module_namec                     ^  \ rS rSrSSSSSSSSS\4
S\\\\R                  \
4      S\\
   S\\   S\\   S\\   S	\\\\4      S
\\\\4      S\\   S\\R"                  R$                  \R"                  R&                  R(                  4   S\\
   4U 4S jjjrSS jrSS jr SS\\
   S\\
\4   4S jjrSrU =r$ )TextRankingTrainer.   N)NNmodelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionc                 V   > Uc
  [        5       n[        TU ]  " SUUUUUUU	UUU
S.
UD6  g )N)
r=   r>   r?   r@   rA   rD   rE   rB   rC   rF   r0   )r   super__init__)r(   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   kwargs	__class__s               r-   rI   TextRankingTrainer.__init__1   sL       )OM 	'%'%!'%)	 	r8   c                     SnUR                  5        HH  n[        US SS9nSn[        US U 5       H!  u  px[        US   5      S:X  d  M  SUS-   -  n  O   X6-  nMJ     U[	        U5      -  $ )Nr   c                     U S   $ Nr   r0   xs    r-   <lambda>0TextRankingTrainer.compute_mrr.<locals>.<lambda>T   s    1Q4r8   Tkeyreverse   1g      ?)valuessorted	enumerater6   len)	r(   resultr)   mrrres
sorted_resarindexr+   s	            r-   compute_mrrTextRankingTrainer.compute_mrrQ   s|    ==?CFJB'
2A7
s1v;#%	*B 8 IC # S[  r8   c           
      Z   SnSSK Jn  UR                  5        Hv  n[        US SS9n[        R
                  " U Vs/ s H  owS   PM	     sn/5      n[        R
                  " U Vs/ s H  owS   PM	     sn/5      n	U[        U" XUS95      -  nMx     U[        U5      -  nU$ s  snf s  snf )Nr   )
ndcg_scorec                     S/$ rO   r0   rP   s    r-   rR   1TextRankingTrainer.compute_ndcg.<locals>.<lambda>a   s    A3r8   TrT   rW   r)   )sklearnrf   rY   rZ   nparrayfloatr\   )
r(   r]   r)   ndcgrf   r_   r`   r+   labelsscoress
             r-   compute_ndcgTextRankingTrainer.compute_ndcg]   s    &==?CEJXX*=*3A*=>?FXX*=*3A*=>?FE*Vq9::D	 #
 c&k!	  >=s   B#
&B(
checkpoint_pathr   c           
      	   U R                   " U R                  40 U R                  R                  R	                  S0 5      DSU R
                  0D6U l        Ub  [        R                  " U5      nOU R                  nUR                  5         Sn[        5       n[        5       n[        5       nSn	[        R                  R                  5       (       a  SOSn
UR                  U
5        [!        [#        U R                  5      5       GH  u  p UR%                  5        VVs0 s H8  u  pU['        U[        R(                  5      (       a  UR                  U
5      OU_M:     nnn[,        R,                  " 5       n[        R.                  " 5          UR1                  S5      R3                  5       R5                  5       R7                  5       nUR1                  S	5      R3                  5       R5                  5       R7                  5       nU" S0 UD6nSSS5        [,        R,                  " 5       nU	UU-
  -  n	XPR                  R8                  -  nS
 nWS   R;                  S5      R3                  5       R5                  5       R7                  5       nU" U5      R=                  5       nUR?                  W5        UR?                  U5        UR?                  W5        GM     [@        RC                  SRE                  XS-  U-  5      5        0 n[G        XU5       H(  u  nnnUU;  a  / UU'   UU   RI                  UU45        M*     U H  n[K        UU   S S9UU'   M     [        5       nU RL                   GH  nURO                  S5      (       ai  URQ                  S5      S   n[S        U5      nU RU                  UUS9n[@        RC                  SRE                  UU5      5        URI                  UU45        M  URO                  S5      (       aj  URQ                  S5      S   n[S        U5      nU RW                  UUS9n[@        RC                  SRE                  UU5      5        URI                  SU45        GM  [Y        SU-  5      e   [[        U5      $ s  snnf ! [*         a/    UR%                  5        VVs0 s H  u  pX_M	     Os  snnf nnn GNef = f! , (       d  f       GN= f)a  evaluate a dataset

evaluate a dataset via a specific model from the `checkpoint_path` path, if the `checkpoint_path`
does not exist, read from the config file.

Args:
    checkpoint_path (Optional[str], optional): the model path. Defaults to None.

Returns:
    Dict[str, float]: the results about the evaluation
    Example:
    {"accuracy": 0.5091743119266054, "f1": 0.673780487804878}

dataloader
collate_fnNr   g        zcuda:0cpuro   qidc                 b    [         R                  " U 5      S[         R                  " U 5      -   -  $ )NrW   )rk   exp)logitss    r-   sigmoid,TextRankingTrainer.evaluate.<locals>.sigmoid   s"    vvf~RVVF^);<<r8   r{   z/Inference time = {:.2f}s, [{:.4f} ms / sample] i  c                     U S   $ rO   r0   rP   s    r-   rR   -TextRankingTrainer.evaluate.<locals>.<lambda>   s    adr8   )rU   r^   @ri   z{}: {}rn   zMetric %s not implementedr0   )._build_dataloader_with_datasetrC   cfg
evaluationgeteval_data_collatoreval_dataloaderr   from_pretrainedr=   evalr!   r&   cudais_availabletor[   r   r$   r    TensorRuntimeErrortimeno_gradpopdetachrw   numpy
batch_sizesqueezetolistextendloggerinfoformatzipr%   rZ   metrics
startswithsplitintrc   rq   NotImplementedErrordict)r(   rs   argsrJ   r=   total_sampleslogits_list
label_listqid_listtotal_spent_timedevice_stepr*   rU   valinfer_start_time	label_idsqidsoutputsinfer_end_timer|   r{   rank_resultrx   scorelabeleval_outputsmetricr)   r^   rn   s                                  r-   evaluateTextRankingTrainer.evaluateh   s3   $  $BB 0hh!!%%lB7 0 .. 0 &&66GEJJE 	

fV
6"ZZ4466E%d4+?+?&@ALEA %*KKM %2 &0ell&C&CCFF6NM$1    $yy{!IIh/668<<>DDF	yy'..0446<<>.%. ! "YY[N1A AA11<<<M= X&..r299;??AGGIFV_++-Fi(v&OOD!7 B: 	ELL5EG 	H !$XJ!GC+%#%C ##UEN3 "H
 C%k#&6NKK  vllF  ''LL%b)F&&{a&8HOOFC89##VSM2""6**LL%b)F(((:HOOFD9:##VTN3)*E*NOO #  L!!q
   A27++-@-hc-@@A !s=   
R?RR	A?SRS)R8
7
SS
S	)r   )
   )N)r1   r2   r3   r4   r   r   r
   r   r   Moduler6   r   r   r   r   r	   r&   optim	Optimizerlr_scheduler_LRSchedulerrI   rc   rq   r   rm   r   r7   __classcell__)rK   s   @r-   r;   r;   .   sL   
 BF&*04/304AE@D37HN,BE*bii"<=> sm $H-	
 #8, $H- $E)W*<$=> #5G);#<= #<0 ekk33#kk66CCD E %SM @
!	 37b""*3-b" #3:.b" b"r8   r;   )+r   dataclassesr   typingr   r   r   r   r   r	   r
   r   rk   r&   r   torch.utils.datar   r   r   modelscope.metainfor   modelscope.models.baser   r   modelscope.models.nlpr    modelscope.msdatasets.ms_datasetr   modelscope.preprocessors.baser   modelscope.trainers.builderr   modelscope.trainers.nlp_trainerr   modelscope.utils.constantr   modelscope.utils.loggerr   r   r   register_modulenlp_text_ranking_trainerr;   r0   r8   r-   <module>r      s     ! D D D    0  ( 4 4 6 6 0 @ < .	   & 
h&G&GH["- [" I["r8   