
    9il"                     *   S SK Jr  S SKJrJr  S SKrS SKrS SK	J
r
  S SKJr  S SKJrJr  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJr  \" 5       rS
 rS rS rS rS r \RB                  " \RD                  \RD                  S9 " S S\5      5       r#g)    N)DictUnion)Models)Tensor
TorchModel)MODELS)cpd_auto)PGL_SUM)	ModelFileTasks)
get_loggerc                    [         R                  " U [         R                  5      n [         R                  " X R                  5      n[        U[        UR                  S   S-
  S5      SSS9u  p4US-  n[         R                  " S/X1S-
  /45      n/ n[        [        U5      S-
  5       H?  nX6   X6S-      S-
  /nU[        U5      S-
  :X  a  X6   X6S-      /nUR                  U5        MA     [         R                  " [        U5      5      n/ n[        [        U5      5       H$  n	X9   S   UU	   S   -
  nUR                  U5        M&     [         R                  " [        U5      5      n
X:4$ )Nr      x   g?)ncpvmaxlmin      )nparrayfloat32dotTr	   minshapeconcatenaterangelenappendlist)
video_featn_frameKchange_points_temp_change_pointsidxsegmenttemp_n_frame_per_segchange_points_idxn_frame_per_segs              s/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/cv/video_summarization/summarizer.pyget_change_pointsr.      se   *bjj1J
z<<(A	s1771:>3'iaAM!B&MNNQC1#FGMS'!+, %}1W'='AB#m$q(($)=q+ABG!!'* - HHT"456M"3}#562159 9" "##G, 7 hht$89:O))    c           
      @   [        US-   5       Vs/ s H"  n[        U S-   5       Vs/ s H  nSPM     snPM$     nn[        US-   5       Hz  n[        U S-   5       He  nUS:X  d  US:X  a	  SXV   U'   M  XS-
     U::  a3  [        X&S-
     XVS-
     XqUS-
     -
     -   XVS-
     U   5      XV   U'   MV  XVS-
     U   XV   U'   Mg     M|     / nU n[        USS5       H7  nXV   U   XVS-
     U   :w  d  M  UR                  SUS-
  5        XqUS-
     -  nM9     U$ s  snf s  snf )a  Maximize the value that a knapsack of capacity W can hold. You can either put the item or discard it, there is
no concept of putting some part of item in the knapsack.

:param int W: Maximum capacity -in frames- of the knapsack.
:param list[int] wt: The weights (lengths -in frames-) of each video shot.
:param list[float] val: The values (importance scores) of each video shot.
:param int n: The number of the shots.
:return: A list containing the indices of the selected shots.
r   r   )r   maxinsert)	Wwtvalnr&   r$   iwselecteds	            r-   	knap_sackr;   1   s>    -2!a%L9LqU1q5\	"\!\	"LA9 1q5\q1uAAvaQEaca%j1U8A1q5	M+BBAhqk+Q E(1+Q   H	A1a_47aAhqk!OOAq1u%AENA 
 O) 
#9s   DD
DDc           	         / n[        [        U5      5       GH  nX   nX   nX%   nX5   n	[        R                  " U[        R                  S9n
U	R
                  [        :w  a  U	R                  [        R                  5      n	U	S   U:w  a  [        R                  " X//5      n	[        [        U	5      S-
  5       H(  nX   XS-      pU[        U5      :X  a  SXU& M"  X{   XU& M*     / n/ nU HY  nUR                  US   US   -
  S-   5        UR                  U
US   US   S-    R                  5       R                  5       5        M[     US   n[        US   S-   S-  5      n[        UX[        U5      5      n[        R                  " US   S-   [        R                  S9nU H  nSUUU   S   UU   S   S-   & M     UR                  U5        GM     U$ )a  Generate the automatic machine summary, based on the video shots; the frame importance scores; the number of
frames in the original video and the position of the sub-sampled frames of the original video.

:param list[np.ndarray] all_shot_bound: The video shots for all the -original- testing videos.
:param list[np.ndarray] all_scores: The calculated frame importance scores for all the sub-sampled testing videos.
:param list[np.ndarray] all_nframes: The number of frames for all the -original- testing videos.
:param list[np.ndarray] all_positions: The position of the sub-sampled frames for all the -original- testing videos.
:return: A list containing the indices of the selected frames for all the -original- testing videos.
)dtyper1   r   r   g333333?)r   r   r   zerosr   r=   intastypeint32r   r    meanitemr;   int8)all_shot_bound
all_scoresall_nframesall_positionsall_summariesvideo_index
shot_boundframe_init_scoresn_frames	positionsframe_scoresr8   pos_left	pos_rightshot_imp_scoresshot_lengthsshot
final_shotfinal_max_lengthr:   summarys                        r-   generate_summaryrX   R   s    MS_-#0
&3+!.	 xx

;??c!!((2IR=H$	:'>?Is9~)*A"+,	a%0@iC)**34i03D3Gi0 + DQ$q' 1A 56""d1gd1gk2779??AC   ^

1 1T9:-| .0 ((:a=1,BGG<DCDGJt$Q'
4(8(;a(?@  	W%Q .T r/   c                 L    [        U S5      u  p[        US5      u  p1SX1U4-  nU$ )N<   z%02d:%02d:%06.3f)divmod)secondsmshtimes        r-   transform_timera      s0    '2DA!R=DAq	)DKr/   c                    / nSnSnSn[        U 5       H;  u  pgU(       a  USL a  UnSnM  M  U(       d  M"  US-
  nUR                  X4/5        SnM=     U(       a)  U S   S:X  a   [        U 5      S-
  nUR                  X4/5        / nU HI  n	UR                  U	[        U	S   [	        U5      -  5      [        U	S   [	        U5      -  5      /S.5        MK     U$ )Nr1   FTr   r   )frame
timestamps)	enumerater    r   ra   float)
rW   fpsframes_liststart_frame	end_frameis_summary_framer8   r(   outputsegs
             r-   summary_formatrn      s    KKIG$5(#'  )  E	""K#;<#(  % GBK1,L1$	K34Fs1vc
23s1vc
23
 	  Mr/   )module_namec                      ^  \ rS rSrS\4U 4S jjrS\\\4   S\\\4   4S jrS\\\4   S\\\4   4S jr	S\\\4   S\\\
\\4   4   4S jrS	rU =r$ )
PGLVideoSummarization   	model_dirc           	        > [         TU ]  " U/UQ70 UD6  [        R                  " U[        R
                  5      n[        R                  " 5       U l        [        SSSSSSS9U l
        [        R                  R                  5       (       a  [        R                  " S5      U l        O[        R                  " S5      U l        U R                  R!                  U R                  5      U l
        U R#                  U R                  U5      U l
        U R$                  (       a  U R                  R'                  5         g	U R                  R)                  5         g	)
zpinitialize the video summarization model from the `model_dir` path.

Args:
    model_dir (str): the model path.
i         addabsolute)
input_sizeoutput_sizenum_segmentsheadsfusionpos_enccudacpuN)super__init__ospjoinr   TORCH_MODEL_FILEnnMSELosslossr
   modeltorchr   is_availabledevice_deviceto_load_pretrainedtrainingtraineval)selfrs   argskwargs
model_path	__class__s        r-   r   PGLVideoSummarization.__init__   s     	4T4V4XXi)C)CD
JJL	 
 ::""$$ <</DL <<.DLZZ]]4<<0
**4::zB
==JJJJOOr/   inputreturnc                 b    US   nUS   nU R                  U5      u  pESU R                  XC5      0$ )Nframe_featuresgtscorer   )r   r   )r   r   r   r   predsattn_weightss         r-   _train_forward$PGLVideoSummarization._train_forward   s<    /0	""jj8		%122r/   c                 :    US   nU R                  U5      u  p4SU0$ )Nr   scores)r   )r   r   r   yr   s        r-   _inference_forward(PGLVideoSummarization._inference_forward   s'    /0**^4!}r/   c                     UR                  5        H$  u  p#X   R                  U R                  5      X'   M&     U R                  (       a  U R	                  U5      $ U R                  U5      $ )zreturn the result by the model

Args:
    input (Dict[str, Tensor]): the preprocessed data

Returns:
    Dict[str, Union[list, Tensor]]: results
)itemsr   r   r   r   r   )r   r   keyvalues       r-   forwardPGLVideoSummarization.forward   sU      ++-JCt||4EJ (==&&u--**511r/   )r   r   r   )__name__
__module____qualname____firstlineno__strr   r   r   r   r   r   r!   r   __static_attributes____classcell__)r   s   @r-   rq   rq      s    # >3Df$5 3$sF{:K 3S-3.4 )5 9=c6k9J2T#"(#) * 2.23dFl8K3K.L2 2r/   rq   )$os.pathpathr   typingr   r   numpyr   r   torch.nnr   modelscope.metainfor   modelscope.models.baser   r   modelscope.models.builderr   5modelscope.models.cv.video_summarization.kts.cpd_autor	   0modelscope.models.cv.video_summarization.pgl_sumr
   modelscope.utils.constantr   r   modelscope.utils.loggerr   loggerr.   r;   rX   ra   rn   register_modulevideo_summarizationrq    r/   r-   <module>r      s         & 5 , J D 6 .	*6B5pB 	6+E+EG<2J <2G<2r/   