
    9io3                        S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
rS SKrS SKrS SKJs  Jr  S SKJs  Jr  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  S S
KJ r   SSK!J"r"  SSK#J$r$    SS jr%S r&S r' SS jr(SS jr) " S S\*5      r+\$RX                  " \RZ                  \R\                  S9 " S S\"5      5       r/g)    N)exists)TemporaryDirectory)urlparse)VideoReader)Compose)http_get_file)Preprocessors)FieldsModeKeys)type_assert   )Preprocessor)PREPROCESSORSc                    [        U5      nUR                  S;   a'  [        UR                  5      (       a  [	        XU5      nOi[        5        n[        R                  " 5       R                  n[        UUUSS9  [        R                  R                  Xg5      n[	        XU5      nSSS5        Ub  Un	[        X5      n
O5U R                  R                  n	[        X R                  R                  5      n
/ n[        WR!                  S5      5       HL  n[        U	5       H:  nU
R"                  S   R%                  U5        UR'                  U
" X\   5      5        M<     MN     [(        R*                  " USS9$ ! , (       d  f       N= f)aV  simple interface to load video frames from file

Args:
    cfg (Config): The global config object.
    video_path (str): video file path
    num_spatial_crops_override (int): the spatial crops per clip
    num_temporal_views_override (int): the temporal clips per video
Returns:
    data (Tensor): the normalized video clips for model inputs
)file N)url	local_dir	file_namecookiesr   r   dim)r   schemer   path_decode_videor   uuiduuid4hexr   osjoinkinetics400_tranformTESTNUM_SPATIAL_CROPSrangesize
transformsset_spatial_indexappendtorchstack)cfg
video_pathnum_spatial_crops_overridenum_temporal_views_override
url_parseddatatemporary_cache_dir
random_strtemp_file_pathnum_spatial_crops	transform	data_listijs                 ^/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/video.pyReadVideoDatar:      sE    *%JL(VOO. .S.IJ!%8))J-$	
  WW\\*=JN !<>D " "-6(I	HH66(hh.H.HI	I499Q< ()A  #55a8Ytw/0 * ! ;;ya((- "!s   AE33
Fc                 R   [        U R                  R                  U R                  R                  /U R                  R                  US9n[        R
                  " 5       U[        R                  " U R                  R                  U R                  R                  SS9/n[        U5      $ )a)  
Configs the transform for the kinetics-400 dataset.
We apply controlled spatial cropping and normalization.
Args:
    cfg (Config): The global config object.
    num_spatial_crops (int): the spatial crops per clip
Returns:
    transform_function (Compose): the transform function for input clips
)short_side_range	crop_sizer4   T)meanstdinplace)
KineticsResizedCropDATA
TEST_SCALETEST_CROP_SIZEr&   ToTensorVideoNormalizeVideoMEANSTDr   )r+   r4   resize_videostd_transform_lists       r9   r!   r!   E   s     '((--sxx/B/BC(())+-L
 	  "L!!CHHLL$	@
 %&&    c                 |   US:X  a  [         R                  " SU S-
  5      /nU$ XV-  U-  U-  n	[        X	-
  S5      n
US:X  a  U
S-  nOU[        R                  " XS-
  -  5      -  nU(       a  X-   U-
  nOX-   S-
  n[
        R                  " XU5      n[
        R                  " USU S-
  5      R                  5       nU$ )a  
Generates the frame index list using interval based sampling.

Args:
    vid_length (int): the length of the whole video (valid selection range).
    vid_fps (int): the original video fps
    target_fps (int): the normalized video fps
    clip_idx (int):
        -1 for random temporal sampling, and positive values for sampling specific
        clip from the video
    num_clips (int):
        the total clips to be sampled from each video. combined with clip_idx,
        the sampled video is the "clip_idx-th" video from "num_clips" videos.
    num_frames (int): number of frames in each sampled clips.
    interval (int): the interval to sample each frame.
    minus_interval (bool): control the end index

Returns:
    index (tensor): the sampled frame indexes
r   r      )	randomrandintmaxmathfloorr)   linspaceclamplong)
vid_lengthvid_fps
target_fpsclip_idx	num_clips
num_framesintervalminus_intervalindexclip_lengthmax_idx	start_idxend_idxs                r9   _interval_based_samplingrc   [   s    , Q:>23$ L !+g5
Bj.2>!I 4::gQ.G#HHI-8G-1Gy:>E1j1n5::<LrK   c                 \   [        U[        5      (       d   eUb  UnOU R                  R                  n/ n[	        U5       H  n[        [        U5      UU R                  R                  UUU R                  R                  U R                  R                  U R                  R                  5      nSn[        R                  " [        R                  " UR!                  5        V	s/ s H  oU	   PM	     sn	SS95      nUR#                  U5        M     [        R                  " U5      nA
U$ s  sn	f )ai  
Decodes the video given the numpy frames.
Args:
    cfg          (Config): The global config object.
    frames_list  (list):  all frames for a video, the frames should be numpy array.
    vid_fps      (int):  the fps of this video.
    num_temporal_views_override (int): the temporal clips per video
Returns:
    frames            (Tensor): video tensor data
Nr   )axis)
isinstancelistr"   NUM_ENSEMBLE_VIEWSr$   rc   lenrB   
TARGET_FPSNUM_INPUT_FRAMESSAMPLING_RATEMINUS_INTERVALr)   
from_numpynpr*   tolistr(   )r+   frames_listrW   r.   num_clips_per_video
frame_listrY   list_framesr^   vrs              r9   _decode_video_frames_listrw      s    k4((((".9!hh99J-. )HHHH%%HH""HH##	
 !!HHellnEnU%(nEANP&!! /" [[$F
M	 Fs   #D)c                 |   [        U5      nUb  UnOU R                  R                  n/ n[        U5       GHk  n[	        [        U5      UR                  5       U R                  R                  UUU R                  R                  U R                  R                  U R                  R                  5      nSnUR                  S5      (       a  [        R                  " SUS   S5      n	[        R                   " UR#                  [        R$                  " U	U/5      5      R'                  5       5      R)                  5       nXR*                  S   S nOA[        R                   " UR#                  U5      R'                  5       5      R)                  5       nUR-                  U5        GMn     [        R.                  " U5      nAU$ )a  
Decodes the video given the numpy frames.
Args:
    cfg          (Config): The global config object.
    path          (str): video file path.
    num_temporal_views_override (int): the temporal clips per video
Returns:
    frames            (Tensor): video tensor data
Nz.avir      )r   r"   rh   r$   rc   ri   get_avg_fpsrB   rj   rk   rl   rm   endswithr)   arangedlpackfrom_dlpack	get_batchcat	to_dlpackcloneshaper(   r*   )
r+   r   r.   rv   rr   rs   rY   rt   ru   append_lists
             r9   r   r      sq    
T	B".9!hh99J-. )GNNHHHH%%HH""HH##	
 ==  ,,q%(A6K''UYY(-(/ 0 11:>>Ceg  --a012F''U#--/116 &!/ /0 [[$F
MrK   c                   <    \ rS rSrSr S
S jrS rS rS rS r	Sr
g	)rA      a,  Perform resize and crop for kinetics-400 dataset
Args:
    short_side_range (list): The length of short side range. In inference, this should be [256, 256]
    crop_size         (int): The cropped size for frames.
    num_spatial_crops (int): The number of the cropped spatial regions in each video.
c                 J    SU l         Xl        [        U5      U l        X0l        g )N)idxr<   intr=   r4   )selfr<   r=   r4   s       r9   __init__KineticsResizedCrop.__init__   s"      0Y!2rK   c                 @   UR                   u    p#nU R                  S   nX4:  aF  [        U5      n[        XC-  U-  5      n[        R                  R
                  R                  XU4SS9nOE[        U5      n[        X4-  U-  5      n[        R                  R
                  R                  XU4SS9n[        XpR                  -
  5      n	[        X`R                  -
  5      n
U R                  S:X  a  U	S-  nU
S-  nO~U R                  S:X  an  U R                  S:X  a  Xu:X  a  U	S-  nSnOQXe:X  a  SnU
S-  nODU R                  S:X  a  U	S-  nU
S-  nO)U R                  S:X  a  Xu:X  a  U	S-  nU
nOXe:X  a  U	nU
S-  nUSS2SS2WXR                  -   2WXR                  -   24   $ )zmPerform controlled crop for video tensor.
Args:
    clip (Tensor): the video data, the shape is [T, C, H, W]
r   bilinearr%   moder   rM      N)
r   r<   r   r)   nn
functionalinterpolater=   r4   r   )r   clip_clip_height
clip_widthlengthnew_clip_heightnew_clip_widthnew_clipx_maxy_maxxys                r9   _get_controlled_crop(KineticsResizedCrop._get_controlled_crop   s   
 )-

%1:&&q)#!&kO !9O!KLNxx**66^<: 7 OH ![N!+":^"KLOxx**66^<: 7 OHN^^34Onn45!!Q&
A
A##q(xx1}!+
AA$.A
AQQJQJQ!+
AA$.A
A1aNN 22Aa..6H4HHIIrK   c                 X   UR                   u    p#n[        X45      n[        X45      n[        [        R
                  " U R                  6 5      n[        Xe-  U-  5      nX4:  a  Un	Un
OUn	Un
[        R                  R                  R                  XU
4SS9n[        XR                  -
  5      n[        XR                  -
  5      n[        [        R
                  " SU5      5      n[        [        R
                  " SU5      5      nUS S 2S S 2XU R                  -   2XU R                  -   24   $ )Nr   r   r   )r   minrP   r   rN   uniformr<   r)   r   r   r   r=   )r   r   r   r   r   
short_side	long_sidenew_short_sidenew_long_sider   r   r   r   r   r   r   s                   r9   _get_random_crop$KineticsResizedCrop._get_random_crop  s   (,

%1:1
0	V^^T-B-BCDI2^CD#,O*N+O+N88&&228z 3 K N^^34Onn45q%()q%()1aDNN 22A$..6H4HHIIrK   c                     Xl         g)zSet the spatial cropping index for controlled cropping..
Args:
    idx (int): the spatial index. The value should be in [0, 1, 2], means [left, center, right], respectively.
N)r   )r   r   s     r9   r'   %KineticsResizedCrop.set_spatial_index6  s	    
 rK   c                 $    U R                  U5      $ N)r   )r   r   s     r9   __call__KineticsResizedCrop.__call__=  s    ((..rK   )r=   r   r4   r<   N)r   )__name__
__module____qualname____firstlineno____doc__r   r   r   r'   r   __static_attributes__ rK   r9   rA   rA      s(     		3*JXJ./rK   rA   )module_namec                   R   ^  \ rS rSrU 4S jrS rS r\" \\5      S 5       r	Sr
U =r$ )"MovieSceneSegmentationPreprocessoriA  c                 ~  > [         TU ]  " U0 UD6  UR                  SS5      U l        UR                  [        R
                  S5      U l        UR                  [        R                  S5      U l        UR                  SS5      U l	        SSK
Jn  U" U R                  5      U l        U" U R                  5      U l        g)z'
movie scene segmentation preprocessor
is_trainTNnum_keyframer   r   )get_transform)superr   popr   r   TRAINpreprocessor_train_cfgEVALpreprocessor_test_cfgr   movie_scene_segmentationr   train_transformtest_transform)r   argskwargsr   	__class__s       r9   r   +MovieSceneSegmentationPreprocessor.__init__E  s     	$)&)

:t4&,jj&F#%+ZZt%D""JJ~q9;,T-H-HI+D,F,FGrK   c                     SU l         g )NTr   r   s    r9   train(MovieSceneSegmentationPreprocessor.trainT  s    rK   c                     SU l         g )NFr   r   s    r9   eval'MovieSceneSegmentationPreprocessor.evalX  s    rK   c                     U R                   (       a  U R                  nOU R                  n[        R                  " U" U5      SS9nUR                  SU R                  SSS5      nU$ )Nr   r   r   r   r   )r   r   r   r)   r*   viewr   )r   resultsr&   s      r9   r   +MovieSceneSegmentationPreprocessor.__call__\  sU    ==--J,,J++j1q9,,r4#4#4acBrK   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   objectr   r   __classcell__)r   s   @r9   r   r   A  s.    H   !rK   r   )NNr   )0rQ   r   rN   r   os.pathr   tempfiler   urllib.parser   numpyro   r)   torch.utils.datatorch.utils.dlpackutilsr}   (torchvision.transforms._transforms_videor&   _transforms_videodecordr   torchvision.transformsr   modelscope.hub.file_downloadr   modelscope.metainfor	   modelscope.utils.constantr
   r   modelscope.utils.type_assertr   baser   builderr   r:   r!   rc   rw   r   r   rA   register_modulecv%movie_scene_segmentation_preprocessorr   r   rK   r9   <module>r      s     	    ' !    # # = =  * 6 - 6 4  "
 .2.2))X',)^ ;?(V+\^/& ^/B 
II=NNP" "P"rK   