
    9i^                        S SK Jr  S SKJrJr  S SKrS SKrS SKJ	r	  S SK
J	s  Jr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJr  SS	KJr  SS
KJrJr  \R:                  " \R<                  \R>                  S9 " S S\5      5       r g)    N)AnyDict)Models)
TorchModel)MODELS)
OutputKeys)	ModelFileTasks   )build_backbone)FPNSegmentorLinearClassifier)module_namec                   ^   ^  \ rS rSrSrS\4U 4S jjrS\\\4   4S jr	S r
S rS	 rS
rU =r$ )VisionMiddlewareModel   aj  
The implementation of 'ViM: Vision Middleware for Unified Downstream Transferring'.
    This model is dynamically initialized with the following parts:

    - backbone: the upstream pre-trained backbone model (CLIP in this code)
    - ViM: the zoo of middlestream trained ViM modules
    - ViM-aggregation: the specific aggregation weights for downstream tasks
	model_dirc                 B  > [         [        U ]  5         [        R                  " U[
        R                  5      n[        R                  " USS9nUS   nUS   U l	        US   n[        US   US9U l        U R                  R                  5         US   n[        U5      n	[        U	5       H  n
U R                  R                  R                   U
   R"                  R%                  X   S	   5        U R                  R                  R                   U
   R&                  R%                  X   S
   5        M     US   nUS   nUS    H  n[        U	5       H  n
U R                  R                  R                   U
   R"                  R)                  UX   U
   S   U5        U R                  R                  R                   U
   R&                  R)                  UX   U
   S   U5        M     M     [*        R,                  " 5       U l        0 U l        US    GH  nUS   nUR3                  S5      (       a>  [5        U R                  R6                  X   S   R8                  S   S9U R.                  U'   OHUR3                  S5      (       a  [;        5       U R.                  U'   O[=        SR?                  U5      5      eU R.                  U   RA                  X   5        U R.                  U   R                  5         XS   RC                  5       ;   d  M  US   U   U R0                  U'   GM     g)as  
Initialize a ViM-based Model.

Args:
    model_dir: model id or path, where model_dir/pytorch_model.pt contains:

        - 'meta_info': basic information of ViM, e.g. task_list
        - 'backbone_weights': parameters of backbone [upstream]
        - 'ViM_weights': parameters of ViM [midstream]
        - 'ViM_agg_weights': parameters of ViM-aggregation [downstream]

cpu)map_location	meta_info	task_listbackbone_weightsbackbone_arch)arch
pretrainedViM_weightsvim_att_weightsvim_mlp_weightsViM_agg_weightsViM_agg_algovim_att_aggvim_mlp_agghead_weightsclszclassifier.biasr   )in_channelsnum_classessegzTask type [{}] is not supported	label_mapN)"superr   __init__ospjoinr	   TORCH_MODEL_FILEtorchloadr   r   backboneevallenrangetransformer	resblocksvim_attregister_ViMvim_mlpregister_tasknn
ModuleDictheads
label_maps
startswithr   
output_dimshaper   NotImplementedErrorformatload_state_dictkeys)selfr   argskwargs
model_path
model_dictr   r   vim_weights
num_layerslayer_iagg_weightsagg_algo	task_namer$   	__class__s                  l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/cv/vision_middleware/model.pyr+   VisionMiddlewareModel.__init__    s    	#T35XXi)C)CD
ZZ
?
{+	";/ &&89&?+8HJ !/%
Z(GMM%%//8@@MM$%679MM%%//8@@MM$%679 ) !!23^,";/I ,))33$W]]!#.w7F &" ))33$W]]!#.w7F &" - 0 ]]_
";/I%n5L##E**(8 $ 8 8 , 7&!((-a!1)2

9% %%e,,(4

9%)5<<YGI I JJy!11,2IJJJy!&&(k27799-6{-CI-N	*# 0    returnc                 D    U R                  U R                  X5      X5      $ )N)postprocessforward)rF   inputsrP   s      rR   __call__VisionMiddlewareModel.__call__j   s%    LL+V@ 	@rT   c                     X R                   ;  a  [        SU R                    SU 35      eU R                  XS9nU R                  U   " U5      nU$ )z{
Dynamic Forward Function of ViM.

Args:
    x: the input images (B, 3, H, W)
    task_name: specified task for forwarding
ztask_name should in z
, but got )rP   )r   rB   r1   r=   )rF   rY   rP   featuresoutputss        rR   rX   VisionMiddlewareModel.forwardn   s\     NN*%&t~~&6jLN N ====**Y'1rT   c           	      t   UR                  5       u  pEpgSU;   Ga  [        R                  " USS9n[        R                  " XU4SSS5      nUS   R	                  5       R                  5       n[        R                  " USS9n	[        [        [        U	R                  S5      R                  5       5      5      5      n
/ / pU
 H  nX:H  nUR                  UR                  5       R                  5       5        UR                  UR                  5       X   -  R!                  5       UR                  5       R!                  5       -  R#                  5       5        M     U
 Vs/ s H  oR$                  U   U   PM     nn[&        R(                  U[&        R*                  U[&        R,                  U0$ [/        S	5      es  snf )
z
Post-process of ViM, based on task_name.

Args:
    inputs: batched input image (B, 3, H, W)
    outputs: batched output (format based on task_name)
    task_name (str): task name
r(   r   )dimNbilinearTr   z9Only segmentation task is currently supported in pipeline)sizeFsoftmaxinterpolatedetachr   r/   argmaxsortedlistsetreshapenumpyappendlongfloatsumitemr>   r   MASKSLABELSSCORESrB   )rF   r^   rY   rP   _r&   
img_height	img_widthr(   predlabelsmasksscoreslabelmasklabel_namess                   rR   rW   !VisionMiddlewareModel.postprocess   s}    17-
I))G+C--)%<dJ $&Ca&--/%%'C<<+DDT\\"%5%;%;%=!>?@F6TYY[..01

sz 9>>@!%!1!1!3 459TV=   @F?Ee	*51v  
   %!!;!!6  &KM Ms   F5c                     U R                   $ )z/
Get the supported tasks of current ViM model.
)r   )rF   s    rR   	get_tasksVisionMiddlewareModel.get_tasks   s     ~~rT   )r1   r=   r>   r   )__name__
__module____qualname____firstlineno____doc__strr+   r   r   rZ   rX   rW   r   __static_attributes____classcell__)rQ   s   @rR   r   r      sE    HO# HOT@T#s(^ @"(MT rT   r   )!os.pathpathr,   typingr   r   jsonr/   torch.nnr;   torch.nn.functional
functionalre   modelscope.metainfor   'modelscope.models.base.base_torch_modelr   modelscope.models.builderr   modelscope.outputsr   modelscope.utils.constantr	   r
   r1   r   headr   r   register_moduleimage_segmentationvision_middlewarer    rT   rR   <module>r      sm           & > , ) 6 $ 0 	&*B*BDWJ WDWrT   