
    9i{7                     n   S SK rS SKJrJrJrJr  S SKrS SK	r	S SK
Js  Jr  S SKJrJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJr  S S
KJr  S SKJ r   S SK!J"r"  S SK#J$r$  S SK%J&r&  \&" 5       r'S/r(\RR                  " \$RT                  \RT                  S9 " S S\ 5      5       r+ " S S\5      r,g)    N)AnyDictOptionalUnion)DDIMSchedulerStableDiffusionPipeline)Image)
transforms)tqdm)	Pipelines)MutualSelfAttentionControl#register_attention_editor_diffusers)
OutputKeys)	PIPELINES)DiffusersPipeline)	LoadImage)Tasks)
get_loggerImageEditingPipeline)module_namec                      ^  \ rS rSr\S4U 4S jjrS\\\4   S\\\4   4S jrS\\\4   S\\\4   4S jr	S\\\4   S\\\4   4S jr
S	rU =r$ )
r      Nc                 4  > [         TU ]  " SXS.UD6  UR                  S[        R                  5      n[        US[        R                  " [        R                  R                  5       (       a  SOS5      5      U l	        [        R                  S5        [        R                  " [        R                  R!                  US5      SS	9n["        R                  [        R                  R!                  US5      UUS
S9R%                  U R                  5      U l        g)a  MasaCtrl Image Editing Pipeline.

Examples:

>>> import cv2
>>> from modelscope.pipelines import pipeline
>>> from modelscope.utils.constant import Tasks

>>> prompts = [
>>>     "",                           # source prompt
>>>     "a photo of a running corgi"  # target prompt
>>> ]
>>> output_image_path = './result.png'
>>> img = 'https://public-vigen-video.oss-cn-shanghai.aliyuncs.com/public/ModelScope/test/images/corgi.jpg'
>>> input = {'img': img, 'prompts': prompts}
>>>
>>> pipe = pipeline(
>>>     Tasks.image_editing,
>>>     model='damo/cv_masactrl_image-editing')
>>>
>>> output = pipe(input)['output_img']
>>> cv2.imwrite(output_image_path, output)
>>> print('pipeline: the output image path is {}'.format(output_image_path))
)modelpreprocessortorch_dtypedevicecudacpuz load image editing pipeline donezstable-diffusion-v1-4	scheduler)	subfolderT)r    r   use_safetensorsN )super__init__gettorchfloat32getattrr   r   is_available_deviceloggerinfor   from_pretrainedospathjoin_MasaCtrlPipelinetopipeline)selfr   r   kwargsr   r    	__class__s         n/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/pipelines/cv/image_editing_pipeline.pyr%   ImageEditingPipeline.__init__!   s    2 	JuJ6Jjj>HLL5::#:#:#<#<%HJ 	67!11GGLL 78!#	 *99GGLL 78# 	 : " #%"T\\"2	 	    inputreturnc                 l   [         R                  " UR                  S5      5      n[        R                  " [        R
                  " 5       [        R                  " S/S/5      /5      nU" U5      R                  S5      n[        R                  " US5      nUR                  U R                  5      US'   U$ )Nimg      ?r   )   r@   )r   convert_to_imgr&   r
   ComposeToTensor	Normalize	unsqueezeFinterpolater3   r+   )r5   r;   r>   test_transformss       r8   
preprocessImageEditingPipeline.preprocessJ   s    &&uyy'78$,,  "!!3%#/12 c",,Q/mmC,vvdll+er:   c                    [        U[        5      (       d  [        S[        U5       35      eUR	                  S5      nU R
                  R                  UR	                  S5      US   SSSS9u  p4UR                  [        U5      S	S	S	5      nS
u  pV[        XV5      n[        U R
                  U5        U R                  UUUR	                  SS5      S9S	S  nSU0$ )Nz/Expected the input to be a dictionary, but got promptsr>   r         @2   T)guidance_scalenum_inference_stepsreturn_intermediates)   
   rO   )latentsrO   output_tensor)
isinstancedict
ValueErrortyper&   r4   invertexpandlenr   r   )	r5   r;   rL   
start_codelatents_listSTEPLAYEReditoroutputs	            r8   forwardImageEditingPipeline.forwardT   s    %&&A$u+O  ))I&#'==#7#7IIeAJ "!% $8 $' 
  &&s7|RR@
+D8+DMM6B  99%5s;  
 #	  ((r:   c                     US   R                  S5      S-  R                  5       R                  SSS5      R                  5       R	                  S5      n[
        R                  US S 2S S 2S S S24   0$ )NrV   r            uint8rR   )squeezer   permutenumpyastyper   
OUTPUT_IMG)r5   r;   
output_imgs      r8   postprocess ImageEditingPipeline.postprocesso   si    O,44Q7#=BBDLLq!UWVVG_ 	%%z!Q"*'=>>r:   )r+   r4   )__name__
__module____qualname____firstlineno__strr%   r   r   rI   rd   rq   __static_attributes____classcell__)r7   s   @r8   r   r      s~     !t '3RS#X 4S> )T#s(^ )S#X )6?c3h ?DcN ? ?r:   c            	          \ rS rSr  SS\R
                  S\S\R
                  4S jjr  SS\R
                  S\S\R
                  S\4S jjr	\R                  " 5       S 5       r\R                  " 5       SS	 j5       r\R                  " 5                  SS j5       r\R                  " 5           SS\R                  4S jj5       rSrg
)r2   u   model_outputtimestepxc                    U(       a  [        SU5        Un[        X R                  R                  R                  U R                  R
                  -  -
  S5      nUS:  a  U R                  R                  U   OU R                  R                  nU R                  R                  U   nSU-
  n	X9S-  U-  -
  US-  -  n
SU-
  S-  U-  nUS-  U
-  U-   nX4$ )z4
Inverse sampling for DDIM Inversion
x_t -> x_(t+1)
z
timestep: i  r   rh   r?   )printminr    confignum_train_timestepsrP   alphas_cumprodfinal_alpha_cumprod)r5   r|   r}   r~   etaverbose	next_stepalpha_prod_talpha_prod_t_nextbeta_prod_tpred_x0pred_dirx_nexts                r8   r   _MasaCtrlPipeline.next_stepw   s     ,)	~~,,@@~~112 2368 "Q ~~44,0NN,N,N 	 NN99)D,&C',66,:KK))C/,>"C''1H<r:   r   c                 l   X R                   R                  R                  U R                   R                  -  -
  nU R                   R                  U   nUS:  a  U R                   R                  U   OU R                   R
                  nSU-
  n	X9S-  U-  -
  US-  -  n
SU-
  S-  U-  nUS-  U
-  U-   nX4$ )zI
predict the sample the next step in the denoise process.
x_t -> x_(t-1)
r   rh   r?   )r    r   r   rP   r   r   )r5   r|   r}   r~   r   r   prev_timestepr   alpha_prod_t_prevr   r   r   x_prevs                r8   step_MasaCtrlPipeline.step   s     !>>#8#8#L#LPTP^P^PrPr#rr~~44X>+a/ !NN9959^^5W5W 	,&C',66,:KK))C/,>"C''1H<r:   c                 ~   U R                   n[        U5      [        L aq  [        R                  " U5      n[
        R                  " U5      R                  5       S-  S-
  nUR                  SSS5      R                  S5      R                  U5      nU R                  R                  U5      S   R                  nUS-  nU$ )Ng     _@rh   ri   r   latent_distg{P?)_execution_devicerZ   r	   nparrayr'   
from_numpyfloatrl   rE   r3   vaeencodemean)r5   imageDEVICErU   s       r8   image2latent_MasaCtrlPipeline.image2latent   s    '';%HHUOE$$U+113e;a?EMM!Q*44Q7::6BE((//%(7<<G#r:   c                    SUR                  5       -  nU R                  R                  U5      S   nUS:X  ao  US-  S-   R                  SS5      nUR	                  5       R                  SSSS5      R                  5       S   nUS	-  R                  [        R                  5      nU$ US
:X  a  US-  S-   R                  SS5      nU$ )Ng!ޅ@sampler   ri   r?   r   rh      rg   pt)
detachr   decodeclampr   rl   rm   rn   r   rj   )r5   rU   return_typer   s       r8   latent2image_MasaCtrlPipeline.latent2image   s     00(2$QY_++Aq1EIIK''1a399;A>ES[((2E  D QY_++Aq1Er:   Nc                    U R                   n[        U[        5      (       a  [        U5      nO![        U[        5      (       a  US:  a  U/U-  nU R                  USSSS9nU R                  UR                  R                  U5      5      S   n[        SUR                  5        X R                  R                  US-  US-  4nUc  [        R                  " UUS	9nO%UR                  U:X  d   S
UR                   S35       eUS:  af  U
(       a  U
nOSnU R                  U/U-  SSSS9nU R                  UR                  R                  U5      5      S   n[        R                  " UU/SS9n[        SUR                  5        U R                   R#                  U5        U/nU/n[%        ['        U R                   R(                  SS95       GH6  u  nnUb4  USU-
     nUR+                  S5      u  nn[        R                  " UU/5      nUS:  a  [        R                  " U/S-  5      nOUnU	b[  [        U	[        5      (       aF  UR+                  S5      u  nn[        R                  " U	U   R,                  " UR                  6 U/5      nU R                  UUUS9R.                  nUS:  a  UR+                  SSS9u  nnUUUU-
  -  -   nU R1                  UUU5      u  nn UR3                  U5        UR3                  U 5        GM9     U R5                  USS9n!U(       aE  U V"s/ s H  n"U R5                  U"SS9PM     nn"U V"s/ s H  n"U R5                  U"SS9PM     nn"U!UU4$ U!$ s  sn"f s  sn"f )Nrh   
max_lengthM   r   paddingr   return_tensorsr   input text embeddings :   )r   z!The shape of input latent tensor z  should equal to predefined one.      ? dimlatents shape: zDDIM SamplerdescrR   ri   encoder_hidden_states)r   )r   rW   listr]   rw   	tokenizertext_encoder	input_idsr3   r   shapeunetin_channelsr'   randncatr    set_timesteps	enumerater   	timestepschunkr\   r   r   appendr   )#r5   prompt
batch_sizeheightwidthrP   rO   r   rU   unconditioning
neg_promptref_intermediate_latentsrQ   kwdsr   
text_inputtext_embeddingslatents_shapeuc_textunconditional_inputunconditional_embeddingsr_   pred_x0_listitlatents_ref_latents_curmodel_inputs
noise_prednoise_pred_unconnoise_pred_conr   r   r>   s#                                      r8   __call___MasaCtrlPipeline.__call__   s    ''fd##VJ$$A~ J. ^^LR $ N
 ++J,@,@,C,CF,KLQO')>)>? $YY%:%:FaK!%?kk-?G==M1 I5VW^WdWdVe fH 4I I1 B$"&..	J&$#	 #1 #%
 (,'8'8#--008(::;(=$#ii)?;DO 	/$$%89yyT^^--NCEDAq'36rAv>!(q!1;))[+$>?"$yy'Q7&)j.N.N%4%:%:1%="?"'))"1%,,o.C.CD#- #
 a # HHN "3=3C3CA13C3M0 .-"%5517 7
  $yyQ@GW((9E< !!'t!< ('C !!#4!8'   ('C !!#4!8'   ,44s   !M$M)r   c                    U R                   nUR                  S   n	[        U[        5      (       a$  U	S:X  a  UR	                  [        U5      SSS5      nO![        U[        5      (       a  U	S:  a  U/U	-  nU R                  USSSS9n
U R                  U
R                  R                  U5      5      S   n[        SUR                  5        U R                  U5      nUnUS	:  aY  U R                  S
/U	-  SSSS9nU R                  UR                  R                  U5      5      S   n[        R                  " X/SS9n[        SUR                  5        U R                  R!                  U5        [        S[#        U R                  R$                  5      5        U/nU/n['        [)        [#        U R                  R$                  5      SS95       H  u  nnUS	:  a  [        R                  " U/S-  5      nOUnU R+                  UUUS9R,                  nUS	:  a  UR/                  SSS9u  nnUUUU-
  -  -   nU R1                  UUU5      u  nnUR3                  U5        UR3                  U5        M     U(       a  UU4$ X4$ )zD
invert a real image into noise map with determinisc DDIM inversion
r   rh   rR   r   r   r   r   r   r   r   r   r   zValid timesteps: zDDIM Inversionr   ri   r   )r   r   rW   r   r\   r]   rw   r   r   r   r3   r   r   r'   r   r    r   reversedr   r   r   r   r   r   r   r   )r5   r   r   rP   rO   r   rQ   r   r   r   r   r   rU   start_latentsr   r   r_   r   r   r   r   r   r   r   r   s                            r8   r[   _MasaCtrlPipeline.invert'  s    ''[[^
fd##QS["b"=$$A~ J. ^^LR $ N
++J,@,@,C,CF,KLQO')>)>?##E* B"&..z!$#	 #1 #%
 (,'8'8#--008(::;(=$#ii);DO 	/$$%89!8DNN,D,D#EFyyT^^556)+,DAq "$yy'Q7& a # HHN "3=3C3CA13C3M0 .-"%5517 7
  $~~j!WEGW((',*  L((%%r:   r#   )r   F)        F)r   )rh   r@   r@   rN   rM   r   NNNNF)rN   rM   r   F)rs   rt   ru   rv   r'   FloatTensorintr   r   r   no_gradr   r   r   Tensorr[   rx   r#   r:   r8   r2   r2   u   s"    ''  	@ ''  	
 , ]]_	 	 ]]_
 
 ]]_ %' # $ *.&+c cJ ]]_ $&!$)E&llE& E&r:   r2   )-os.pathr/   typingr   r   r   r   rm   r   r'   torch.nn.functionalnn
functionalrF   	diffusersr   r   PILr	   torchvisionr
   r   modelscope.metainfor   "modelscope.models.cv.image_editingr   r   modelscope.outputsr   modelscope.pipelines.builderr   Emodelscope.pipelines.multi_modal.diffusers_wrapped.diffusers_pipeliner   modelscope.preprocessorsr   modelscope.utils.constantr   modelscope.utils.loggerr   r,   __all__register_moduleimage_editingr   r2   r#   r:   r8   <module>r     s     - -     <  "  )E ) 2 . + .	!
" 	Y%<%<>S?, S?>S?lx&/ x&r:   