
    9io8                        S SK Jr  S SKJrJr  S SKrS SKrS SK	r	S SK
Jr  S SKJs  Jr  S SKJr  S SKJr  S SKJr  S SKJrJr  S SKJrJr  S SKJr  S S	KJr  S S
KJ r   S SK!J"r"  S SK#J$r$J%r%  S SK&J'r'  S SK(J)r)  \)" 5       r*S/r+    SS jr, " S S\-5      r. " S S\R^                  5      r0\Rb                  " \%Rd                  \Rf                  S9 " S S\5      5       r4g)    N)AnyDict)Models)Model)MODELS)GaussianDiffusionbeta_schedule)
BertConfig	BertModel)FullTokenizer)DiffusionGenerator)SuperResUNet256)SuperResUNet1024)	ModelFileTasks)create_device)
get_logger DiffusionForTextToImageSynthesisc                 0    [        XX#5      n[        XTS9nU$ )N)var_type)r	   r   )schedulenum_timesteps	init_beta	last_betar   betas	diffusions          m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/diffusion/model.pymake_diffusionr   "   s     
 (9HE!%;I    c                   $    \ rS rSrSS jrS rSrg)	Tokenizer,   c                 :    Xl         X l        [        USS9U l        g )NT)
vocab_filedo_lower_case)r$   seq_lenr   	tokenizer)selfr$   r&   s      r   __init__Tokenizer.__init__.   s    $&!7r   c                    U R                   R                  U5      nS/US U R                  S-
   -   S/-   nU R                   R                  U5      nS/[	        U5      -  nS/[	        U5      -  nUS/U R                  [	        U5      -
  -  -  nUS/U R                  [	        U5      -
  -  -  nUS/U R                  [	        U5      -
  -  -  n[	        U5      [	        U5      s=:X  a!  [	        U5      s=:X  a  U R                  :X  d   e   e[
        R                  " U5      n[
        R                  " U5      n[
        R                  " U5      nX5U4$ )Nz[CLS]   z[SEP]   r   )r'   tokenizer&   convert_tokens_to_idslentorch
LongTensor)r(   texttokens	input_ids
input_masksegment_idss         r   __call__Tokenizer.__call__4   sI   ((.V$5T\\A%566'BNN88@	S3y>)
cC	N* 	aSDLL3y>9::	qcT\\C
O;<<
sdllS-==>>9~Z )C5 ) LL) 	) ) 	) ) $$Y/	%%j1
&&{3z11r   )r&   r'   r$   N)@   )__name__
__module____qualname____firstlineno__r)   r8   __static_attributes__ r   r   r!   r!   ,   s    72r   r!   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )DiffusionModelJ   c                 T  > [         [        U ]  5         [        R                  " [        SR                  U5      SS95      nUS   n[        [        R                  " U5      5      U l
        US   n[        S0 UD6U l        US   n[        S0 UD6U l        US   n[        S0 UD6U l        g )	Nz{}/model_config.jsonutf-8encodingtext_configgenerator_configupsampler_256_configupsampler_1024_configr@   )superrB   r)   jsonloadopenformatr   r
   	from_dicttext_encoderr   unet_generatorr   unet_upsampler_256r   unet_upsampler_1024)r(   	model_dirmodel_configrH   rI   rJ   rK   	__class__s          r   r)   DiffusionModel.__init__L   s    nd,.yy'..y9GLN #=1%j&:&:;&GH ((:;0D3CD  ,,BC"1"I4H"I !--D E#3#L6K#L r   c           	          U R                  UUUS9u  pgUS   nU R                  XXvU5      nU R                  XU[        R                  " U5      XvU5      nU R                  U[        U5      nU$ )Nr5   token_type_idsattention_mask)rR   rS   rT   r1   
zeros_likerU   t)	r(   noise	timestepsr5   r\   r]   contextyxs	            r   forwardDiffusionModel.forwardb   s    &&)) ' +
 "+!nM##Ea$)$4$4Y$?$24 $$Q1-r   )rR   rS   rU   rT   )r;   r<   r=   r>   r)   rf   r?   __classcell__rX   s   @r   rB   rB   J   s    M, r   rB   )module_namec                      ^  \ rS rSrS
U 4S jjrS\\\4   S\\\4   4S jrS\\\4   S\\\4   4S jr	\
R                  " 5       S\\\4   S\\\4   4S j5       rS	rU =r$ )r   q   c                 N  > [         R                  R                  5       (       a  SOSn[        TU ]  " SXS.UD6  [        US9n[         R                  " [        R                  " U[        R                  5      S5      nUR                  U5        UR                  5       R                  5         [        U5      U l        UR                  U R                  5        UR                   U l        UR"                  U l        UR$                  U l        UR&                  U l        U S[        R(                   3n[+        USS9U l        [.        R                  " [1        SR3                  U5      S	S
95      n[5        S0 US   D6U l        [5        S0 US   D6U l        [5        S0 US   D6U l        g )Ngpucpu)rV   device)rV   /r:   )r$   r&   z{}/diffusion_config.jsonrE   rF   rI   rJ   rK   r@   )r1   cudais_availablerL   r)   rB   rN   ospjoinr   TORCH_MODEL_BIN_FILEload_state_dictevaltor   rp   rR   rS   rT   rU   
VOCAB_FILEr!   r'   rM   rO   rP   r   diffusion_generatordiffusion_upsampler_256diffusion_upsampler_1024)	r(   rV   rp   kwargsdiffusion_modelpretrained_params
vocab_pathdiffusion_paramsrX   s	           r   r)   )DiffusionForTextToImageSynthesis.__init__u   s   **1133F9FvF(9=!JJHHY	 > >?H''(9:!!##F+4;;' ,88-<<"1"D"D#2#F#F  "{!I$8$8#9:
"j"E  99*11)< "# $2 $412$4 '5 (856(8$(6 )967)9%r   inputreturnc           	         [        S Vs/ s H  o"U;   PM	     sn5      (       d  [        SUR                  5        35      eU R                  US   5      u  p4nUR	                  U R
                  5      R                  S5      nUR	                  U R
                  5      R                  S5      nUR	                  U R
                  5      R                  S5      nU R                  UUUS9u  pgUS   nU R                  [        [        XvU5      nU R                  [        [        U[        R                  " [        5      XvU5      nU R                  U[        U5      nUR!                  SS5      R#                  S5      R%                  S5      n	U	R'                  S5      R)                  SS	S5      R+                  5       R-                  5       R/                  [0        R2                  5      n	U	$ s  snf )
N)r3   ra   rb   z@input should contains "text", "noise", and "timesteps", but got r3   r   r[   r^   r-        _@r,   )all
ValueErrorkeysr'   ry   rp   	unsqueezerR   rS   ra   rb   rT   r1   r_   rU   r`   clampaddmulsqueezepermutero   numpyastypenpuint8)
r(   r   keyr5   r\   r]   rc   rd   re   imgs
             r   rf   (DiffusionForTextToImageSynthesis.forward   s   ,JK,JS5L,JKLLRSXS]S]S_R`a  59NN&M51	>LL-77:	'**4;;7AA!D'**4;;7AA!D&&)) ' +
 "+y!nM##E9a$)$4$4Y$?$24 $$Q1-ggb!n  #''.kk!n$$Q1-11399;BB288L
+ Ls   G#inputsc                     U$ )Nr@   )r(   r   s     r   postprocess,DiffusionForTextToImageSynthesis.postprocess   s    r   c                    SU;  a  [        SUR                  5        35      eU R                  US   5      u  p#nUR                  U R                  5      R                  S5      nUR                  U R                  5      R                  S5      nUR                  U R                  5      R                  S5      nU R                  UUUS9u  pVUS   nUR                  SS5      nUS:X  GaD  U R                  R                  [        R                  " SS	S
S
5      R                  U R                  5      U R                  UUUS.[        R                  " U5      [        R                  " U5      US./UR                  SS5      UR                  SS5      UR                  SS5      S	SSSS9
nUR                  SS5      (       d  [        R                  " USSSS9nU R                   R                  [        R"                  " U5      U R$                  U[        R&                  " S5      R                  U R                  5      UUUS.U[        R&                  " S5      R                  U R                  5      [        R                  " U5      [        R                  " U5      [        R                  " U5      S./UR                  SS5      UR                  SS5      UR                  SS5      S	SSSS9
nUR                  SS5      (       d  [        R                  " USSSS9nU R(                  R                  [        R"                  " U5      U R$                  U[        R&                  " S5      R                  U R                  5      UUUS.U[        R&                  " S5      R                  U R                  5      [        R                  " U5      [        R                  " U5      [        R                  " U5      S./UR                  SS5      UR                  SS5      UR                  SS5      S	SSS S9
nGOUS:X  Ga  U R                  R+                  [        R                  " SS	S
S
5      R                  U R                  5      U R                  UUUS.[        R                  " U5      [        R                  " U5      US./UR                  SS5      UR                  SS5      UR                  S S!5      UR                  S"S#5      S$9nUR                  SS5      (       d  [        R                  " USSSS9nU R                   R+                  [        R"                  " U5      U R$                  U[        R&                  " S5      R                  U R                  5      UUUS.U[        R&                  " S5      R                  U R                  5      [        R                  " U5      [        R                  " U5      [        R                  " U5      S./UR                  SS5      UR                  SS5      UR                  S%S&5      UR                  S'S#5      S$9nUR                  SS5      (       d  [        R                  " USSSS9nU R(                  R+                  [        R"                  " U5      U R,                  S(U0UR                  S)S5      UR                  S*S5      UR                  S+S#5      S,9nO[        S-5      eUR/                  SS5      R1                  S5      R3                  S.5      R5                  S5      R7                  SS/S5      R9                  5       R;                  5       R=                  [>        R@                  5      nU$ )0Nr3   z%input should contain "text", but got r   r[   r^   solverz
dpm-solverr-      r:   )rd   rc   maskgenerator_percentilegףp=
?generator_guide_scaleg      @dpm_solver_timesteps   logSNR
singlestepgO@a?)
ra   modelmodel_kwargs
percentileguide_scaler   order	skip_typemethodt_startdebugFg      @bilinear)scale_factormodealign_corners)lxltrd   rc   r   upsampler_256_percentileupsampler_256_guide_scale
   ddimgenerator_ddim_timesteps   generator_ddim_etag        )ra   r   r   r   r   ddim_timestepsetaupsampler_256_ddim_timesteps2   upsampler_256_ddim_etaconcatupsampler_1024_percentileupsampler_1024_ddim_timestepsupsampler_1024_ddim_eta)ra   r   r   r   r   r   z6currently only supports "ddim" and "dpm-solve" solversr   r,   )!r   r   r'   ry   rp   r   rR   getr{   dpm_solver_sample_loopr1   randnrS   r_   Finterpolater|   
randn_likerT   zerosr}   ddim_sample_looprU   r   r   r   r   r   ro   r   r   r   r   )	r(   r   r5   r\   r]   rc   rd   r   r   s	            r   generate)DiffusionForTextToImageSynthesis.generate   s   7

~FH H 59NN&M51	>LL-77:	'**4;;7AA!D'**4;;7AA!D&&)) ' +
 "+ 8\2\!**AAkk!QB/224;;?))&*
 ))!,$//8*	 !99%;UC!II&=sC%*YY/Er%J"#% B  C* 99We,,mm!$#"'	)
 ..EE&&s+--++a.++DKK8&* ++a.++DKK8))!,$//8!,,^< !99%?G!II&A3G%*YY/Er%J"#- F  C2 99We,,mm!$#"'	)
 //FF&&s+--++a.++DKK8&* ++a.++DKK8))!,$//8!,,^< !99%?G!II&A3G%*YY/Er%J"#- G C. v**;;kk!QB/224;;?))&*
 ))!,$//8*	 !99%;UC!II&=sC$yy)CSIII2C8 < :C$ 99We,,mm!$#"'	)
 ..??&&s+--++a.++DKK8&* ++a.++DKK8))!,$//8!,,^< !99%?G!II&A3G$yy)GLII6<' @ >C, 99We,,mm!$#"'	)
 //@@&&s+..&_ 99%@%H$yy)H"MII7= A ?C HJ J iiA""1%))%088;CCq!SU55766"((#3 	
r   )	rp   r{   r}   r|   rR   r'   rS   rU   rT   )rn   )r;   r<   r=   r>   r)   r   strr   rf   r   r1   no_gradr   r?   rh   ri   s   @r   r   r   q   s     9DT#s(^ S#X 0$sCx. T#s(^  ]]_jd38n jc3h j jr   )i  NNfixed_small)5os.pathpathrt   typingr   r   rM   r   r   r1   torch.nnnntorch.nn.functional
functionalr   modelscope.metainfor   modelscope.modelsr   modelscope.models.builderr   1modelscope.models.multi_modal.diffusion.diffusionr   r	   2modelscope.models.multi_modal.diffusion.structbertr
   r   1modelscope.models.multi_modal.diffusion.tokenizerr   6modelscope.models.multi_modal.diffusion.unet_generatorr   :modelscope.models.multi_modal.diffusion.unet_upsampler_256r   ;modelscope.models.multi_modal.diffusion.unet_upsampler_1024r   modelscope.utils.constantr   r   modelscope.utils.devicer   modelscope.utils.loggerr   logger__all__r   objectr!   ModulerB   register_moduletext_to_image_synthesisr   r   r@   r   r   <module>r      s            & # ,&K K 6 1 .	-
. "&!!)	2 2<$RYY $N 	!!v/?/?Aju jAjr   