
    9iT              
          S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
  S SKJrJrJrJrJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKJr  S SKJr  S SKJrJr  S SK J!r!  S S	K"J#r#  S S
K$J%r%J&r&J'r'  S SK(J)r)  S SK*J+r+  S SK,J-r-  S SK.J/r/  S SK0J1r1  S/r2Sr3Sr4    SS\\\5   \\5   4   4S jjr6 S S\\\5   \54   S\7S\\5\4   4S jjr8S\5S\5S\5S\\5\4   4S jr9\/Rt                  " \1Rv                  \Rx                  S9 " S S\-5      5       r=\>S:X  a3  \=" 5       r?Sr@\?R                  \@5      u  rBrCS SKDrD\DR                  " 5         gg)!    N)Path)AnyDictListOptionalSequenceTupleUnion)result)tqdm)Preprocessors)proteinresidue_constants)PDB_CHAIN_IDS)compress_features)parserspipeline	templates)hhsearch)divide_multi_chains)Preprocessor)PREPROCESSORS)FieldsUniFoldPreprocessorzM{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]https://api.colabfold.comreturnc           
      >  ^^. U(       a  SOSm.S8UU.4S jjnU4S jnU4S jn[        U [        5      (       a  U /OU n	Sn
U(       a  Sn
SnSnU n[        R                  R	                  U5      (       d  [        R
                  " U5        U S	U
 S
3nSu  p/ nU	 V s/ s H  o U;  d  M
  UR                  U 5      PM       n U	 Vs/ s H  nXR                  U5      -   PM     nn[        R                  R                  U5      (       Gd  S[        U5      -  n[        U[        S9 nU(       Gax  UR                  S5        U" XU5      nUS   S;   aD  S[        R                  " SS5      -   n[        R                   " U5        U" XU5      nUS   S;   a  MD  US   S:X  a  SnUS-   n[#        U5      eUS   S:X  a  [#        S5      eUS   SnnUR                  US   5        US   S;   at  S[        R                  " SS5      -   n[        R                   " U5        U" U5      nUR                  US   5        US   S:X  a  UU-  nUR%                  US9  US   S;   a  Mt  US   S:X  a  UU:  a  UR%                  UU-
  S9  SnUS   S:X  a  SnSnUS-   n[#        U5      eU(       a  GMx  U" WU5        S S S 5        U(       a  U S3/nO!U S3/nU(       a  UR                  U S35        ['        S  U 5       5      (       a0  [(        R*                  " U5       nUR-                  U5        S S S 5        U(       Ga`  0 n[+        U S!3S"5       nUR/                  5       nU Hb  nUR1                  5       R3                  5       n U S   U S#   U S$   U S%   4u  n!n"  n#[5        U!5      n!U!U;  a  / UU!'   UU!   R                  U"5        Md     S S S 5        0 n$UR6                  " 5        H  u  n%n&U S&U% 3n'[        R                  R	                  U'5      (       d  [        R
                  " U'5        S'R9                  U&S S( 5      n([        R:                  " S)T S*U( S+U' S,35        [        R:                  " S-U' S.U' S/35        [        R:                  " S0U' S135        U'U$U%'   M     0 n)U H  n*S2u  n+n![+        U*S"S3S49 nUR/                  5       nU H  n[        U5      S:  d  M  S5U;   a  UR=                  S5S5      nS6n+UR?                  S75      (       a0  U+(       a)  [5        US#S  R1                  5       5      n!Sn+U!U);  a  / U)U!'   U)U!   R                  U5        M     S S S 5        M     U V,s/ s H  n,SR9                  U)U,   5      PM     n)n,U(       a:  / n-U H0  n,U,W$;  a  U-R                  S 5        M  U-R                  U$U,   5        M2     U-n$U(       a  U)W$4$ U)$ s  sn f s  snf ! , (       d  f       GN+= f! , (       d  f       GN= f! , (       d  f       GN?= f! , (       d  f       GM  = fs  sn,f )9Nzticket/pairz
ticket/msac                    > USpCU  H  nUSU SU S3-  nUS-  nM     [         R                  " T ST	 3UUS.S9n UR                  5       nU$ ! [         a    SS	0n U$ f = f)
N >
   /)qmode)datastatusERROR)requestspostjson
ValueError)
seqsr%   Nnqueryseqresouthost_urlsubmission_endpoints
           i/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/science/uni_fold.pysubmitrun_mmseqs2.<locals>.submit3   s    b5Cq2cU"%%EFA  mmj-./	&((*C 
  	&W%C
	&s   A A! A!c                    > [         R                  " T SU  35      n UR                  5       nU$ ! [         a    SS0n U$ f = f)Nz/ticket/r'   r(   )r)   getr+   r,   )IDr2   r3   r4   s      r6   r'   run_mmseqs2.<locals>.statusE   sS    llhZxt45	&((*C 
  	&W%C
	&s   0 AAc                    > [         R                  " T SU  35      n[        US5       nUR                  UR                  5        S S S 5        g ! , (       d  f       g = f)Nz/result/download/wb)r)   r:   openwritecontent)r;   pathr2   r3   r4   s       r6   downloadrun_mmseqs2.<locals>.downloadM   sE    llhZ'8=>$IIckk" s   A
Aenvr   Fz/out_z.tar.gz)e   T   )total
bar_formatSUBMITr'   )UNKNOWN	RATELIMIT   r   r(   zTMMseqs2 API is giving errors. Please confirm your input is a valid protein sequence.z2If error persists, please try again an hour later.MAINTENANCEzIMMseqs2 API is undergoing maintenance. Please try again in a few minutes.id)rK   RUNNINGPENDINGrP   )r/   COMPLETEz	/pair.a3mz/uniref.a3mz"/bfd.mgnify30.metaeuk30.smag30.a3mc              3   j   #    U  H)  n[         R                  R                  U5      (       + v   M+     g 7fN)osrB   isfile).0a3m_files     r6   	<genexpr>run_mmseqs2.<locals>.<genexpr>   s#     
B	Hrww~~h'''	s   13z	/pdb70.m8rr"      
   z/templates_,   zcurl -s -L z
/template/z | tar xzf - -C r#   zcp z/pdb70_a3m.ffindex z/pdb70_cs219.ffindexztouch z/pdb70_cs219.ffdata)TNzutf-8)encoding Tr    )rF   ) 
isinstancestrrU   rB   isdirmkdirappendindexrV   lenr   TQDM_BAR_FORMATset_descriptionrandomrandinttimesleep	Exceptionupdateanytarfiler?   
extractall	readlinesrstripsplitintitemsjoinsystemreplace
startswith)/xprefixuse_envuse_templatesuse_pairingr4   r7   r'   rC   r-   r%   rB   tar_gz_filer.   REDOseqs_uniquer1   MsTIME_ESTIMATEpbarr3   
sleep_timeerrorr;   TIMEt	a3m_filestar_gzr   flineslinepMpdb_template_pathskTMPL	TMPL_PATH	TMPL_LINE	a3m_linesrX   update_Mr/   template_paths_r5   s/        `                                        @r6   run_mmseqs2r   *   s    ,7-L $# Q$$A3!DD XD77==
 F%vW-KGA K$(ADq[,@[DA04	5!$
$B	577>>+&&c+../Bd$$X. [2(m'??!"V^^Aq%9!9J JJz* A6C (m'?? x=G+rE!$XXE#E**x=M1#c 
 t9aD$$S]3(m'HHFNN1a00AJJqM *C((X78}	1	a( (m'HH x=J.m+}t';= Dx=G+ DrE!$XXE#E**Y $^ R%a Cf vY'(	v[)*	v%GHI 
B	
BBB\\+&&d# ' 	TF)$c*aKKMEKKM'') tQqT1Q42631FI%#%IaL!##C(  +  (GAt!(+aS1I77==++#HHT#2Y/			!(:i[@PQZP[[\] 		)$7	{BVW 		F9+-@AB )N1 ) I !(C'2aKKMEt9q=~#||FB7#'s++QR 12#(I-+-IaLaL''-  32 $ 13311&I3A&&&t,  &&~a'89  )*7I~&FYFC B	5 CBx '& +*: 32  4se   	W(WW#A.WCWAW$	WW#A9W5*#XA8XX
W #
W25
X
X	query_sequencenum_tempc           
         [        U [        5      (       a  [        U 5      O[        S U  5       5      nSU-  n[        R
                  " U[        R                  R                  S45      n[        R
                  " U[        R                  R                  45      n[        R                  R                  U[        R                  R                  5      n[        R                  " US    USSS/5      [        R                  " US    USS/5      SR                  5       /U-  [        R                  " [        R                  " U5      S    USS/5      SR                  5       /U-  [        R
                  " U/[        R                  S9S.nU$ )Nc              3   8   #    U  H  n[        U5      v   M     g 7frT   )rh   )rW   ss     r6   rY   $get_null_template.<locals>.<genexpr>   s      H,*qCFFNs   A   r"   none)dtype)template_all_atom_positionstemplate_all_atom_maskstemplate_sequencetemplate_aatypetemplate_domain_namestemplate_sum_probs)rb   rc   rh   sumnpzerosr   r   atom_type_numsequence_to_onehotHHBLITS_AA_TO_IDtileencodearrayfloat32)r   r   lnoutput_templates_sequencetemplates_all_atom_positionstemplates_all_atom_maskstemplates_aatypetemplate_featuress           r6   get_null_templater      sW     *.#>>NC H,*H, E,  !$b $&88	Y((66:$< !xx	Y((667 9 22EE!##446
 	,T2Xq!Q4GH
(.1a0@A$mmo.9
)*408Q2BC"(--/!2X!=
(2::.     r   template_pathc           	         [         R                  " USSSS S S9n[        R                  " SU S3/S9nUR	                  U 5      n[
        R                  R                  U5      nUR                  X&S9n[        UR                  5      $ )	Nz
2100-01-01r_   kalign)	mmcif_dirmax_template_datemax_hitskalign_binary_pathrelease_dates_pathobsolete_pdbs_pathr   z/pdb70)binary_path	databases)r   hits)r   HhsearchHitFeaturizerr   HHSearchr0   r   r   	parse_hhrget_templatesdictfeatures)r   r   r   template_featurizerhhsearch_pdb70_runnerhhsearch_resulthhsearch_hitstemplates_results           r6   get_templater   	  s    #99&# %--m_F+C*DF ,11)<O$$..?M*88% 9 ; ))**r   )module_namec                      \ rS rSrS rS\S\S\S\4S jrS\\   S	\S\S\S
\S\	\\   \
4   4S jrS rS\4S\S\\\\   4   S\S\S\
S\S\S\	\\\      \\\      \\   \\   \\\\4      4   4S jjrS\\\	4   4S jrSrg)r   i  c                     US   U l         U R                   (       d  S U l         SU l        SU l        SU l        SU l        SU l        [        R                  " U R
                  SS9  g )Nsymmetry_group   i  unifoldz./unifold-predictionsT)exist_ok)r   MIN_SINGLE_SEQUENCE_LENGTHMAX_SINGLE_SEQUENCE_LENGTHMAX_MULTIMER_LENGTHjobnameoutput_dir_baserU   makedirs)selfcfgs     r6   __init__UniFoldPreprocessor.__init__"  s\    !"23"""&D*,'*.'#'  6
D((48r   input_sequence
min_length
max_lengthr   c                    UR                  [        R                  SSS5      5      R                  5       n[	        [
        R                  5      n[	        U5      R                  U5      (       d  [        S[	        U5      U-
   S35      e[        U5      U:  a  [        S[        U5       SU 35      e[        U5      U:  a  [        S[        U5       SU S	35      eU$ )
Nr   z 
	z0Input sequence contains non-amino acid letters: z<. AlphaFold only supports 20 standard amino acids as inputs.zInput sequence is too short: z# amino acids, while the minimum is zInput sequence is too long: z# amino acids, while the maximum is zr. You may be able to run it with the full Uni-Fold system depending on your resources (system memory, GPU memory).)
	translaterc   	maketransuppersetr   restypesissubsetr,   rh   )r   r   r   r   clean_sequenceaatypess         r6   clean_and_validate_sequence/UniFoldPreprocessor.clean_and_validate_sequence-  s    '11MM"b'*,,1EG 	'001>"++G44B~&01 2))* * ~+/N0C/D E((2|56 6 ~+.s>/B.C D"", . ! !
 r   input_sequencesr   max_multimer_lengthc                    / nU H<  nUR                  5       (       d  M  U R                  UUUS9nUR                  U5        M>     Ubd  US:w  a^  UR                  S5      (       a9  USS R	                  5       (       a!  [        SU S35        [        U5      S:  nXhU4$ [        SU S	35      e[        U5      S:X  a  [        S
5        USS4$ [        U5      S:  aY  [        U V	s/ s H  n	[        U	5      PM     sn	5      n
X:  a  [        SU
 SU S35      e[        S[        U5       S35        USS4$ [        S5      es  sn	f )zGValidates and cleans input sequences and determines which model to use.)r   r   r   NC1Cr"   zUsing UF-Symmetry with group zp. If you do not want to use UF-Symmetry, please use `C1` and copy the AU sequences to the count in the assembly.z,UF-Symmetry does not support symmetry group z2 currently. Cyclic groups (Cx) are supported only.zUsing the single-chain model.Fz4The total length of multimer sequences is too long: z, while the maximum is z:. Please use the full AlphaFold system for long multimers.zUsing the multimer model with z sequences.TzLNo input amino acid sequence provided, please provide at least one sequence.)	stripr   rf   r|   	isnumericprintrh   r,   r   )r   r   r   r   r   r   	sequencesr   is_multimerr1   total_multimer_lengths              r6   validate_input"UniFoldPreprocessor.validate_inputC  s    	-N##%%!%!A!A#1)) "B "+   0 . %.D*@(( +AB/99;;3N3C D> ?@  #9~1 ~== B%& '&'( (
 ^q 12eT))^a$'Y(GYcSY(G$H!$: J,--D*+ ,123 3
 23y>2B+NOdD(( &' ' )Hs   #D>c                 x    US-   [         R                  " UR                  5       5      R                  5       S S -   $ )Nr   rM   )hashlibsha1r   	hexdigest)r   r}   ys      r6   add_hashUniFoldPreprocessor.add_hasht  s0    3wahhj1;;=bqAAAr   r"   r   query_seqs_unique
result_dirmsa_moder   homooligomers_numr4   c                    US:H  n/ n	U(       a  [        U[        UR                  U5      5      USUS9u  pUc;  [        S[	        U5      5       H!  n[        X,   5      nU	R                  U5        M#     O[        S[	        U5      5       HY  nX   b3  [        X   X   X,   5      n[	        US   5      S:X  a  [        X,   5      nO[        X,   5      nU	R                  U5        M[     O:[        S[	        U5      5       H!  n[        X,   5      nU	R                  U5        M#     US:X  a@  / nSn[        U5       H,  u  nnUR                  S[        UU-   5      -   S	-   U-   5        M.     O%[        U[        UR                  U5      5      US
US9n[	        U5      S:  a&  [        U[        UR                  U5      5      USUS9nOCSn/ n[        SU5       H/  nUR                  S[        UU-   5      -   S	-   US   -   S	-   5        M1     UUU	4$ )NMMseqs2T)r   r4   r   r   single_sequencerF   r    r!   F)r   r4   r"   )	r   rc   joinpathrangerh   r   rf   r   	enumerate)r   r   r  r  r  r   r  r4   r   r   a3m_lines_mmseqs2r   rg   template_featurer   numir1   paired_a3m_liness                      r6   get_msa_and_templates)UniFoldPreprocessor.get_msa_and_templatesw  sc    i'0;!J''01"!1- %"1c*;&<=E'8)0(2$%,,-=> >
 #1c*;&<=E%,8+7-4*1-4,(
 /0GHIQN/@ 1 80:, ,=-4,6(%,,-=> > q#&7"89#45F5M#N !(()9: : ((IC#$563  s37|!3d!:S!@A 7 $!J''01!!I  !A%*!J''01 !  C!1/0 ''c#'l(:T(A*;A*>)?AE)F G 1
 
 	
r   r&   c           	         [        U[        5      (       aB  UR                  5       R                  5       n[	        U5      S:  a  US/S[	        U5      -
  -  -   nSR                  U5      n[        R                  " SSU5      nU R                  U R                  U5      nU R                  UU R                  U R                  U R                  U R                  S9u  pEn[        [	        U5      5       Vs/ s H  nSU-   S-   [        U5      -   PM     nnU(       a  [!        X0R"                  UU5        / n	[%        X5       H  u  pXU/-  n	M     / nU Vs/ s H  nX;  d  M
  UR'                  U5      PM       n[	        U5      S:X  a  [	        U5      nOSn[)        U R                   S3S	5       nUR+                  S
R                  U	5      5        S S S 5        [-        U R"                  5      n[.        R0                  R                  U R"                  U5      nSnSnU R3                  UUUUUUS9u  nnn/ n/ n[5        U5       GH  u  nn[6        U   n[8        R:                  " USU R                   SU 3[	        U5      S9n[<        R>                  " UU   5      n[8        R@                  " U/5      nUU   n0 UEUEUEn[C        U5      n[.        R0                  R                  USRE                  U5      5      n [F        RH                  " U[J        RL                  " U S5      SS9  UR'                  U5        U(       d  M  [<        R>                  " UU   5      n![8        R@                  " U!/5      n"[C        U"5      n#[.        R0                  R                  USRE                  U5      5      n$[F        RH                  " U#[J        RL                  " U$S5      SS9  UR'                  U#5        GM     UUUUS.$ s  snf s  snf ! , (       d  f       GN0= f)N   r   z\W+)r   r   r   r   r   z> z seqr"   z.fastawr!   r  T)r  r  r   r  z seq )sequencedescriptionnum_resz{}.feature.pkl.gzr>   )protocolz{}.uniprot.pkl.gz)r   pair_features	target_idr   )'rb   rc   r   rv   rh   ry   resubr	  r   r  r   r   r   r   r  r   r   ziprf   r?   r@   r   rU   rB   r  r  r   r   make_sequence_featuresr   	parse_a3mmake_msa_featuresr   formatpickledumpgzipGzipFile)%r   r&   basejobnamer$  r   r   r   iidescriptionsr   desr1   unique_sequencesr}   r  r   r  
output_dirr  r   unpaired_msa
paired_msatemplate_resultsr   pair_features_listidxchain_idsequence_featuresmonomer_msamsa_featuresr   feature_dictfeatures_output_pathmultimer_msar#  pair_feature_dictuniprot_output_paths%                                        r6   __call__UniFoldPreprocessor.__call__  s   dC  ::<%%'D4y1}rda#d)m44ggdmffVR5MM$,,<	$($7$7 ..6666 $ 8 8 %8 %:!	 C	N+
+ 9v%B/+ 	 

 	+?+? ,. L4HCsOA 5 09	
091( '##A&		

  A% #I !T\\N&)3/1GGDIIaL! 0 $../
WW\\$"6"6	B
 595O5O!'/ 6P 612j"2 !"23HC$S)H ( ? ? eH:>C!" "++L,=>K#55{mDL 0 5# $L
 -\:L#%77<</66x@$B KK2D9 OOL){&00CA ( : :L> J$5m$D!&(ggll 3 : :8 D'F#%MM"5t<
 #))*;<E 4L !/"&	
 	
e
	
 0/s   1O+	O0O0+!O55
P)r   r   r   r   r   r   N)__name__
__module____qualname____firstlineno__r   rc   rw   r   r   r	   boolr  r	  DEFAULT_API_SERVERr
   r   r   r   r   r   r  rD  __static_attributes__ r   r6   r   r     s<   	9# 3 038;,/'hsm /''*/'8;/'IL/',//'49(3-:M4N/'bB "#*R
R
 !d3i0R
 	R

 R
 R
 R
 R
 
xS	"HT#Y$7cDIDcN#$ 
%R
hg
U3:. g
r   __main__ڜLILNLRGGAFVSNTQITMADKQKKFINEIQEGDLVRSYSITDETFQQNAVTSIVKHEADQLCQINFGKQHVVCTVNHRFYDPESKLWKSVCPHPGSGISFLKKYDYLLSEEGEKLQITEIKTFTTKQPVFIYHIQVENNHNFFANGVLAHAMQVSI)TFFr   )r"   )Fr.  r  loggingrU   r,  rk   r%  rr   rm   pathlibr   typingr   r   r   r   r   r	   r
   unittestr   r+   numpyr   r)   torchr   modelscope.metainfor   &modelscope.models.science.unifold.datar   r   .modelscope.models.science.unifold.data.proteinr   ,modelscope.models.science.unifold.data.utilsr   %modelscope.models.science.unifold.msar   r   r   +modelscope.models.science.unifold.msa.toolsr   +modelscope.models.science.unifold.msa.utilsr   modelscope.preprocessors.baser    modelscope.preprocessors.builderr   modelscope.utils.constantr   __all__ri   rK  rc   r   rw   r   r   register_modulescienceunifold_preprocessorr   rF  procprotein_examplerD  r   r#  ipdb	set_tracerM  r   r6   <module>rh     s      	   	    D D D       - M H J N N @ K 6 : ,  b0  ,~G 27tCy$s)7K1L~GD '(eDIsN&;  #,0cN<+C + +!$+)-c3h+* 
NN B BDR
, R
DR
j z D^O"mmO<HmNN r   