
    9i,              	          S r SSKrSSKrSSKJrJrJr  SSKrSSK	J
r
  SSKJr  \\\R                  4   r\\\4   rSr\" \5      r\R(                  " SS9 " S	 S
5      5       rSS\S\\   S\4S jjrS\4S jrS\S\4S jrS\S\R                  4S jr SS\S\S\\R                     S\4S jjr SS\S\\R                     S\4S jjrg)zProtein data type.    N)AnyMappingOptional)	PDBParser)residue_constants>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789T)frozenc                       \ rS rSr% Sr\R                  \S'   \R                  \S'   \R                  \S'   \R                  \S'   \R                  \S'   \R                  \S'   S	 rS
r	g)Protein    z!Protein structure representation.atom_positionsaatype	atom_maskresidue_indexchain_index	b_factorsc                     [        [        R                  " U R                  5      5      [        :  a  [        S[         S35      eg )Nz(Cannot build an instance with more than z6 chains because these cannot be written to PDB format.)lennpuniquer   PDB_MAX_CHAINS
ValueError)selfs    n/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/science/unifold/data/protein.py__post_init__Protein.__post_init__<   sG    ryy))*+n<:>:J KA AB B =     N)
__name__
__module____qualname____firstlineno____doc__r   ndarray__annotations__r   __static_attributes__r   r   r   r   r       sS    + JJ JJ zz :: 
 zzBr   r   pdb_strchain_idreturnc           
         [         R                  " U 5      n[        SS9nUR                  SU5      n[	        UR                  5       5      n[        U5      S:w  a  [        S[        U5       S35      eUS   n/ n/ n/ n	/ n
/ n/ nU GH_  nUb  UR                  U:w  a  M  U GH?  nUR                  S   S	:w  a)  [        S
UR                   SUR                  S    S35      e[        R                  R                  UR                  S5      n[        R                  R                  U[        R                  5      n[        R                   " [        R"                  S45      n[        R                   " [        R"                  45      n[        R                   " [        R"                  45      nU H  nUR$                  [        R&                  ;  a  M#  UR(                  U[        R*                  UR$                     '   SU[        R*                  UR$                     '   UR,                  U[        R*                  UR$                     '   M     [        R.                  " U5      S:  a  GM  UR1                  U5        UR1                  U5        U	R1                  U5        U
R1                  UR                  S   5        UR1                  UR                  5        UR1                  U5        GMB     GMb     [        R2                  " U5      n[5        U5       VVs0 s H	  u  nnUU_M     nnn[        R6                  " U Vs/ s H  nUU   PM
     sn5      n[9        [        R6                  " U5      [        R6                  " U	5      [        R6                  " U5      [        R6                  " U
5      U[        R6                  " U5      S9$ s  snnf s  snf )a  Takes a PDB string and constructs a Protein object.

WARNING: All non-standard residue types will be converted into UNK. All
  non-standard atoms will be ignored.

Args:
  pdb_str: The contents of the pdb file
  chain_id: If chain_id is specified (e.g. A), then only that chain
    is parsed. Otherwise all chains are parsed.

Returns:
  A new `Protein` parsed from the pdb contents.
T)QUIETnone   z,Only single model PDBs are supported. Found z models.r       z(PDB contains an insertion code at chain z and residue index z. These are not supported.X         ?      ?)r   r   r   r   r   r   )ioStringIOr   get_structurelist
get_modelsr   r   idr   restype_3to1getresnamerestype_orderrestype_numr   zerosatom_type_numname
atom_typescoord
atom_orderbfactorsumappendr   	enumeratearrayr   )r'   r(   pdb_fhparser	structuremodelsmodelr   r   r   r   	chain_idsr   chainresres_shortnamerestype_idxposmaskres_b_factorsatomunique_chain_idsncidchain_id_mappingr   s                             r   from_pdb_stringr\   C   s.    [[!FT"F$$VV4I)&&()F
6{a:3v;-xP
 	
 1IENFIMIIEHH$8CvvayC >uxxj I VVAYK'ACD D .::>>S"M+99==0<<>K((-;;Q?@C88.<<?@DHH&7&E&E%HIM99$5$@$@@?Czz%00;<@C&11$))<="&,, /::II    vvd|c!MM+&!!#&T"  +UXX&]+7  B yy+-67G-HI-H61cQ-HI((YGYc,S1YGHKxx/((9%xxhh}-((9%  JGs   N<4Oc                 ,    SnUS U S SUS SUS US 3$ )	NTER<6>5z      >3r/   >1>4r   )
atom_indexend_resname
chain_namer   	chain_ends        r   
_chain_endrh      s7    InZO6+b1A"omB/1 2r   protc                   ^ [         R                  S/-   mU4S jn[         R                  n/ nU R                  nU R                  nU R
                  nU R                  R                  [        R                  5      nU R                  R                  [        R                  5      nU R                  n	[        R                  " U[         R                  :  5      (       a  [        S5      e0 n
[        R                  " U5       H+  nU[         :  a  [        S[          S35      e["        U   X'   M-     UR%                  S5        SnUS   n['        UR(                  S   5       H  nXU   :w  a>  UR%                  [+        UU" X[S-
     5      XUS-
        X{S-
     5      5        X   nUS-  nU" X[   5      n[-        UXk   XK   X   5       H  u  nnnnUS	:  a  M  S
n[/        U5      S:X  a  UOSU 3nSnSnSnUS   nSnUS US SUS US US SXU      S X{   S US SUS   S US   S US   S US US SUS US 3nUR%                  U5        US-  nM     M     UR%                  [+        UU" US   5      XS      US   5      5        UR%                  S5        UR%                  S5        U Vs/ s H  nUR1                  S5      PM     nnSR3                  U5      S-   $ s  snf ) zuConverts a `Protein` instance to a PDB string.

Args:
  prot: The protein to convert to PDB.

Returns:
  PDB string.
r0   c                 J   > [         R                  R                  TU    S5      $ )NUNK)r   restype_1to3r;   )rrestypess    r   res_1to3to_pdb.<locals>.res_1to3   s      --11(1+uEEr   zInvalid aatypes.z The PDB format supports at most z chains.zMODEL     1r-   r   r3   ATOM   r/    r2   r_   r`   z<4rb   ra   rc   z   z>8.3fr.   z>6.2fz
          z>2ENDMDLENDP   
)r   ro   rB   r   r   r   r   astyper   int32r   r   anyr>   r   r   r   PDB_CHAIN_IDSrG   rangeshaperh   zipr   ljustjoin)ri   rp   rB   	pdb_linesr   r   r   r   r   r   rO   ird   last_chain_index
res_name_3	atom_namerT   rU   b_factorrecord_typerA   alt_locinsertion_code	occupancyelementcharge	atom_linelinero   s                               @r   to_pdbr      sF    !))SE1HF #--JII[[F((N&&--bhh7M""))"((3KI	vvf(44455+,, IYY{#2>2B(KM M$Q'		 $ ]#J"1~6<<?#1~-VE]+!a%01!a%(	  +~!OJfi(
.1*2@2C2;,	/N*IsD( cz K #I! 391YKDGNIGF r":b/4)GB<b/9^#<R"@ #B'r':#q6%.Qs1venU#HU#3:2,vbk+  Y'!OJ//N $P VBZ "o&"		
 XU -66IDBII699Y$&& 7s   /K c                 <    [         R                  U R                     $ )aS  Computes an ideal atom mask.

`Protein.atom_mask` typically is defined according to the atoms that are
reported in the PDB. This function computes a mask according to heavy atoms
that should be present in the given sequence of amino acids.

Args:
  prot: `Protein` whose fields are `numpy.ndarray` objects.

Returns:
  An ideal atom mask.
)r   STANDARD_ATOM_MASKr   )ri   s    r   ideal_atom_maskr      s     //<<r   featuresresultr   c           	          SU ;   a	  U S   S-
  nO[         R                  " U S   5      nUc  [         R                  " US   5      n[        U S   US   US   U S   S-   UUS9$ )zAssembles a protein from a prediction.

Args:
  features: Dictionary holding model inputs.
  fold_output: Dictionary holding model outputs.
  b_factors: (Optional) B-factors to use for the protein.

Returns:
  A protein instance.
asym_idr-   r   final_atom_maskfinal_atom_positionsr   r   r   r   r   r   r   r   
zeros_liker   )r   r   r   r   s       r   from_predictionr     s     Hy)A-mmXh%79MM&):";<	!45*+/!3 r   c           	          SU ;   a	  U S   S-
  nO[         R                  " U S   5      nUc  [         R                  " U S   5      n[        U S   U S   U S   U S   S-   UUS9$ )zAssembles a standard pdb from input atom positions & mask.

Args:
  features: Dictionary holding model inputs.
  b_factors: (Optional) B-factors to use for the protein.

Returns:
  A protein instance.
r   r-   r   all_atom_maskall_atom_positionsr   r   r   )r   r   r   s      r   from_featurer   '  s     Hy)A-mmXh%79MM(?";<	! 45?+/!3 r   )N)r#   dataclassesr4   typingr   r   r   numpyr   Bio.PDBr   &modelscope.models.science.unifold.datar   strr$   FeatureDictModelOutputr}   r   r   	dataclassr   r\   rh   r   r   r   r   r   r   r   <module>r      s3     	 ) )   Dc2::o&c3h Q]# d#B B $BDKS KHSM KW K\2c 2]' ]'S ]'@=' =bjj =$ 7;k ''

3?FB 48; $RZZ0<Cr   