
    9iL              	           S SK r S SKJrJr  S SKrS SKJr  S SKJr  S\\	   S\\	   4S jr
S\	S	\	S\\	   S\\	   4S
 jrg)    N)MappingSequence)logging)protein	sequencesdescriptionsc                 |   / nU  H  nX2;  d  M
  UR                  U5        M     [        [        R                  U5       VVs0 s H  u  pCU/ US._M     nnn/ n[        X5       HN  u  p7[        R                  UR	                  U5         nXT   S   R                  U5        UR                  U5        MP     XV4$ s  snnf )zl
Makes a mapping from PDB-format chain ID to sequence and description,
and parses the order of multi-chains
)r   sequencer   )appendzipr   PDB_CHAIN_IDSindex)r   r   unique_seqsseqchain_idchain_id_mapchain_orderdess           k/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/science/unifold/msa/utils.pyget_chain_id_mapr      s     K!s#  !!6!6D
 EMH	 	
 	
 E   K	0(():):3)?@~.55c:8$ 1
 $$s   B8
fasta_nameoutput_dir_basec                     [        U5      [        U5      :w  a$  [        S[        U5       S[        U5       S35      e[        U5      [        R                  :  a  [        S[        U5       S35      e[	        X#5      u  pE[
        R                  R                  X5      n[
        R                  R                  U5      (       d  [
        R                  " U5        [
        R                  R                  US5      n[        US5       n[        R                  " XHSS	S
9  SSS5        [
        R                  R                  US5      n	[        U	S5       nUR                  SR                  U5      5        SSS5        [        R                  " SSR                  U5      5        / n
/ nUR!                  5        H  nU SR#                  U5      -   n[
        R                  R                  XmS-   5      nSR#                  U5      nXL   S   n[        US5       nUR                  SU-   S-   U-   5        SSS5        U
R%                  U5        UR%                  U5        M     X4$ ! , (       d  f       GNG= f! , (       d  f       GN= f! , (       d  f       N[= f)zn
Divides the multi-chains fasta into several single fasta files and
records multi-chains mapping information.
z7sequences and descriptions must have equal length. Got z != .z=Cannot process more chains than the PDB format supports. Got z chains.zchain_id_map.jsonw   T)indent	sort_keysNz
chains.txt z/Mapping multi-chains fasta with chain order: %sz_{}z.fastazchain_{}r
   >
)len
ValueErrorr   PDB_MAX_CHAINSr   ospathjoinexistsmakedirsopenjsondumpwriter   infokeysformatr   )r   r   r   r   r   r   
output_dirchain_id_map_pathfchain_order_path
temp_names
temp_pathsr   	temp_name	temp_pathr   r   s                    r   divide_multi_chainsr9   +   s"    9~\**   #I/tC4E3FaI J 	J
9~...y>"(,- 	- !1 ILo:J77>>*%%
JZ1DE		%		,!t< 
& ww||J=		$	%& 
% LLB+&( JJ %%'h!77	GGLL-AB	)$Z0)S!QGGC#I$s*+ ")$)$ ( !!+ 
&	% 
%	$ "!s$   I!I-I?
I*-
I<?
J	)r%   typingr   r   r+   abslr   &modelscope.models.science.unifold.datar   strr   r9        r   <module>r@      sd    
 $   :%}%3-%<."."." }." 3-	."r?   