
    sg,                        d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
mZmZ ddlZddlmZ e	eej$                  f   Ze	eef   ZdZ ej,                  d       G d	 d
             ZdedefdZddededee   fdZdededefdZdedefdZdedej$                  fdZ	 	 	 	 	 ddedede
ej$                     de
ej$                     de
e   de
ee      de
ee      defdZy)zProtein data type.    N)AnyDictIteratorListMappingOptionalSequenceTuple   )residue_constantsg{Gz?T)frozenc                      e Zd ZU dZej
                  ed<   ej
                  ed<   ej
                  ed<   ej
                  ed<   ej
                  ed<   dZeej
                     ed<   dZ	ee
   ed	<   dZeee
      ed
<   dZeee      ed<   y)Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index)__name__
__module____qualname____doc__npndarray__annotations__r   r   r   strr   r	   r   int     a/var/www/html/venv/lib/python3.12/site-packages/transformers/models/esm/openfold_utils/protein.pyr   r   !   s    + JJ JJ zz ::
 zz )-K"**%, !FHSM  (,GXhsm$+ 48(3-07r#   r   proteinnet_strreturnc                    d}t        j                  ||       D cg c]!  }t        |      dkD  s|j                         # }}t	        |dd d   |dd d   D cg c]  }|j                  d       c}      }g d}d }d }d }	|D ]  }
d|
d   k(  r|
d   d   j                         }t        t        |            D ]  }||   t        j                  vsd||<    t        j                  |D cg c]0  }t        j                  j                  |t        j                        2 c}      }d	|
d   k(  rg }t        d
      D ]>  }|j                  t        t        t         |
d   |   j                                            @ t        j                  |      }t        j"                  t        |d         d
z  t        j$                  d
f      j'                  t        j(                        }t+        |      D ]?  \  }}t        j,                  |d d |d d
f         |d d t        j.                  |   d d f<   A |t0        z  }d|
d   k(  st        j                  t        t        dddj                  |
d   d   j                                           }t        j"                  t        |      t        j$                  f      j'                  t        j(                        }	t+        |      D ]  \  }}d|	d d t        j.                  |   f<   ! |	|d   z  }	 |J t3        ||	|t        j4                  t        |            d       S c c}w c c}w c c}w )Nz(\[[A-Z]+\]\n)r      r   
)NCACz	[PRIMARY]Xz
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )resplitlenstripzipranger   restypesr   arrayrestype_ordergetrestype_numappendlistmapfloatzerosatom_type_numastypefloat32	enumerate	transpose
atom_orderPICO_TO_ANGSTROMr   arange)r%   tag_retagtagslgroupsatomsr   r   r   gseqi
res_symboltertiaryaxistertiary_npatommasks                      r$   from_proteinnet_stringrX   G   s!   F.0hhv~.N_sRUVYRZ]^R^syy{_D_.1$qt!t*VZ[\[_^_[_V`>aQRqwwt}>a.bF'EFNI )!A$A$q'--/C3s8_ !q6!2!;!;; CF! XXruvdn"0044ZARA^A^_vF QqT!*,Ha FS!T
0@0@0B%C DEF((8,KXXs8A;'71'<>O>]>]_`&abiijljtjtuN$U+ n4KM<<XcdeghgkjkgkdkXlKmq"3">">t"DaGHn..N188D11%5%9%91Q47==?!KLMDI%33
 fRZZ   %U+ E4CD	!.99$??@Ei(I9)< %iiF, O `>a ws   MMM
-5M
protchain_idc                 F   g }| j                   }||j                  d|        | j                  }| j                  }|&|$t	        ||      D cg c]  \  }}||k(  s| }}}|t        |      dk(  rdg}|j                  ddj                  |              |S c c}}w )NREMARK r   N/APARENT  )r   r<   r   r   r5   r3   join)rY   rZ   pdb_headersr   r   r   rQ   ps           r$   get_pdb_headersrc   y   s    K[[FWVH-.llG222>!$%8'!BTAa8m1TT#g,!+''!2 345 Us   BBpdb_strc                    g }|j                  d      }| j                  }||j                  d|        | j                  t	        | j                        dkD  rg }| j
                  i }t        | j                  | j
                        D ]=  \  }}|j                  t        |      g        |t        |         j                  |       ? t        |D 	cg c]  }	t        |	       c}	      }
t        |
dz         D ]/  }|j                  t        |      dg      }|j                  |       1 n)|j                  t        | j                               ndgg}dt        t           dt        fd}|j                   ||d                d}t        |      D ]a  \  }}d	|vrd
|vr|j                  |       d|v s$d||dz      vs/|dz  }|t	        |      k\  s||   }ndg}|j                   ||             c dj!                  |      S c c}	w )zWAdd pdb headers to an existing PDB string. Useful during multi-chain
    recycling
    r)   r\   r   r   r]   rb   r&   c                 *    ddj                  |        S )Nr^   r_   )r`   )rb   s    r$   make_parent_linez)add_pdb_headers.<locals>.make_parent_line   s    !&&r#   PARENTREMARKTEREND)r2   r   r<   r   r3   r   r5   
setdefaultr    maxr!   r6   r:   r=   r	   rD   r`   )rY   rd   out_pdb_lineslinesr   parents_per_chainparent_dictrb   rQ   	chain_idxmax_idxchain_parentsrg   chain_counterrL   s                  r$   add_pdb_headersrv      s     "MMM$E[[Fwvh/0 ||C$5$9##/02KDLL$*B*BC .1&&s1vr2CF#**1-. ;Gi3y>GHG7Q;' 8 +A @!((78 $$T$,,%78#WI'HSM 'c ' )*;A*>?@M%  
B11!2  #A:%uQU|3QM C(9$:: 1- @!&  !1-!@A
B 99]##7 Hs   H
c                 B  ! t         j                  dgz   !dt        dt        f!fd}t         j                  }g }| j
                  }| j                  }| j                  }| j                  j                  t        j                        }| j                  }| j                  }	t        j                  |t         j                  kD        rt!        d      t#        |       }
t%        |
      dkD  r|j'                  |
       |j(                  d   }d}d}t*        j,                  }d}t/        |      D ]@  } |||         }t1        |||   ||   ||         D ]  \  }}}}|d	k  rd
}t%        |      dk(  r|nd| }d}d}d}|d   }d}d}|	||	|      }|d|dd|d|d|dd|d||   d|dd|d   d|d   d|d   d|d|dd|d|d}|j3                  |       |dz  } ||dz
  k(  }|	||dz
  k7  r|	|dz      |k7  r
d}|	|dz      }|sd}|d|dd |||         dd|d||   d} |j3                  |        |dz  }||dz
  k7  s&|j'                  t#        | |             C |j3                  d       |j3                  d       d j5                  |      S )!zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r-   rr&   c                 J    t         j                  j                  |    d      S )NUNK)r   restype_1to3r:   )rx   r7   s    r$   res_1to3zto_pdb.<locals>.res_1to3   s      --11(1+uEEr#   zInvalid aatypes.r   r   Ng      ?ATOM   r_    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr(   z>6.2fz
          z>2Trj   z      rk   r)   )r   r7   r!   r    
atom_typesr   r   r   r   rB   r   int32r   r   anyr;   
ValueErrorrc   r3   extendshapestringascii_uppercaser6   r5   r<   r`   )"rY   r|   r   	pdb_linesr   r   r   r   r   r   headersn
atom_indexprev_chain_index
chain_tags	chain_tagrQ   
res_name_3	atom_nameposrW   b_factorrecord_typenamealt_locinsertion_code	occupancyelementcharge	atom_lineshould_terminate	chain_endchain_termination_liner7   s"                                    @r$   to_pdbr      sM    !))SE1HFC FC F #--JII[[F((N&&--bhh7MI""K	vvf(4445+,,d#G
7|a!QAJ''JI1X 0JfQi(
.1*nQ>OQZ[\Q]_hij_k.l 	*IsD(cz K #I! 391YKDGNIlGFI&&{1~6	 r":b/4)GB<b/9R. #B'r':#q6%.Qs1venU#HU#3:2,vbk+  Y'!OJ5	8 A:"AEzk!a%04DD#' #.q1u#5 IR.Bvhvay6I"5MQyY[n]jkl]mnp\qr # 34!OJAEz   7G!HIa0Jd UR99Yr#   c                 <    t         j                  | j                     S )ao  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
    computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   STANDARD_ATOM_MASKr   )rY   s    r$   ideal_atom_maskr     s     //<<r#   featuresresultr   r   r   r   r   c                 |    t        | d   |d   |d   | d   dz   ||nt        j                  |d         ||||	      S )a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.
      chain_index: (Optional) Chain indices for multi-chain predictions
      remark: (Optional) Remark about the prediction
      parents: (Optional) List of template names
    Returns:
      A protein instance.
    r   final_atom_positionsfinal_atom_maskr   r   )	r   r   r   r   r   r   r   r   r   )r   r   
zeros_like)r   r   r   r   r   r   r   s          r$   from_predictionr   +  s]    * !45*+/!3(4)"--O`Ha:b/
 
r#   )r   )NNNNN)r   dataclassesr1   r   typingr   r   r   r   r   r   r	   r
   numpyr   r   r   r    r   FeatureDictModelOutputrG   	dataclassr   rX   r!   rc   rv   r   r   r   r"   r#   r$   <module>r      sv      	  P P P   c2::o&c3h  d#"8 "8 $"8J/3 /7 /d' S c (/$' /$C /$C /$dZ  Z S Z z=' =bjj =$ '+(, '+37 

# "**%	
 SM hsm$ "(3-0 r#   