
    sge                   h   d Z ddlmZ ddlZddlmZmZmZ ddlZ	ddl
ZddlmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZ dd	l m!Z!m"Z" dd
l#m$Z$ ddl%m&Z&  e$jN                  e(      Z)dZ*dZ+d Z,d Z-d Z.d Z/ G d dej`                  jb                        Z2 G d dej`                  jb                        Z3 G d dej`                  jb                        Z4 G d dej`                  jb                        Z5 G d dej`                  jb                        Z6 G d dej`                  jb                        Z7 G d d ej`                  jb                        Z8 G d! d"ej`                  jb                        Z9 G d# d$ej`                  jb                        Z: G d% d&ej`                  jb                        Z; G d' d(ej`                  jb                        Z< G d) d*e      Z=d+Z>d,Z? ed-e>       G d. d/ej`                  jb                               Z@ ed-e>       G d0 d1e=             ZA ed2e>       G d3 d4e=e             ZB G d5 d6ej`                  jb                        ZC ed7e>       G d8 d9e=e             ZD ed:e>       G d; d<e=e             ZE G d= d>ej`                  jb                        ZFd@d?ZGy)AzPyTorch ESM model.    )annotationsN)OptionalTupleUnion   )add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)+TFBaseModelOutputWithPastAndCrossAttentions.TFBaseModelOutputWithPoolingAndCrossAttentionsTFMaskedLMOutputTFSequenceClassifierOutputTFTokenClassifierOutput)	TFMaskedLanguageModelingLossTFModelInputTypeTFPreTrainedModelTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeras
shape_listunpack_inputs)check_embeddings_within_boundsstable_softmax)logging   )	EsmConfigzfacebook/esm2_t6_8M_UR50Dr   c                l    t        j                  | dd      \  }}t        j                  | |fd      S )N   axis)tfsplitconcat)xx1x2s      Z/var/www/html/venv/lib/python3.12/site-packages/transformers/models/esm/modeling_tf_esm.pyrotate_halfr*   7   s/    XXa$FB99rc2YR((    c                    |d d d d d t        j                  |       d   d d f   }|d d d d d t        j                  |       d   d d f   }| |z  t        |       |z  z   S )N)r#   shaper*   )r&   cossins      r)   apply_rotary_pos_embr1   <   sd    
a%bhhqk"o%q(
)C
a%bhhqk"o%q(
)CGA,--r+   c                F    | t         j                  j                  |       z   S )zJMake layer symmetric in final two dimensions, used for contact prediction.)r#   linalgmatrix_transpose)r&   s    r)   
symmetrizer5   C   s    ryy))!,,,r+   c                    t        j                  | dd      }t        j                  | dd      }t        j                  | dd      }||z  }||z  }| |z
  }|S )z=Perform average product correct, used for contact prediction.r    T)keepdimsr-   )r    r-   )r#   
reduce_sum)r&   a1a2a12avg
normalizeds         r)   average_product_correctr>   H   sY    	q"t	,B	q"t	,B
--8d
3C
r'C
)CSJr+   c                  @     e Zd ZdZdd fdZ fdZddZd	dZ xZS )
TFRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    c                4    t         |   |       || _        y )Nname)super__init__dim)selfrF   rC   	__class__s      r)   rE   zTFRotaryEmbedding.__init__[   s    d# r+   c           
     ^   t         |   |       | j                  d| j                  dz  ft        j
                  t        d      d      | _        | j                  j                  ddt	        j                  d| j                  dt        j
                        | j                  z  z  z         y )	Ninv_freqr         ?F)r.   dtypeinitializer	trainablei'  r   )startlimitdeltarL   )
rD   build
add_weightrF   r#   float32r   rJ   assignrange)rG   input_shaperH   s     r)   rR   zTFRotaryEmbedding.builde   s    k"txx1}.bjjo^aNbns ( 
 	5RXXATXXQbjjY\`\d\ddef	
r+   c                f   t        j                  |      |   }t        j                  || j                  j                        }t        j
                  d|| j                        }t        j                  ||fd      d d d d d d f   }t        j                  |      t        j                  |      fS )NrL   z
i, j -> ijr    r!   )	r#   r.   rV   rJ   rL   einsumr%   r/   r0   )rG   r&   seq_dimensionseq_lentfreqsembs          r)   _compute_cos_sinz"TFRotaryEmbedding._compute_cos_sinn   s    ((1+m,HHWDMM$7$78		,4==9iiR0tQ1ABvvc{BFF3K''r+   c                b    | j                  |d      \  }}t        |||      t        |||      fS )Nr-   )r[   )r`   r1   )rG   qkcos_embsin_embs        r)   callzTFRotaryEmbedding.callw   s@    00"0E !GW5 GW5
 	
r+   N)rF   int)r   )rb   	tf.Tensorrc   ri   returnzTuple[tf.Tensor, tf.Tensor])	__name__
__module____qualname____doc__rE   rR   r`   rf   __classcell__rH   s   @r)   r@   r@   T   s    
(
r+   r@   c                  @     e Zd ZdZ	 	 	 d	 	 	 d fdZddZd Z xZS )TFEsmContactPredictionHeadzWPerforms symmetrization, apc, and computes a logistic regression on the output featuresc                    t         |   |       || _        || _        t        j
                  j                  d|dd      | _        y )NrB   r   sigmoid
regression)use_bias
activationrC   )rD   rE   eos_idxin_featuresr   layersDenseru   )rG   ry   biasrx   rC   rH   s        r)   rE   z#TFEsmContactPredictionHead.__init__   sD     	d#&,,,,Q)Zf,gr+   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d | j                  f       d d d        y y # 1 sw Y   y xY w)NTru   )builtgetattrr#   
name_scoperu   rC   rR   ry   rG   rW   s     r)   rR   z TFEsmContactPredictionHead.build   sy    ::
4t,8t334 @%%tT-=-=&>?@ @ 9@ @s   (A==Bc                   t        j                  || j                  k7  |j                        }t        j                  |d      t        j                  |d      z  }||d d d d d d d d f   z  }|dd dd df   }|ddd dd f   }t        |      \  }}}}}t        j                  ||||z  ||f      }t        t        |            }t        j                  |d      }t        j                  | j                  |      d      S )Nr   r   .r    )r   r   r   r   permr   )r#   castrx   rL   expand_dimsr   reshaper>   r5   	transposesqueezeru   )	rG   tokens
attentionseos_mask
batch_sizerz   headsseqlen_s	            r)   rf   zTFEsmContactPredictionHead.call   s    776T\\1:3C3CD>>(A.!1LL(1dD!Q+>"??
SbS#2#.
QR,
/9*/E,
FE61ZZ
Z%QW,XY
 -Z
-CD
\\*<@
zz$//*5q99r+   )Tr   N)ry   rh   rx   rh   rg   )rk   rl   rm   rn   rE   rR   rf   ro   rp   s   @r)   rr   rr      s6    a
 
h
h 	
h@:r+   rr   c                  <     e Zd ZdZd fd	Z	 ddZd ZddZ xZS )TFEsmEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                   t         |   |       t        j                  j	                  |j
                  |j                  t        |j                        d      | _	        t        j                  j	                  |j                  |j                  t        |j                        d      | _        |j                  r1t        j                  j                  |j                  d      | _        nd | _        t!        |dd      | _        t%        j&                  |j                        d d d f   | _        |j*                  | _        |j.                  | _        |j0                  | _        || _        y )	NrB   word_embeddings)embeddings_initializerrC   position_embeddings
layer_normepsilonrC   position_embedding_typeabsolute)rD   rE   r   rz   	Embedding
vocab_sizehidden_sizer   initializer_ranger   max_position_embeddingsr   emb_layer_norm_beforeLayerNormalizationlayer_norm_epsr   r   r   r#   rV   position_idspad_token_idpadding_idxtoken_dropoutmask_token_idconfigrG   r   rC   rH   s      r)   rE   zTFEsmEmbeddings.__init__   s-   d#$||55#263K3K#L"	  6  
 $)<<#9#9**#263K3K#L&	 $: $
  ''#ll==fF[F[bn=oDO"DO (/v7PR\']$HHV%C%CDT1WM!..#11#11r+   c                Z   |+|t        || j                  |      }n| j                  |      }|1t        || j                  j
                         | j                  |      }|}| j                  rt        j                  || j                  k(  d d d d d f   d|      }d}t        j                  t        j                  |d      t        j                        }|| j                  k(  }	t        j                  j                  |	t        j                  d      |z  }
|d|z
  z  d|
z
  d d d d f   z  }| j                   dk(  r| j#                  |      }||z  }| j$                  | j%                  |      }|7|t        j                  t        j&                  |d      |j(                        z  }|S )Ng        gQ?r    r!   )rL   r"   r   r   )"create_position_ids_from_input_idsr   &create_position_ids_from_inputs_embedsr   r   r   r   r   r#   wherer   r   r8   rT   mathcount_nonzeror   r   r   r   rL   )rG   	input_idsattention_maskr   inputs_embedspast_key_values_length
embeddingsmask_ratio_trainsrc_lengthsmasked_tokensmask_ratio_observedr   s               r)   rf   zTFEsmEmbeddings.call   s    $A)TM]M]_uv#JJ=Y *9dkk6L6LM 00;M #
 90B0B#BAq$J"OQTV`aJ)''"--R"H"**UK%););;M"$''"7"7RZZ^`"7"ado"o#q+;';<DW@WYZ\`bfYf?ggJ'':5"&":":<"H--J??&4J%#bggbnn^R.PR\RbRb&ccJ r+   c                   t        |      dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                        }t        j
                  t        j                  |d      |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: tf.Tensor

        Returns: tf.Tensor
        Nr    r   )rO   rP   rL   r   )r   r#   rV   r   int64broadcast_tor   )rG   r   rW   sequence_lengthr   s        r)   r   z6TFEsmEmbeddings.create_position_ids_from_inputs_embeds   st     !/4%a.xx""Q&o@P@P.PST.T\^\d\d
 r~~lA>LLr+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   )r~   r   r#   r   r   rC   rR   r   r   r   r   r   s     r)   rR   zTFEsmEmbeddings.build	  s!   ::
4*D1=t33889 1$$**4014.5At77<<= 5((..t454t,8t334 M%%tT4;;3J3J&KLM M 91 15 5M M$   D<%E?3E<EEErg   )NNNNr   )	rk   rl   rm   rn   rE   rf   r   rR   ro   rp   s   @r)   r   r      s&    > rs+ZM"Mr+   r   c                  j     e Zd Zd fd	ZddZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZd	dZ xZS )
TFEsmSelfAttentionc                *   t         |   |       |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d	      | _        t        j                  j                  | j                  t        |j                        d
      | _        t        j                  j#                  |j$                        | _        |xs t)        |dd      | _        d | _        | j*                  dk(  s| j*                  dk(  rf|j.                  | _        t        j                  j1                  d|j.                  z  dz
  | j                  t        |j                              | _        n+| j*                  dk(  rt5        | j                  d      | _        |j6                  | _        || _        y )NrB   r   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()querykernel_initializerrC   keyvaluer   r   relative_keyrelative_key_queryr   r   )r   rotaryrotary_embeddings)rF   rC   )rD   rE   r   num_attention_headshasattr
ValueErrorrh   attention_head_sizeall_head_sizer   rz   r{   r   r   r   r   r   Dropoutattention_probs_dropout_probdropoutr   r   r   r   r   distance_embeddingr@   
is_decoderr   )rG   r   r   rC   rH   s       r)   rE   zTFEsmSelfAttention.__init__  s@   d# : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OP'> (
'-zC
$ "&''>9T=Y=Y]q=q+1+I+ID(&+ll&<&<F222Q6(('6v7O7O'P '= 'D#
 ))X5%64;S;SZm%nD" ++r+   c                    t        |      d d | j                  | j                  gz   }t        j                  ||      }t        j
                  |d      S )Nr    r   r   r   r   r   )r   r   r   r#   r   r   )rG   r&   new_x_shapes      r)   transpose_for_scoresz'TFEsmSelfAttention.transpose_for_scoresA  sI     mCR(D,D,DdF^F^+__JJq+&||AL11r+   c	                   | j                  |      }	|d u}
|
r||d   }|d   }|}n |
rC| j                  | j                  |            }| j                  | j                  |            }|}n|y| j                  | j                  |            }| j                  | j                  |            }t	        j
                  |d   |gd      }t	        j
                  |d   |gd      }n@| j                  | j                  |            }| j                  | j                  |            }| j                  |	      }|| j                  dz  z  }| j                  r||f}| j                  dk(  r| j                  ||      \  }}t	        j                  ||d      }| j                  d	k(  s| j                  d
k(  r7t        |      d   }t	        j                  t	        j                  |t        j                        d      }t	        j                  t	        j                  |t        j                        d      }||z
  }| j                  || j                   z   dz
        }t	        j"                  ||j$                        }| j                  d	k(  rt	        j&                  d||      }||z   }nE| j                  d
k(  r6t	        j&                  d||      }t	        j&                  d||      }||z   |z   }|||z   }t)        |d      }| j+                  ||      }|||z  }||z  }t	        j,                  |d      }t        |      d d | j.                  gz   }t	        j0                  ||      }|r||fn|f}| j                  r||fz   }|S )Nr   r   r   r!   g      r   Ttranspose_br   r   rY   r    zbhld,lrd->bhlrzbhrd,lrd->bhlrtrainingr   r   r-   )r   r   r   r   r#   r%   r   r   r   r   matmulr   r   rV   r   r   r   r   rL   rZ   r   r   r   r   r   )rG   hidden_statesr   	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                              r)   rf   zTFEsmSelfAttention.callF  st    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@qII))^A%6$D1MK11$((=2IJI33DJJ}4MNK//0AB "D$<$<d$BB?? (5N''83%)%;%;K%S"K 99[)N''>9T=Y=Y]q=q#M215J^^BHHZrxx,PRTUN^^BHHZrxx,PRSTN%6H#'#:#:8dFbFb;bef;f#g #%77+?ARAR#S ++~=+-995E{Th+i(#36N#N --1EE13;K[Zn1o./1yy9I9Vj/k,#36T#TWs#s %/.@ ))9C ,,,J  -	9O'+5]F",]";CR"@DDVDVCW"W

=2IJ6G=/2mM]?? 11Gr+   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   HxY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r   r   r   )r~   r   r#   r   r   rC   rR   r   r   r   r   r   r   s     r)   rR   zTFEsmSelfAttention.build  s   ::
4$'3tzz/ H

  $dkk.E.E!FGH4%1txx}}- FdDKK,C,CDEF4$'3tzz/ H

  $dkk.E.E!FGH4,d3?t55::; 3&&,,T23 3 @H HF FH H3 3s0   3G<3G-3GG)GGG&)G2NN)r&   ri   rj   ri   NNNNNFF)r   ri   r   tf.Tensor | Noner   r   r   r   r   r   r   zTuple[Tuple[tf.Tensor]] | Noner   Optional[bool]r   boolrj   zTuple[tf.Tensor]rg   )rk   rl   rm   rE   r   rf   rR   ro   rp   s   @r)   r   r     s    &P2 ,0&*26379=,1e e )e $	e
  0e !1e 7e *e e 
eN3r+   r   c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmSelfOutputc                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                        | _        || _        y NrB   denser   rD   rE   r   rz   r{   r   r   r   r  r   hidden_dropout_probr   r   r   s      r)   rE   zTFEsmSelfOutput.__init__  l    d#\\''?6C[C[3\cj ( 

 ||++F,F,FGr+   c                X    | j                  |      }| j                  ||      }||z  }|S Nr   r  r   rG   r   input_tensorr   s       r)   rf   zTFEsmSelfOutput.call  2    

=1]XF%r+   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wNTr  	r~   r   r#   r   r  rC   rR   r   r   r   s     r)   rR   zTFEsmSelfOutput.build  }    ::
4$'3tzz/ H

  $dkk.E.E!FGH H 4H H   3BBrg   Frk   rl   rm   rE   rf   rR   ro   rp   s   @r)   r  r    s    Hr+   r  c                  D     e Zd Zd fd	Zd Z	 	 	 	 	 	 	 ddZddZ xZS )TFEsmAttentionc                    t         |   |       t        |d      | _        t	        |d      | _        t               | _        t        j                  j                  |j                  d      | _        || _        y )NrB   rG   output	LayerNormr   )rD   rE   r   rG   r  output_layersetpruned_headsr   rz   r   r   r  r   r   s      r)   rE   zTFEsmAttention.__init__  sc    d#&vF;	+FBE88AVAV]h8ir+   c                    t         rg   NotImplementedError)rG   r   s     r)   prune_headszTFEsmAttention.prune_heads      !!r+   c	           
         | j                  |      }	| j                  |	|||||||      }
| j                  |
d   |      }|f|
dd  z   }|S )Nr   r   )r  rG   r  )rG   r   r   r   r   r   r   r   r   hidden_states_lnself_outputsattention_outputr   s                r)   rf   zTFEsmAttention.call  sk      >>-8yy!"	
  ,,\!_mL#%QR(88r+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTrG   r  r  )r~   r   r#   r   rG   rC   rR   r  r  r   r   r   s     r)   rR   zTFEsmAttention.build  s   ::
4&2tyy~~. &		%&4.:t00556 .!!''-.4d+7t~~223 L$$dD$++2I2I%JKL L 8& &. .L Lr   rg   r   )rk   rl   rm   rE   r   rf   rR   ro   rp   s   @r)   r  r    s/    " "#4Lr+   r  c                  0     e Zd Zd fdZddZddZ xZS )TFEsmIntermediatec                    t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        || _	        y )Nr  )unitsr   rC    )
rD   rE   r   rz   r{   intermediate_sizer   r   r  r   rG   r   kwargsrH   s      r)   rE   zTFEsmIntermediate.__init__  sQ    "6"\\''**.v/G/GH ( 


 r+   c                h    | j                  |      }t        j                  j                  |      }|S )Ninputs)r  r#   nngelu)rG   r   s     r)   rf   zTFEsmIntermediate.call  s*    

-
8

=1r+   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  r  r   s     r)   rR   zTFEsmIntermediate.build  r  r  r   r   r   ri   rj   ri   rg   r  rp   s   @r)   r(  r(    s    
Hr+   r(  c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmOutputc                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                        | _        || _        y r  r  r   s      r)   rE   zTFEsmOutput.__init__&  r  r+   c                X    | j                  |      }| j                  ||      }||z  }|S r	  r
  r  s       r)   rf   zTFEsmOutput.call.  r  r+   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  )	r~   r   r#   r   r  rC   rR   r   r,  r   s     r)   rR   zTFEsmOutput.build4  s}    ::
4$'3tzz/ N

  $dkk.K.K!LMN N 4N Nr  rg   r  r  rp   s   @r)   r8  r8  %  s    Nr+   r8  c                  >     e Zd Zd fd	Z	 	 	 	 	 	 	 ddZddZ xZS )
TFEsmLayerc                   t         |   |       |j                  | _        d| _        t	        |d      | _        |j                  | _        |j                  | _        | j                  r*| j                  st        |  d      t	        |      | _	        t        |d      | _        t        |d      | _        t        j                  j!                  |j"                  d      | _        || _        y )	NrB   r   	attentionz> should be used as a decoder model if cross attention is addedintermediater  r  r   )rD   rE   chunk_size_feed_forwardseq_len_dimr  r?  r   add_cross_attentionRuntimeErrorcrossattentionr(  r@  r8  r  r   rz   r   r   r  r   r   s      r)   rE   zTFEsmLayer.__init__>  s    d#'-'E'E$'[A ++#)#=#= ##??"dV+i#jkk"0"8D-f>J'X>88AVAV]h8ir+   c	           
        ||d d nd }	| j                  |||||	|      }
|
d   }| j                  r|
dd }|
d   }n|
dd  }d }| j                  rV|Tt        | d      st        d|  d      ||d	d  nd }| j	                  ||||||||
      }|d   }||dd z   }|d   }|z   }| j                  |      }| j                  |      }| j                  |||      }|f|z   }| j                  r|fz   }|S )Nr   )r   r   r   r   r   r    rE  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r-   r   r   )r   r  r   )r?  r   r   AttributeErrorrE  r  r@  r  )rG   r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr%  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayernorm_outputintermediate_outputlayer_outputs                       r)   rf   zTFEsmLayer.callN  s    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!12$=dV D` `  @N?Yrs(;_c%&*&9&9 %&)!! ': 	'#  7q9 7" ==G ,C2+F( 14P P>>*:;"//>N/O((-<LW_ ) 
  /G+ ??!2 44Gr+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   1xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr?  r@  r  r  )r~   r   r#   r   r?  rC   rR   r@  r  r  r   r   r   s     r)   rR   zTFEsmLayer.build  sk   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4.:t00556 .!!''-.4d+7t~~223 L$$dD$++2I2I%JKL L 8+ +. .. .L Ls0   F%F#?F/3F;F #F,/F8;Grg   r   r  rp   s   @r)   r=  r=  =  s,    & "#DLLr+   r=  c                  D     e Zd Zd fd	Z	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFEsmEncoderc                
   t         |   |       || _        t        |j                        D cg c]  }t        |d|        c}| _        t        j                  j                  |j                  d      | _        y c c}w )NrB   zlayer_._emb_layer_norm_afterr   )rD   rE   r   rV   num_hidden_layersr=  layerr   rz   r   r   rV  )rG   r   rC   irH   s       r)   rE   zTFEsmEncoder.__init__  ss    d#GLVMeMeGfg!jn=g
$)LL$C$C))0F %D %
! hs   B c                   |	rdnd }|rdnd }|r| j                   j                  rdnd }|rdnd }t        | j                        D ]j  \  }}|	r||fz   }|||   nd }|||   nd } |||||||||      }|d   }|r	||d   fz  }|sB||d   fz   }| j                   j                  sb||d   fz   }l | j                  r| j	                  |      }|	r||fz   }|
st        d |||||fD              S t        |||||      S )Nr+  r   r    r   r   c              3  $   K   | ]  }|| 
 y wrg   r+  ).0vs     r)   	<genexpr>z$TFEsmEncoder.call.<locals>.<genexpr>  s      
 = 
s   )last_hidden_statepast_key_valuesr   r   cross_attentions)r   rC  	enumeraterX  rV  tupler   )rG   r   r   r   r   r   r`  	use_cacher   output_hidden_statesreturn_dictr   all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherY  layer_modulelayer_head_maskr   layer_outputss                        r)   rf   zTFEsmEncoder.call  s    #7BD$5b4%64;;;Z;Zr`d#,R$(4 	VOA|#$58H$H!.7.CilO3B3N_Q/TXN(%&!	M *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(1	V4 $$ 55mDM 1]4D D 
 "&%'(
 
 
 ;+.+*1
 	
r+   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   bxY w# 1 sw Y   UxY w)NTrV  rX  )
r~   r   r#   r   rV  rC   rR   r   r   rX  )rG   rW   rX  s      r)   rR   zTFEsmEncoder.build  s    ::
4/6Bt88==> W))//tT[[=T=T0UVW4$'3 &]]5::. &KK%& && 4W W& &s   3CC+C(+C4	rg   )
NNNNNNFFTFr  rp   s   @r)   rT  rT    s4    
 "#"E
N
&r+   rT  c                  0     e Zd Zd fdZddZddZ xZS )TFEsmPoolerc                    t        |   di | t        j                  j	                  |j
                  t        |j                        dd      | _        || _	        y )Ntanhr  )r*  r   rw   rC   r+  )
rD   rE   r   rz   r{   r   r   r   r  r   r-  s      r)   rE   zTFEsmPooler.__init__  sT    "6"\\''$$.v/G/GH	 ( 

 r+   c                <    |d d df   }| j                  |      }|S )Nr   r0  )r  )rG   r   first_token_tensorpooled_outputs       r)   rf   zTFEsmPooler.call  s*     +1a40

*<
=r+   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  r  r   s     r)   rR   zTFEsmPooler.build  r  r  r5  r6  rg   r  rp   s   @r)   rp  rp    s    	Hr+   rp  c                      e Zd ZdZeZdZy)TFEsmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    esmN)rk   rl   rm   rn   r   config_classbase_model_prefixr+  r+   r)   rx  rx  !  s    
 Lr+   rx  a2  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a Keras [Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it as a
    regular Keras model and refer to the TF/Keras documentation for all matters related to general usage and behavior.

    Parameters:
        config ([`EsmConfig`]): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        position_ids (`tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
z]The bare ESM Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZdZdgZd
 fd	ZddZd ZddZd Z		 	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ
d	 Z xZS )TFEsmMainLayera  

    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in [Attention is
    all you need](https://arxiv.org/abs/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
    Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
    r   c                H   t        |   d	d|i| || _        |j                  | _        t	        |d      | _        t        |d      | _        |rt        |d      nd | _	        t        | j                  j                  | j                  j                  z  dd      | _        y )
NrC   r   rB   encoderpoolerTcontact_head)ry   r|   rC   r+  )rD   rE   r   r   r   r   rT  r  rp  r  rr   rW  r   r  )rG   r   add_pooling_layerrC   r.  rH   s        r)   rE   zTFEsmMainLayer.__init__w  s    -d-f- ++)&|D#F;<Mk&x8SW6558W8WW^biw
r+   c                `   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r  r  r  )
r~   r   r#   r   r   rC   rR   r  r  r  r   s     r)   rR   zTFEsmMainLayer.build  sQ   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()44(4t{{//0 (!!$'(4.:t00556 .!!''-. . ;, ,) )( (. .s0   E?%F?FF$?F	FF!$F-c                .    | j                   j                  S rg   )r   r   rG   s    r)   get_input_embeddingsz#TFEsmMainLayer.get_input_embeddings  s    ...r+   c                t    || j                   j                  _        t        |      d   | j                   _        y )Nr   )r   r   weightr   r   )rG   r   s     r)   set_input_embeddingsz#TFEsmMainLayer.set_input_embeddings  s*    16''.%/%6q%9"r+   c                    t         rg   r  )rG   heads_to_prunes     r)   _prune_headszTFEsmMainLayer._prune_heads  r!  r+   c                   | j                   j                  sd}	||t        d      |t        |      }n|t        |      d d }nt        d      |\  }}|&d}d gt	        | j
                  j                        z  }nt        |d   d         d   }|t        j                  |||z   fd      }| j                  ||||||	      }t        |      }||z   }| j                  rt        j                  |      }t        j                  t        j                  |d d d d f   ||df      |d d d d f         }t        j                  ||j                  
      }||d d d d d f   z  }t        |      }t        j                  ||d   d|d   |d   f      }|d   3|d d d d | d d d f   }n t        j                  ||d   dd|d   f      }t        j                  ||j                  
      }t        j                   d|j                  
      }t        j                   d|j                  
      }t        j"                  t        j$                  ||      |      }| j                  rf|dt        j                  ||j                  
      }t	        t        |            }|dk(  r|d d d d d d d f   }|dk(  r|d d d d d d f   }dz
  dz  }nd }|t&        d g| j                   j(                  z  }| j                  |||||||	|
|||      }|d   }| j*                  | j+                  |      nd }|s
||f|dd  z   S t-        |||j.                  |j0                  |j2                  |j4                        S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer    z5You have to specify either input_ids or inputs_embedsr   r-   r   )dimsr   )r   r   r   r   r   r   rY   r   rK   g     r   )r   r   r   r   r   r`  rd  r   re  rf  r   rG  )r_  pooler_outputr`  r   r   ra  )r   r   r   r   lenr  rX  r#   fillr   rV   
less_equaltiler   rL   r   constantmultiplysubtractr  rW  r  r   r`  r   r   ra  )rG   r   r   r   r   r   r   r   r`  rd  r   re  rf  r   rW   r   r   r   embedding_outputattention_mask_shapemask_seq_lengthseq_idscausal_maskextended_attention_maskone_cstten_thousand_cstnum_dims_encoder_attention_maskencoder_extended_attention_maskencoder_outputssequence_outputru  s                                  r)   rf   zTFEsmMainLayer.call  s     {{%%I ]%>cdd"$Y/K&$]3CR8KTUU!,
J"%&"#fs4<<+=+='>>O%/0B10E%Fr%J"!WW:zDZ7Z*[cdeN??)%'#9 + 
  *.9$'==
 ??hh/G--dA._a0PQa&K ''+^5I5IJK&1N1dA:4N&N##-.E#F &(jj'*>q*A1FZ[\F]_stu_v)w'# q!-*A!QVWBW*X'&(jj!5a!8!Q@TUV@W X'# #%''*AIYI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'CZ.[]m"n ??5A &(WW-CKbKhKh%i".1*=S2T.U+.!32HDRSUV2W/.!32HDRVXYIY2Z/ 035T/TX`.`+.2+  %%!>!>>I,,*2"7#B+/!5# ' 
 *!,FJkkF]/Bcg  #$ $
 >-'+;;)77&11,==
 	
r+   c                     | ||dd      j                   }t        j                  |d      }t        j                  ||j                        }||d d d d d f   z  }||d d d d d d d f   z  }| j                  ||      S )NT)r   rf  r   r   r!   )r   r#   stackr   rL   r  )rG   r   r   attnss       r)   predict_contactszTFEsmMainLayer.predict_contacts9  s    VN`deppQ'
 =4t 3444q$ 677  //r+   )TNrg   )r   ztf.VariableNNNNNNNNNNNNF)r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   r  r`  4Optional[Tuple[Tuple[Union[np.ndarray, tf.Tensor]]]]rd  r   r   r   re  r   rf  r   r   r   rj   GUnion[TFBaseModelOutputWithPoolingAndCrossAttentions, Tuple[tf.Tensor]])rk   rl   rm   rn   _keys_to_ignore_on_load_missingrE   rR   r  r  r  rf   r  ro   rp   s   @r)   r}  r}  d  s    

 (7&7#
."/:"
 .28<6:377;?C@DPT$(,0/3&*W
*W
 6W
 4	W

 1W
 5W
  =W
 !>W
 NW
 "W
 *W
 -W
 $W
 W
 
QW
r
0r+   r}  c                       e Zd Zdd fdZe eej                  d             ee	e
e      	 	 	 	 	 	 	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
d                     Zd ZddZ xZS )
TFEsmModelc                R    t        |   |g|i | t        ||d      | _        y )Nry  r  rC   )rD   rE   r}  ry  )rG   r   r  r1  r.  rH   s        r)   rE   zTFEsmModel.__init__K  s,    3&3F3!&<MTYZr+   batch_size, sequence_length
checkpointoutput_typerz  c                B    | j                  |||||||||	|
|||      }|S )a  
        encoder_hidden_states  (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
            the model is configured as a decoder.
        encoder_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        past_key_values (`Tuple[Tuple[tf.Tensor]]` of length `config.n_layers`)
            contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
        use_cache (`bool`, *optional*, defaults to `True`):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
            `past_key_values`). Set to `False` during training, `True` during generation
        )r   r   r   r   r   r   r   r`  rd  r   re  rf  r   )ry  )rG   r   r   r   r   r   r   r   r`  rd  r   re  rf  r   r   s                  r)   rf   zTFEsmModel.callP  sF    V (()%'"7#9+/!5#  
 r+   c                :    | j                   j                  ||      S rg   ry  r  rG   r   r   s      r)   r  zTFEsmModel.predict_contacts      xx((@@r+   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTry  )r~   r   r#   r   ry  rC   rR   r   s     r)   rR   zTFEsmModel.build  se    ::
4%1txx}}- %t$% % 2% %s   A11A:)Tr5  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r`  r  rd  r   r   r   re  r   rf  r   r   r   rj   r  rg   )rk   rl   rm   rE   r   r
   ESM_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCrf   r  rR   ro   rp   s   @r)   r  r  F  s   
[
 *+?+F+FGd+ef&B$ .28<6:377;?C@DPT$(,0/3&*#(3*3 63 4	3
 13 53  =3 !>3 N3 "3 *3 -3 $3 !3 
Q3 g 3jA%r+   r  z1ESM Model with a `language modeling` head on top.c                       e Zd ZdgZdgZ fdZd Zd Zd Ze	 e
ej                  d             eeeed	      	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
                     Zd ZddZ xZS )TFEsmForMaskedLMr   r  c                P   t         |   |       |j                  rt        j	                  d       t        |dd      | _        t        |d      | _        |j                  rt        j                  t        j                  j                  | j                         ddd            5  | j                  j                   j"                  j%                  d	       d d d        | j                  j                   j"                  j&                  d
   | j                  _        y y # 1 sw Y   GxY w)NzjIf you want to use `EsmForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Fry  r  lm_headrB   r   r   r   r   )rD   rE   r   loggerwarningr}  ry  TFEsmLMHeadr  tie_word_embeddingsr#   r   ospathjoin_name_scoper   r   rR   weightsdecoderrG   r   rH   s     r)   rE   zTFEsmForMaskedLM.__init__  s     NN1
 "&EN"6	:%%rww||D,<,<,>|Ufgh H##3399,GH#'88#6#6#F#F#N#Nq#QDLL 	 &H Hs   &0DD%c                .    | j                   j                  S rg   r  r  r  s    r)   get_output_embeddingsz&TFEsmForMaskedLM.get_output_embeddings  s    ||###r+   c                &    || j                   _        y rg   r  )rG   new_embeddingss     r)   set_output_embeddingsz&TFEsmForMaskedLM.set_output_embeddings  s    -r+   c                    | j                   S rg   )r  r  s    r)   get_lm_headzTFEsmForMaskedLM.get_lm_head  s    ||r+   r  z<mask>)r  r  rz  maskc                8   ||n| j                   j                  }| j                  ||||||||	|
||      }|d   }| j                  |      }d}|| j	                  ||      }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a!  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        kwargs (`Dict[str, any]`, *optional*, defaults to `{}`):
            Used to hide legacy arguments that have been deprecated.
        N)
r   r   r   r   r   r   r   re  rf  r   r   )labelslogitsr   lossr  r   r   )r   use_return_dictry  r  hf_compute_lossr   r   r   )rG   r   r   r   r   r   r   r   r  r   re  rf  r   r   r  prediction_scoresmasked_lm_lossr  s                     r)   rf   zTFEsmForMaskedLM.call  s    > &1%<k$++B]B](()%'"7#9/!5#  
 "!* LL9!11HY1ZN')GABK7F3A3M^%.YSYY$!//))	
 	
r+   c                :    | j                   j                  ||      S rg   r  r  s      r)   r  z!TFEsmForMaskedLM.predict_contacts  r  r+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTry  r  )r~   r   r#   r   ry  rC   rR   r  r   s     r)   rR   zTFEsmForMaskedLM.build  s    ::
4%1txx}}- %t$%4D)5t||001 )""4() ) 6% %) )   C%CCC )NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r   r   re  r   rf  r   r   r   rj   z)Union[TFMaskedLMOutput, Tuple[tf.Tensor]]rg   )rk   rl   rm   r  "_keys_to_ignore_on_load_unexpectedrE   r  r  r  r   r
   r  r  r   r  r   r  rf   r  rR   ro   rp   s   @r)   r  r    s#   '6&7#*3&R"$. *+?+F+FGd+ef&$$	 .28<6:377;?C@D04,0/3&*6
*6
 66
 4	6

 16
 56
  =6
 !>6
 .6
 *6
 -6
 $6
 6
 
36
 g 6
pA	)r+   r  c                  8     e Zd ZdZd fd	ZddZd Zd Z xZS )r  z&ESM Head for masked language modeling.c                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                  d      | _        |j                  rd | _        || _        y t        j                  j	                  |j                  t        |j                        dd      | _        || _        y )	NrB   r  r   r   r   r  F)r   rC   rv   )rD   rE   r   rz   r{   r   r   r   r  r   r   r   r  r  r   r   r   s      r)   rE   zTFEsmLMHead.__init__	  s    d#\\''?6C[C[3\cj ( 

  ,,99&BWBW^j9k%%DL  !<<--!!#263K3K#L	 . DL r+   c                   | j                   ry d| _         | j                  d| j                  j                  fdd      | _        t        | dd       dt        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       || j                  j                  set        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr|   zeros)r.   rM   rN   r  r   r  )r~   rS   r   r   r|   r   r#   r   r  rC   rR   r   r   r  r  r   s     r)   rR   zTFEsmLMHead.build  sx    ::
OOF4;;3I3I2KY`lpOq	4$'3tzz/ H

  $dkk.E.E!FGH4t,8t334 M%%tT4;;3J3J&KLM4D)5dkk>]>]t||001 J""D$0G0G#HIJ J ?^5H HM MJ Js$   :3F0+3F=23G	0F:=G	Gc                    d| j                   iS )Nr|   )r|   r  s    r)   get_biaszTFEsmLMHead.get_bias,  s    		""r+   c                R   | j                  |      }t        j                  j                  |      }| j	                  |      }| j
                  j                  r1t        j                  || j                  d      | j                  z   }|S | j                  |      | j                  z   }|S )NTr   )
r  r#   r2  r3  r   r   r  r   r  r|   )rG   featuresr&   s      r)   rf   zTFEsmLMHead.call/  s    JJx EEJJqMOOA ;;**		!T\\t<tyyHA  Q$))+Ar+   rg   )	rk   rl   rm   rn   rE   rR   r  rf   ro   rp   s   @r)   r  r    s    0$J"#
r+   r  z
    ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                       e Zd ZdgZ fdZe eej                  d             e	e
ee      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
TFEsmForSequenceClassificationr   c                    t         |   |       |j                  | _        || _        t	        |dd      | _        t        |d      | _        y NFry  r  
classifierrB   )rD   rE   
num_labelsr   r}  ry  TFEsmClassificationHeadr  r  s     r)   rE   z'TFEsmForSequenceClassification.__init__F  sB      ++!&EN1&|Lr+   r  r  c                2   |	|	n| j                   j                  }	| j                  ||||||||	|
	      }|d   }| j                  |      }|dn| j	                  ||      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   r   r   re  rf  r   r   r   r  )r   r  ry  r  r  r   r   r   rG   r   r   r   r   r   r  r   re  rf  r   r   r  r  r  r  s                   r)   rf   z#TFEsmForSequenceClassification.callN  s    4 &1%<k$++B]B](()%'/!5#  

 "!*1~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
r+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wNTry  r  )r~   r   r#   r   ry  rC   rR   r  r   s     r)   rR   z$TFEsmForSequenceClassification.build  s    ::
4%1txx}}- %t$%4t,8t334 ,%%d+, , 9% %, ,r  
NNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r  r  r   r   re  r   rf  r   r   r   rj   z3Union[TFSequenceClassifierOutput, Tuple[tf.Tensor]]rg   )rk   rl   rm   r  rE   r   r
   r  r  r   r  r   r  rf   rR   ro   rp   s   @r)   r  r  <  s     (7&7#M *+?+F+FGd+ef&.$ .28<6:377;04,0/3&*.
*.
 6.
 4	.

 1.
 5.
 ..
 *.
 -.
 $.
 .
 
=.
 g .
`	,r+   r  z
    ESM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       e Zd ZdgZdgZ fdZe eej                  d             e
eee      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFEsmForTokenClassificationr  r   c                6   t         |   |       |j                  | _        t        |dd      | _        t
        j                  j                  |j                        | _	        t
        j                  j                  |j                  d      | _        || _        y r  )rD   rE   r  r}  ry  r   rz   r   r  r   r{   r  r   r  s     r)   rE   z$TFEsmForTokenClassification.__init__  sq      ++!&EN||++F,F,FG,,,,V->->\,Rr+   r  r  c                X   |	|	n| j                   j                  }	| j                  ||||||||	|
	      }|d   }| j                  ||
      }| j	                  |      }|dn| j                  ||      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r   r  )	r   r  ry  r   r  r  r   r   r   r  s                   r)   rf   z TFEsmForTokenClassification.call  s    0 &1%<k$++B]B](()%'/!5#  

 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
r+   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  )
r~   r   r#   r   ry  rC   rR   r  r   r   r   s     r)   rR   z!TFEsmForTokenClassification.build  s    ::
4%1txx}}- %t$%4t,8t334 M%%tT4;;3J3J&KLM M 9% %M Ms   C"%3C."C+.C7r  )r   r  r   r  r   r  r   r  r   r  r  r  r   r   re  r   rf  r   r   r   rj   z0Union[TFTokenClassifierOutput, Tuple[tf.Tensor]]rg   )rk   rl   rm   r  r  rE   r   r
   r  r  r   r  r   r  rf   rR   ro   rp   s   @r)   r  r    s     +4&'6&7# *+?+F+FGd+ef&+$ .28<6:377;04,0/3&*/
*/
 6/
 4	/

 1/
 5/
 ./
 */
 -/
 $/
 /
 
:/
 g /
b	Mr+   r  c                  4     e Zd ZdZd fd	ZddZddZ xZS )r  z-Head for sentence-level classification tasks.c                   t         |   |       t        j                  j	                  |j
                  t        |j                        dd      | _        t        j                  j                  |j                        | _        t        j                  j	                  |j                  t        |j                        dd      | _        || _        y )NrB   rr  r  )r   rw   rC   linearout_proj)rD   rE   r   rz   r{   r   r   r   r  r   r  r   r  r  r   r   s      r)   rE   z TFEsmClassificationHead.__init__  s    d#\\''.v/G/GH	 ( 

 ||++F,F,FG**.v/G/GH	 + 
 r+   c                    |d d dd d f   }| j                  ||      }| j                  |      }| j                  ||      }| j                  |      }|S )Nr   r   )r   r  r  )rG   r  r   r&   s       r)   rf   zTFEsmClassificationHead.call  sV    Q1WLLXL.JJqMLLXL.MM!r+   c                "   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY w)NTr  r  )
r~   r   r#   r   r  rC   rR   r   r   r  r   s     r)   rR   zTFEsmClassificationHead.build  s    ::
4$'3tzz/ H

  $dkk.E.E!FGH4T*6t}}112 K##T41H1H$IJK K 7H HK Ks   3C9<3D9DDrg   r  )rk   rl   rm   rn   rE   rf   rR   ro   rp   s   @r)   r  r    s    7"	Kr+   r  c                    t        j                  | |k7  t         j                        }t        j                  |d      |z   |z  }||z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: tf.Tensor x:

    Returns: tf.Tensor
    r   r!   )r#   r   r   cumsum)r   r   r   r  incremental_indicess        r)   r   r     sD     779+RXX6D99T25KKtS,,r+   )r   )Hrn   
__future__r   r  typingr   r   r   numpynp
tensorflowr#   
file_utilsr   r	   r
   modeling_tf_outputsr   r   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   tf_utilsr   r   utilsr   configuration_esmr   
get_loggerrk   r  r  r  r*   r1   r5   r>   rz   Layerr@   rr   r   r   r  r  r(  r8  r=  rT  rp  rx  ESM_START_DOCSTRINGr  r}  r  r  r  r  r  r  r   r+  r+   r)   <module>r     s    " 	 ) )   q q 
 
 
 G  ( 
		H	%1 )
.-
	)
** )
X%:!3!3 %:PmMell(( mM`d3++ d3NHell(( H02LU\\'' 2LjH** H2N%,,$$ N0fL## fLRZ&5<<%% Z&|H%,,$$ H:,  ' T c[0U\\'' [0	[0| cK%% K%	K%\ MObcj)+-I j) dj)Z3%,,$$ 3l  K,%9;W K,K,\  NM"68Q NMNMb%Kell00 %KP-r+   