
    sg3                      d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZmZmZ dd	lmZmZmZmZmZ dd
lmZmZ ddlmZm Z m!Z!m"Z"m#Z# ddl$m%Z%  e"jL                  e'      Z(dZ)dZ*dZ+dZ,e G d de             Z-d Z.d Z/	 dF	 	 	 	 	 	 	 	 	 dGdZ0dHdIdZ1 G d dejd                  jf                        Z4 G d dejd                  jj                        Z6 G d dejd                  jf                        Z7 G d dejd                  jf                        Z8 G d  d!ejd                  jf                        Z9 G d" d#ejd                  jf                        Z: G d$ d%ejd                  jf                        Z; G d& d'ejd                  jf                        Z< G d( d)e<      Z= G d* d+ejd                  jf                        Z> G d, d-ejd                  jf                        Z? G d. d/ejd                  jf                        Z@ G d0 d1ejd                  jf                        ZA G d2 d3ejd                  jf                        ZB G d4 d5ejd                  jf                        ZC G d6 d7ejd                  jf                        ZDe G d8 d9ejd                  jf                               ZE G d: d;e      ZFd<ZGd=ZH e d>eG       G d? d@eF             ZI e dAeG       G dB dCeF             ZJ G dD dEeF      ZKy)JzTensorFlow Wav2Vec2 model.    )annotationsN)	dataclass)AnyOptionalTupleUnion   )get_tf_activation)TFBaseModelOutputTFCausalLMOutputTFSequenceClassifierOutput)TFPreTrainedModelget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)ModelOutputadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )Wav2Vec2Config   zfacebook/wav2vec2-base-960hr   g    חc                  J    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   y)	TFWav2Vec2BaseModelOutputa1  
    Output type of [`TFWav2Vec2BaseModelOutput`], with potential hidden states and attentions.

    Args:
        last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        extract_features (`tf.Tensor` of shape `(batch_size, sequence_length, conv_dim[-1])`):
            Sequence of extracted feature vectors of the last convolutional layer of the model.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    N	tf.Tensorlast_hidden_stateextract_featureszTuple[tf.Tensor] | Nonehidden_states
attentions)	__name__
__module____qualname____doc__r    __annotations__r!   r"   r#        d/var/www/html/venv/lib/python3.12/site-packages/transformers/models/wav2vec2/modeling_tf_wav2vec2.pyr   r   :   s5    * $(y'"&i&-1M*1*.J'.r*   r   c                    t         j                  j                  t         j                  j	                  t        |       dd             }t         j                  j                  | |z   |      \  }}|S )z
    Categorical sampling without replacement is currently not implemented. The gumbel-max trick will do for now - see
    https://github.com/tensorflow/tensorflow/issues/9260 for more info
    r   r   )tfmathlograndomuniformr   nntop_k)distributionnum_samplesz_indicess        r+   _sample_without_replacementr9   W   sS    
 
RYY&&z,'?AF	GGA\A-{;JAwNr*   c           
        t        |      }t        j                  t        j                  t        j                  t        j
                  |d         d      |      ddg      }t        j                  t        j                  |t        j                  |ddg      gd            }t        j                  |t        j                  | dg      |      S )zT
    Scatter function as in PyTorch with indices in format (batch_dim, indixes)
    r   axisr   )	r   r-   reshapebroadcast_toexpand_dimsrange	transposeconcat
scatter_nd)valuesbatch_indicesoutput_shapeindices_shapebroad_casted_batch_dimspair_indicess         r+    _scatter_values_on_batch_indicesrK   a   s     }-M jj
rxxa0@'AK][^_ac]d <<		+BBJJ}_`bd^eDf*gij klL==rzz&2$'?NNr*   c           	     x   | \  }}|dk  rt        d      t        j                  j                  ||d| d| d       |t        j                  |t        j
                        z  |z  t        j                  j                  d      z   }t        j                  ||      }t        j                  |t        j                        }t        j                  j                  ||z  |      }t        j                  |      }t        j                  ||ft        j                        }t        j                  |||dz
  z
  f      }t        ||      }	t        j                   |	d	      }	t        j"                  |	dd|f      }	t        j$                  |	|||z  f      }	t        j&                  |      t        j(                  t        j(                  d
d
f   }
t        j"                  |
||df      }
t        j$                  |
|||z  f      }
|	|
z   }	t+        t        j,                  |	      |	t        j.                  |            }|S )a  
    Computes random mask spans for a given shape

    Args:
        shape: the shape for which to compute masks.
            should be of size 2 where first element is batch size and 2nd is timesteps
        attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
        mask_prob:
            probability for each token to be chosen as start of the span to be masked. this will be multiplied by
            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
        mask_length: size of the mask
        min_masks: minimum number of masked spans

    Adapted from [fairseq's
    data_utils.py](https://github.com/pytorch/fairseq/blob/e0788f7007a8473a76db573985031f3c94201e79/fairseq/data/data_utils.py#L376).
    r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `messager   dtyper;   N)
ValueErrorr-   	debuggingassert_lesscastfloat32r0   r1   maximumint32r.   minimumsqueezezerosonesr9   r@   tiler>   rA   newaxisrK   	ones_likeshape)ra   	mask_probmask_length	min_masks
batch_sizesequence_lengthnum_masked_spansspec_aug_maskuniform_distspec_aug_mask_idxsoffsetss              r+   _compute_mask_indicesrl   p   s   . #(JQABBLL]^i]j k##2"316	   !277?BJJ#GG+UXZXaXaXiXijnXoozz"2I>ww/: ww+'EGWXzz"23 HHj/:"((KM 77J;?(KLML 5\CST (:B?!3aK5HI$6EUXcEc8dehh{#BJJ

A$=>Gggg
,<a@AGjj:/?+/M"NOG+g5 5
'(*<bhh}>UM r*   c                    t        |       d   }||n|}t        j                  d      }t        j                  | |j                        } t        j
                  | ddddddf   dd|df      }||z
  t        z  S )z_
    Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
    r   Ng      ?rQ   )r   r-   constantrV   rR   r^   LARGE_NEGATIVE)masktgt_lensrc_lenone_cstexpanded_masks        r+   _expand_maskru      sx     q!G ,g'Gkk#G774w}}-DGGDD$!12Q7A4FGMm#~55r*   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ fdZd Z fdZd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Z xZS )TFWav2Vec2GroupNormzp
    From tensorflow-addons https://www.tensorflow.org/addons/api_docs/python/tfa/layers/GroupNormalization
    c                H   t        |   di | d| _        || _        || _        || _        || _        || _        t        j                  j                  |      | _        t        j                  j                  |      | _        t        j                  j                  |      | _        t        j                  j                  |	      | _        t        j                   j                  |
      | _        t        j                   j                  |      | _        | j'                          y )NTr)   )super__init__supports_maskinggroupsr=   epsiloncenterscaler   initializersgetbeta_initializergamma_initializerregularizersbeta_regularizergamma_regularizerconstraintsbeta_constraintgamma_constraint_check_axis)selfr|   r=   r}   r~   r   r   r   r   r   r   r   kwargs	__class__s                r+   rz   zTFWav2Vec2GroupNorm.__init__   s     	"6" $	
 % 2 2 6 67G H!&!3!3!7!78I!J % 2 2 6 67G H!&!3!3!7!78I!J$0044_E % 1 1 5 56F Gr*   c                    | j                  |       | j                  |       | j                  |       | j                  |       | j	                  |       | j                  |       d| _        t        | !  |       y )NT)	_check_if_input_shape_is_none'_set_number_of_groups_for_instance_norm_check_size_of_dimensions_create_input_spec_add_gamma_weight_add_beta_weightbuiltry   buildr   input_shaper   s     r+   r   zTFWav2Vec2GroupNorm.build   sj    **;744[A&&{3,{+k*
k"r*   c                4   t         j                  j                  |      }t        j                  |      }| j                  |||      \  }}| j                  ||      }|| j                     | j                  z  dk(  }|st        j                  ||      }|S |}|S Nr   )
r   backend	int_shaper-   ra   _reshape_into_groups_apply_normalizationr=   r|   r>   )	r   inputsr   tensor_input_shapereshaped_inputsgroup_shapenormalized_inputsis_instance_normoutputss	            r+   callzTFWav2Vec2GroupNorm.call   s    mm--f5XXf-'+'@'@Vh'i$ 55o{S'		2dkkAaGjj!24FGG  (Gr*   c                   | j                   | j                  | j                  | j                  | j                  t
        j                  j                  | j                        t
        j                  j                  | j                        t
        j                  j                  | j                        t
        j                  j                  | j                        t
        j                  j                  | j                        t
        j                  j                  | j                        d}t         | E         }i ||S )N)r|   r=   r}   r~   r   r   r   r   r   r   r   )r|   r=   r}   r~   r   r   r   	serializer   r   r   r   r   r   r   r   ry   
get_config)r   configbase_configr   s      r+   r   zTFWav2Vec2GroupNorm.get_config  s    kkII||kkZZ % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U$00::4;O;OP % 1 1 ; ;D<Q<Q R
 g(*(+(((r*   c                    |S Nr)   r   r   s     r+   compute_output_shapez(TFWav2Vec2GroupNorm.compute_output_shape  s    r*   c                   t        t        |            D cg c]  }||   	 }}|| j                     | j                  z  dk(  }|s~|| j                     | j                  z  || j                  <   |j	                  | j                  | j                         t        j                  |      }t        j                  ||      }||fS ||fS c c}w r   )rA   lenr=   r|   insertr-   stackr>   )r   r   r   r   ir   r   r   s           r+   r   z(TFWav2Vec2GroupNorm._reshape_into_groups  s    6;C<L6MN)!,NN'		2dkkAaG%0%;t{{%JK		"tyy$++6((;/K jj=O"K//;&& Os   Cc                6   t         j                  j                  |      }t        t	        dt        |                  }|| j                     | j                  z  dk(  }|s!| j                  dk(  rdn| j                  dz
  }n | j                  dk(  rdn| j                  dz
  }|j                  |       t        j                  j                  ||d      \  }}| j                  |      \  }	}
t        j                  j                  ||||	|
| j                        }|S )Nr   r;   T)keepdims)meanvariancer   offsetvariance_epsilon)r   r   r   listrA   r   r=   r|   popr-   r2   moments_get_reshaped_weightsbatch_normalizationr}   )r   r   r   r   group_reduction_axesr   r=   r   r   gammabetar   s               r+   r   z(TFWav2Vec2GroupNorm._apply_normalization&  s    mm--o>#E!S-=$>?'		2dkkAaGb2dii!mDb2dii!mD  &8LW[\h00=tEE55!\\ 6 
 ! r*   c                    | j                  |      }d }d }| j                  r t        j                  | j                  |      }| j
                  r t        j                  | j                  |      }||fS r   )_create_broadcast_shaper   r-   r>   r   r~   r   )r   r   broadcast_shaper   r   s        r+   r   z)TFWav2Vec2GroupNorm._get_reshaped_weights=  s\    66{C::JJtzz?;E;;::dii9Dd{r*   c                    || j                      }|3t        dt        | j                         z   dz   t        |      z   dz         y )NzAxis z\ of input tensor should have a defined dimension but the layer received an input with shape .)r=   rS   strr   r   dims      r+   r   z1TFWav2Vec2GroupNorm._check_if_input_shape_is_noneH  s\    $))$;dii.!pq k"# 	  r*   c                P    || j                      }| j                  dk(  r|| _        y y Nr;   )r=   r|   r   s      r+   r   z;TFWav2Vec2GroupNorm._set_number_of_groups_for_instance_normS  s(    $))$;;"DK r*   c                0   || j                      }|| j                  k  r3t        dt        | j                        z   dz   t        |      z   dz         || j                  z  dk7  r3t        dt        | j                        z   dz   t        |      z   dz         y )NzNumber of groups (z.) cannot be more than the number of channels ().r   z0) must be a multiple of the number of channels ()r=   r|   rS   r   r   s      r+   r   z-TFWav2Vec2GroupNorm._check_size_of_dimensionsY  s    $))$$dkk"#BC c( 	  !$dkk"#DE c( 	  "r*   c                8    | j                   dk(  rt        d      y )Nr   zdYou are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead)r=   rS   r   s    r+   r   zTFWav2Vec2GroupNorm._check_axism  s"    99>v  r*   c                    || j                      }t        j                  j                  t	        |      | j                   |i      | _        y )N)ndimaxes)r=   r   layers	InputSpecr   
input_specr   s      r+   r   z&TFWav2Vec2GroupNorm._create_input_specs  s:    $))$,,00c+6FdiiY\M]0^r*   c                    || j                      }|f}| j                  r:| j                  |d| j                  | j                  | j
                        | _        y d | _        y )Nr   ra   nameinitializerregularizer
constraint)r=   r   
add_weightr   r   r   r   r   r   r   ra   s       r+   r   z%TFWav2Vec2GroupNorm._add_gamma_weightw  s]    $))$:: 22 2200 ) DJ DJr*   c                    || j                      }|f}| j                  r:| j                  |d| j                  | j                  | j
                        | _        y d | _        y )Nr   r   )r=   r~   r   r   r   r   r   r   s       r+   r   z$TFWav2Vec2GroupNorm._add_beta_weight  s]    $))$;; 11 11// ( DI DIr*   c                :   dgt        |      z  }|| j                     | j                  z  dk(  }|sQ|| j                     | j                  z  || j                  <   |j                  | j                  | j                         |S | j                  || j                  <   |S r   )r   r=   r|   r   )r   r   r   r   s       r+   r   z+TFWav2Vec2GroupNorm._create_broadcast_shape  s    #K 00'		2dkkAaG)4TYY)?4;;)NODII&""499dkk:  *.ODII&r*   )    r;   gMbP?TTr\   r]   NNNN)r|   intr=   r   r}   floatr~   boolr   r   r   keras.initializers.Initializerr   r   r   keras.regularizers.Regularizerr   r   r   keras.constraints.Constraintr   r   )r$   r%   r&   r'   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   s   @r+   rw   rw      s     ;B<B;?<@8<9=  	
   9 : 9 : 6 7<	# )"
'!.		(_r*   rw   c                  B     e Zd ZdZ fdZd Zd Z fdZ fdZ xZ	S )TFWav2Vec2WeightNormConv1DzeAdapted from https://www.tensorflow.org/probability/api_docs/python/tfp/layers/weight_norm/WeightNormc           
         t        |   d|||dddd| || _        d| _        t	        j
                  ddg      | _        y )	NvalidT	he_normal)filterskernel_sizer|   paddinguse_biasbias_initializerr   r   r   r)   )ry   rz   explicit_paddingfilter_axisr-   rn   kernel_norm_axes)r   r   r   r|   r   r   r   s         r+   rz   z#TFWav2Vec2WeightNormConv1D.__init__  sX     	
#(	
 	
 !1 "QF 3r*   c                $   t        j                  t        j                  t        j                  | j                        | j
                              }| j                  j                  |ddt         j                  t         j                  f          y)z"Set the norm of the weight vector.r<   N)	r-   sqrt
reduce_sumsquareweight_vr   weight_gassignr_   )r   kernel_norms     r+   
_init_normz%TFWav2Vec2WeightNormConv1D._init_norm  sT    ggbmmBIIdmm,D4K`K`ab[BJJ

)BCDr*   c                    t         j                  j                  | j                  | j                        t        j
                  | j                        z  }t        j
                  |      | _        y)zGenerate normalized weights.r<   N)r-   r2   l2_normalizer   r   rB   r   kernel)r   r  s     r+   _normalize_kernelz,TFWav2Vec2WeightNormConv1D._normalize_kernel  sM    ##DMM8M8M#NQSQ]Q]^b^k^kQllll6*r*   c                   | j                   st        | 	  |       t        j                  t        j
                  | j                        dd      | _        | j                  | _        | j                  dt        | j                  j                  | j                           ddfd| j                  j                  d      | _        | j                          | j                  d| j                  fd	d
      | _        y y )Nr   T)r   	trainabler   r   r]   )r   ra   r   rR   r  biasr\   )r   ra   r   r  )r   ry   r   r-   VariablerB   r  r   r   r   ra   r   rR   r   r   r   r  r   s     r+   r   z TFWav2Vec2WeightNormConv1D.build  s    zzGM+&++bll4;;&?j\`aDK KKDM OO4==..t/?/?@A1aH"mm)) , DM OOVDLL?X_kopDI r*   c                    | j                          t        j                  |d| j                  | j                  fdf      }t        |   |      }|S )N)r   r   )r  r-   padr   ry   r   )r   r   padded_inputsoutputr   s       r+   r   zTFWav2Vec2WeightNormConv1D.call  sM     	 v1F1FH]H]0^`f'ghm,r*   )
r$   r%   r&   r'   rz   r   r  r   r   r   r   s   @r+   r   r     s&    o4E
+
q"	 	r*   r   c                  2     e Zd Zdd fdZddZddZ xZS )TFWav2Vec2NoLayerNormConvLayerc                d   t        |   di | |dkD  r|j                  |   nd| _        |j                  |   | _        t
        j                  j                  | j                  |j                  |   |j                  |   |j                  d      | _        t        |j                        | _        y )Nr   r   convr   r   stridesr   r   r)   )ry   rz   conv_dimin_conv_dimout_conv_dimr   r   Conv1Dconv_kernelconv_stride	conv_biasr  r
   feat_extract_activation
activationr   r   layer_idr   r   s       r+   rz   z'TFWav2Vec2NoLayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JKr*   c                J    | j                  |      }| j                  |      }|S r   )r  r  r   r"   s     r+   r   z#TFWav2Vec2NoLayerNormConvLayer.call  s$    		-06r*   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wNTr  )r   getattrr-   
name_scoper  r   r   r  r   s     r+   r   z$TFWav2Vec2NoLayerNormConvLayer.build  sw    ::
4&2tyy~~. @		tT-=-= >?@ @ 3@ @s   )A>>Br   r   r   r  r   r   r   returnNoner"   r   r&  r   r   r$   r%   r&   rz   r   r   r   r   s   @r+   r  r    s    L
@r*   r  c                  2     e Zd Zdd fdZddZddZ xZS )TFWav2Vec2LayerNormConvLayerc                   t        |   di | |dkD  r|j                  |   nd| _        |j                  |   | _        t
        j                  j                  | j                  |j                  |   |j                  |   |j                  d      | _        t
        j                  j                  d|j                        | _        t        |j                         | _        y )Nr   r   r  r  
layer_norm)r   r}   r)   )ry   rz   r  r  r  r   r   r  r  r  r  r  LayerNormalizationlayer_norm_epsr-  r
   r  r  r  s       r+   rz   z%TFWav2Vec2LayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	  ,,99|U[UjUj9k+F,J,JKr*   c                l    | j                  |      }| j                  |      }| j                  |      }|S r   r  r-  r  r  s     r+   r   z!TFWav2Vec2LayerNormConvLayer.call  2    		-066r*   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   rxY w# 1 sw Y   y xY wNTr  r-  
r   r"  r-   r#  r  r   r   r  r-  r  r   s     r+   r   z"TFWav2Vec2LayerNormConvLayer.build      ::
4&2tyy~~. @		tT-=-= >?@4t,8t334 G%%tT43D3D&EFG G 9@ @G G   )C%2)C1%C.1C:r$  r%  r(  r   r)  r   s   @r+   r+  r+    s    L	Gr*   r+  c                  2     e Zd Zdd fdZddZddZ xZS )TFWav2Vec2GroupNormConvLayerc                   t        |   di | |dkD  r|j                  |   nd| _        |j                  |   | _        t
        j                  j                  | j                  |j                  |   |j                  |   |j                  d      | _        t        |j                        | _        t        | j                  |j                   d      | _        y )Nr   r   r  r  r-  )r|   r}   r   r)   )ry   rz   r  r  r  r   r   r  r  r  r  r  r
   r  r  rw   r/  r-  r  s       r+   rz   z%TFWav2Vec2GroupNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JK-$$f.C.C,
r*   c                l    | j                  |      }| j                  |      }| j                  |      }|S r   r1  r  s     r+   r   z!TFWav2Vec2GroupNormConvLayer.call)  r2  r*   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   rxY w# 1 sw Y   y xY wr4  r5  r   s     r+   r   z"TFWav2Vec2GroupNormConvLayer.build/  r6  r7  r$  r%  r(  r   r)  r   s   @r+   r9  r9    s    
"	Gr*   r9  c                  0     e Zd Zd fdZddZddZ xZS )!TFWav2Vec2PositionalConvEmbeddingc                   t        |   di | t        |j                  |j                  |j
                  |j                  dz  d      | _        t        |j                        | _        t        |j                        | _        || _        y )Nr   r  )r   r   r|   r   r   r)   )ry   rz   r   hidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsr  TFWav2Vec2SamePadLayerr   r
   r  r  r   r   r   r   r   s      r+   rz   z*TFWav2Vec2PositionalConvEmbedding.__init__<  sx    "6".&&6677#;;q@
	 .f.L.LM+F,J,JKr*   c                l    | j                  |      }| j                  |      }| j                  |      }|S r   )r  r   r  r  s     r+   r   z&TFWav2Vec2PositionalConvEmbedding.callI  s2    		-0]36r*   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr!  )	r   r"  r-   r#  r  r   r   r   r@  r   s     r+   r   z'TFWav2Vec2PositionalConvEmbedding.buildO  s{    ::
4&2tyy~~. G		tT[[-D-D EFG G 3G Gs   3BBr   r   r   r   r&  r'  r(  r   r)  r   s   @r+   r>  r>  ;  s    Gr*   r>  c                  $     e Zd Z fdZd Z xZS )rC  c                R    t        |   di | |dz  dk(  rd| _        y d| _        y )Nr   r   r   r)   )ry   rz   num_pad_remove)r   rA  r   r   s      r+   rz   zTFWav2Vec2SamePadLayer.__init__Y  s.    "6"#:Q#>!#Car*   c                V    | j                   dkD  r|d d d | j                    d d f   }|S )Nr   )rJ  r  s     r+   r   zTFWav2Vec2SamePadLayer.call]  s6    ")!-C0C0C/C-CQ*FGMr*   )r$   r%   r&   rz   r   r   r   s   @r+   rC  rC  X  s    Kr*   rC  c                  .     e Zd Zd fdZd ZddZ xZS )TFWav2Vec2FeatureEncoderc                   t        |   d	i | |j                  dk(  rVt        |ddd       gt	        |j
                  dz
        D cg c]  }t        ||dz   d|dz           c}z   }|| _	        y |j                  dk(  r9t	        |j
                        D cg c]  }t        ||d|        }}|| _	        y t        d|j                   d      c c}w c c}w )
Ngroupr   zconv_layers.)r  r   r   layerz`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']r)   )
ry   rz   feat_extract_normr9  rA   num_feat_extract_layersr  r+  rS   conv_layers)r   r   r   r   rS  r   s        r+   rz   z!TFWav2Vec2FeatureEncoder.__init__d  s   "6"##w.7S_`a_bQcdev==ABi /vAl[\]^[^Z_L`ai K ' %%0 v==> -VaUVTWFXYK  ' 01I1I0JJst i
s   CCc                d    t        j                  |d      }| j                  D ]
  } ||      } |S r   )r-   r@   rS  )r   input_valuesr"   
conv_layers       r+   r   zTFWav2Vec2FeatureEncoder.callw  s7    |R8** 	6J&}5M	6r*   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY w)NTrS  )r   r"  rS  r-   r#  r   r   )r   r   rV  s      r+   r   zTFWav2Vec2FeatureEncoder.build}  st    ::
4-9".. +
]]:??3 +$$T*+ ++ :+ +s   A..A7	rG  r   r)  r   s   @r+   rM  rM  c  s    '&+r*   rM  c                       e Zd Z fdZ xZS )TFWav2Vec2FeatureExtractorc                    t        |   |fi | t        j                  d| j                  j
                   d| j                  j                  d   j
                   dt               y )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)ry   rz   warningswarnr   r$   	__bases__FutureWarningrD  s      r+   rz   z#TFWav2Vec2FeatureExtractor.__init__  s`    *6*$..112 3NN,,Q/889E 		
r*   )r$   r%   r&   rz   r   r   s   @r+   rY  rY    s    
 
r*   rY  c                  2     e Zd Zd fdZdddZddZ xZS )TFWav2Vec2FeatureProjectionc                z   t        |   di | t        j                  j	                  |j
                  d      | _        t        j                  j                  |j                  t        |j                        dd      | _        t        j                  j                  |j                        | _        || _        y )Nr-  r}   r   r\   
projectionunitskernel_initializerr   r   )rater)   )ry   rz   r   r   r.  r/  r-  Denser@  r   initializer_rangerc  Dropoutfeat_proj_dropoutdropoutr   rD  s      r+   rz   z$TFWav2Vec2FeatureProjection.__init__  s    "6",,99&BWBW^j9k,,,,$$.v/G/GH$	 - 
 ||++1I1I+Jr*   c                t    | j                  |      }| j                  |      }| j                  ||      }||fS Ntraining)r-  rc  rl  )r   r"   rp  norm_hidden_statess       r+   r   z TFWav2Vec2FeatureProjection.call  s>    !__];(:;]XF000r*   c                .   | j                   ry d| _         t        | dd       gt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  d   g       d d d        t        | dd       ht        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  d   g       d d d        y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTr-  r;   rc  )
r   r"  r-   r#  r-  r   r   r   r  rc  r   s     r+   r   z!TFWav2Vec2FeatureProjection.build  s    ::
4t,8t334 N%%tT4;;3G3G3K&LMN4t,8t334 N%%tT4;;3G3G3K&LMN N 9N NN Ns   6C??6D?DDr   r   Fr"   r   rp  r   r&  r   r   r)  r   s   @r+   r`  r`    s    1	Nr*   r`  c                  |     e Zd ZdZ	 	 	 d	 	 	 	 	 	 	 	 	 d fdZddZ	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 d
dZddZ xZS )TFWav2Vec2Attentionz6Multi-headed attention from "Attention Is All You Needc                z   t        |   d
i | || _        || _        t        j
                  j                  |      | _        ||z  | _        | j                  |z  | j                  k7  rt        d| j                   d| d      | j                  dz  | _
        || _        t        j
                  j                  ||d      | _        t        j
                  j                  ||d      | _        t        j
                  j                  ||d      | _        t        j
                  j                  ||d	      | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: r   g      k_proj)r   r   q_projv_projout_projr)   )ry   rz   	embed_dim	num_headsr   r   rj  rl  head_dimrS   scaling
is_decoderrh  ry  rz  r{  r|  )r   r}  r~  rl  r  r  r   r   s          r+   rz   zTFWav2Vec2Attention.__init__  s    	"6"""||++G4!Y.MMI%$..8MdnnM]$YKr3  }}d*$ll((T(Qll((T(Qll((T(Q**9t**Ur*   c           	         t        j                  t        j                  |||| j                  | j                  f      d      S )Nr   r   r   r	   )r-   rB   r>   r~  r  )r   tensorseq_lenbszs       r+   _shapezTFWav2Vec2Attention._shape  s0    ||BJJvWdnndmm/\]_kllr*   c           
     	   |du}t        |      \  }}	}
| j                  |      | j                  z  }|r||d   }|d   }n
|rE| j                  | j	                  |      d|      }| j                  | j                  |      d|      }n|}| j                  | j	                  |      d|      }| j                  | j                  |      d|      }t        j                  |d   |gd      }t        j                  |d   |gd      }nD| j                  | j	                  |      d|      }| j                  | j                  |      d|      }| j                  r||f}|| j                  z  d| j                  f}t        j                  | j                  ||	|      |      }t        j                  ||      }t        j                  ||      }t        |      d   }t        j                  ||d      }t        j                  j                  t        |      || j                  z  |	|gd	|| j                  z  |	|f d
t        |              |t        j                  j                  t        |      |d|	|gd|d|	|f d
t        |              t        j                  ||j                         }t        j                  ||| j                  |	|f      |z   }t        j                  ||| j                  z  |	|f      }t#        |d      }|t        j                  j                  t        |      | j                  gd| j                   d
t        |              t        j                  |d      t        j                  ||| j                  |	|f      z  }t        j                  ||| j                  z  |	|f      }| j%                  ||      }t        j                  ||      }t        j                  j                  t        |      || j                  z  |	| j                  gd|| j                  |	| j                  f d
t        |              t        j&                  t        j                  ||| j                  |	| j                  f      d      }t        j                  |||	|
f      }| j)                  |      }t        j                  ||| j                  |	|f      }|||fS )z#Input shape: Batch x Time x ChannelNr   r   r;   r   r<   T)transpose_bz$Attention weights should be of size z	, but is rN   z!Attention mask should be of size rQ   z/Head mask for a single layer should be of size )r   r;   r   r   ro  z `attn_output` should be of size r  )r   rz  r  r  ry  r{  r-   rC   r  r~  r  r>   matmulrT   assert_equalrV   rR   r   rl  rB   r|  )r   r"   key_value_statespast_key_valueattention_masklayer_head_maskrp  is_cross_attentionr  rq   r}  query_states
key_statesvalue_states
proj_shaperr   attn_weights
attn_probsattn_outputs                      r+   r   zTFWav2Vec2Attention.call  s    .T9",]";Wi {{=1DLL@."<'*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BKJ99nQ&7%FQOL T[[%?SIJ;;t{{='A2sKL?? ),7NDNN*B>
zz$++lGS"I:VZZ
J7
zz,
;Z(+yyztL
!!|$4>>!7G46dnn8LgW^7_6` a|,-/	 	" 	
 %LL%%>*a'*7a'8R7S T">235	 &   WW^<;M;MNN::lS$..'SZ4[\_mmL::lS4>>5I7T[4\]L%l<&LL%%?+ Et~~EW X"?346	 &  ::o}E

sDNNGWEI L ::lS4>>5I7T[4\]L\\,\B
ii
L9
!!{#4>>!7DMM:2CRVR_R_3`2a b{+,.	 	" 	
 llJJ{S$..'4==$QRT`
 jjsGY.GHmmK0"$**\CQXZa;b"cL.88r*   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTry  rz  r{  r|  )r   r"  r-   r#  ry  r   r   r}  rz  r{  r|  r   s     r+   r   zTFWav2Vec2Attention.buildK  s   ::
44(4t{{//0 @!!4t~~">?@44(4t{{//0 @!!4t~~">?@44(4t{{//0 @!!4t~~">?@4T*6t}}112 B##T4$@AB B 7@ @@ @@ @B Bs0   )F32)G )G )G3F= G	GG!)        FT)
r}  r   r~  r   rl  r   r  r   r  r   )r  r   r  r   r  r   )NNNNF)r"   r   r  tf.Tensor | Noner  zTuple[Tuple[tf.Tensor]] | Noner  r  r  r  rp  Optional[bool]r&  z"Tuple[tf.Tensor, tf.Tensor | None]r   )	r$   r%   r&   r'   rz   r  r   r   r   r   s   @r+   rw  rw    s    @  VV V 	V
 V V8m .29=+/,0#(t9 t9 +t9 7	t9
 )t9 *t9 !t9 
,t9lBr*   rw  c                  2     e Zd Zd fdZdddZddZ xZS )TFWav2Vec2FeedForwardc                2   t        |   di | t        j                  j	                  |j
                        | _        t        j                  j                  |j                  t        |j                        dd      | _        t        |j                        | _        t        j                  j                  |j                  t        |j                        dd      | _        t        j                  j	                  |j"                        | _        || _        y )Nr\   intermediate_denserd  output_denser)   )ry   rz   r   r   rj  activation_dropoutintermediate_dropoutrh  intermediate_sizer   ri  r  r
   
hidden_actintermediate_act_fnr@  r  hidden_dropoutoutput_dropoutr   rD  s      r+   rz   zTFWav2Vec2FeedForward.__init__^  s    "6"$)LL$8$89R9R$S!"',,"4"4**.v/G/GH$%	 #5 #
 $5V5F5F#G !LL..$$.v/G/GH$	 / 
 $ll2263H3HIr*   c                    | j                  |      }| j                  |      }| j                  ||      }| j                  |      }| j	                  ||      }|S rn  )r  r  r  r  r  )r   r"   rp  s      r+   r   zTFWav2Vec2FeedForward.callt  sb    //>00?11-(1S))-8++MH+Mr*   c                "   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY w)NTr  r  )r   r"  r-   r#  r  r   r   r   r@  r  r  r   s     r+   r   zTFWav2Vec2FeedForward.build}  s    ::
4-t4@t66;;< U''--tT4;;;R;R.STU4.:t00556 U!!''tT[[5R5R(STU U ;U UU Us   3C9<3D9DDrs  rt  ru  r   r)  r   s   @r+   r  r  ]  s    ,	Ur*   r  c                  J     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFWav2Vec2EncoderLayerc                   t        |   d	i | t        |j                  |j                  |j
                  dd      | _        t        j                  j                  |j                        | _        t        j                  j                  |j                  d      | _        t        |d      | _        t        j                  j                  |j                  d      | _        || _        y 
NF	attention)r}  r~  rl  r  r   r-  rb  feed_forwardr   final_layer_normr)   ry   rz   rw  r@  num_attention_headsattention_dropoutr  r   r   rj  r  rl  r.  r/  r-  r  r  r  r   rD  s      r+   rz   zTFWav2Vec2EncoderLayer.__init__      "6",((00,,
 ||++F,A,AB,,99&BWBW^j9k1&~N % ? ?H]H]dv ? wr*   c                    |}| j                  |||      \  }}}| j                  ||      }||z   }| j                  |      }|| j                  |      z   }| j	                  |      }|f}|r||fz  }|S N)r  rp  ro  )r  rl  r-  r  r  	r   r"   r  output_attentionsrp  attn_residualr  r7   r   s	            r+   r   zTFWav2Vec2EncoderLayer.call  s     &)-.8 *8 *
&|Q ]XF%56%(9(9-(HH--m< "&Gr*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   HxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY wNTr  r-  r  r  r   r"  r-   r#  r  r   r   r-  r   r@  r  r  r   s     r+   r   zTFWav2Vec2EncoderLayer.build     ::
4d+7t~~223 +$$T*+4t,8t334 M%%tT4;;3J3J&KLM4.:t00556 .!!''-.4+T2>t4499: S%%++T49P9P,QRS S ?+ +M M. .S S0   F-%3F:G03G-F7:GGGrs  NFF
r"   r   r  r  r  r  rp  r   r&  zTuple[tf.Tensor]r   r)  r   s   @r+   r  r    sN    $ ,0,1  ) *	
  
2Sr*   r  c                  J     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZ xZS )%TFWav2Vec2EncoderLayerStableLayerNormc                   t        |   d	i | t        |j                  |j                  |j
                  dd      | _        t        j                  j                  |j                        | _        t        j                  j                  |j                  d      | _        t        |d      | _        t        j                  j                  |j                  d      | _        || _        y r  r  rD  s      r+   rz   z.TFWav2Vec2EncoderLayerStableLayerNorm.__init__  r  r*   c                    |}| j                  |      }| j                  |||      \  }}}| j                  ||      }||z   }|| j                  | j	                  |            z   }|f}|r||fz  }|S r  )r-  r  rl  r  r  r  s	            r+   r   z*TFWav2Vec2EncoderLayerStableLayerNorm.call  s     &6)-.8 *8 *
&|Q ]XF%5%(9(9$:O:OP]:^(__ "&Gr*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   HxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY wr  r  r   s     r+   r   z+TFWav2Vec2EncoderLayerStableLayerNorm.build  r  r  rs  r  r  r   r)  r   s   @r+   r  r    sN    $ ,0,1  ) *	
  
.Sr*   r  c                  V     e Zd Zd fdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFWav2Vec2Encoderc                   t        |   di | || _        t        |d      | _        t
        j                  j                  |j                  d      | _	        t
        j                  j                  |j                        | _        t        |j                        D cg c]  }t        |d|        c}| _        y c c}w Npos_conv_embedr  r-  rb  zlayers.r)   )ry   rz   r   r>  r  r   r   r.  r/  r-  rj  r  rl  rA   num_hidden_layersr  rP  r   r   r   r   r   s       r+   rz   zTFWav2Vec2Encoder.__init__  s    "6"?M]^,,99&BWBW^j9k||++F,A,ABRWX^XpXpRqrQ,VGA3-Hr
r   B?c                6   |rdnd }|rdnd }|%|t        j                  |d      z  }t        |      }nd }| j                  |      }	||	z   }| j	                  |      }| j                  ||      }t        | j                        D ]f  \  }
}|r||fz   }t        j                  j                  dd      }|r|| j                  j                  k  rJ |||||      }|d   }|s^||d   fz   }h |r||fz   }|st        d |||fD              S t        |||      S )	Nr)   r;   ro  r   r   r"   r  r  rp  c              3  &   K   | ]	  }||  y wr   r)   .0vs     r+   	<genexpr>z)TFWav2Vec2Encoder.call.<locals>.<genexpr>6       mq_`_lm   r    r"   r#   )r-   r@   ru   r  r-  rl  	enumeraterP  npr0   r1   r   	layerdroptupler   r   r"   r  r  output_hidden_statesreturn_dictrp  all_hidden_statesall_self_attentionsposition_embeddingsr   layer_moduledropout_probabilitylayer_outputss                 r+   r   zTFWav2Vec2Encoder.call  s\    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%(;;6]XF(4 	POA|#$58H$H! #%))"3"3Aq"904;;3H3HH(+-"3!	M *!,M &9]1=M<O&O#%	P*   1]4D Dm]4EGZ$[mmm ++*
 	
r*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   xY w# 1 sw Y   nxY w# 1 sw Y   axY wNTr  r-  rP  r   r"  r-   r#  r  r   r   r-  r   r@  rP  r   r   rP  s      r+   r   zTFWav2Vec2Encoder.build=  "   ::
4)40<t22778 0##))$/04t,8t334 M%%tT4;;3J3J&KLM4$'3 &]]5::. &KK%& && 40 0M M& &$   D9%3EE9EEE	rs  NFFTFr"   r   r  r  r  r  r  r  r  r  rp  r  r&  *Union[TFBaseModelOutput, Tuple[tf.Tensor]]r   r)  r   s   @r+   r  r    si    s ,0,1/4&*#(5
 5
 )5
 *	5

 -5
 $5
 !5
 
45
n&r*   r  c                  V     e Zd Zd fdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS ) TFWav2Vec2EncoderStableLayerNormc                   t        |   di | || _        t        |d      | _        t
        j                  j                  |j                  d      | _	        t
        j                  j                  |j                        | _        t        |j                        D cg c]  }t        |d|        c}| _        y c c}w r  )ry   rz   r   r>  r  r   r   r.  r/  r-  rj  r  rl  rA   r  r  rP  r  s       r+   rz   z)TFWav2Vec2EncoderStableLayerNorm.__init__N  s    "6"?M]^,,99&BWBW^j9k||++F,A,ABW\]c]u]uWv
RS1&}M

 
r  c                6   |rdnd }|rdnd }|%|t        j                  |d      z  }t        |      }nd }| j                  |      }	||	z   }| j	                  ||      }t        | j                        D ]f  \  }
}|r||fz   }t        j                  j                  dd      }|r|| j                  j                  k  rJ |||||      }|d   }|s^||d   fz   }h | j                  |      }|r||fz   }|st        d |||fD              S t        |||      S )	Nr)   r;   ro  r   r   r  c              3  &   K   | ]	  }||  y wr   r)   r  s     r+   r  z8TFWav2Vec2EncoderStableLayerNorm.call.<locals>.<genexpr>  r  r  r  )r-   r@   ru   r  rl  r  rP  r  r0   r1   r   r  r-  r  r   r  s                 r+   r   z%TFWav2Vec2EncoderStableLayerNorm.callX  s\    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%(;;]XF(4 	POA|#$58H$H! #%))"3"3Aq"904;;3H3HH(+-"3!	M *!,M &9]1=M<O&O#%	P( 6 1]4D Dm]4EGZ$[mmm ++*
 	
r*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   xY w# 1 sw Y   nxY w# 1 sw Y   axY wr  r  r  s      r+   r   z&TFWav2Vec2EncoderStableLayerNorm.build  r  r  rs  r  r  r   r)  r   s   @r+   r  r  M  sh    
 ,0,1/4&*#(5
 5
 )5
 *	5

 -5
 $5
 !5
 
45
n&r*   r  c                       e Zd ZeZd fdZddZddZdd	dZe		 	 	 	 	 	 	 	 	 d
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z
 xZS )TFWav2Vec2MainLayerc                    t        |   di | || _        t        |d      | _        t        |d      | _        |j                  rt        |d      | _	        y t        |d      | _	        y )Nfeature_extractorr  feature_projectionencoderr)   )ry   rz   r   rM  r  r`  r  do_stable_layer_normr  r  r  rD  s      r+   rz   zTFWav2Vec2MainLayer.__init__  s_    "6"!9&GZ!["=fK_"`&&;FSDL,V)DDLr*   c                T   | j                   ry d| _         | j                  j                  dkD  s| j                  j                  dkD  r/| j	                  | j                  j
                  fddd      | _        t        | dd       Mt        j                  | j                  j                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)	NTr  r1   masked_spec_embedra   r   r  r   r  r  r  )r   r   mask_time_probmask_feature_probr   r@  r  r"  r-   r#  r  r   r   r  r  r   s     r+   r   zTFWav2Vec2MainLayer.build  s^   ::
;;%%+t{{/L/Ls/R%)__{{..0iSW^q &5 &D" 4,d3?t55::; 3&&,,T234-t4@t66;;< 4''--d344D)5t||001 )""4() ) 63 34 4) )s$   ,FF FFFF'c                    d }t        | j                  j                  | j                  j                        D ]  \  }} ||||      } |S )H
        Computes the output length of the convolutional layers
        c                    | |z
  |z  dz   S r   r)   input_lengthr   strides      r+   _conv_out_lengthzNTFWav2Vec2MainLayer._get_feat_extract_output_lengths.<locals>._conv_out_length  s     !;.69A==r*   )zipr   r  r  )r   input_lengthsr  r   r  s        r+    _get_feat_extract_output_lengthsz4TFWav2Vec2MainLayer._get_feat_extract_output_lengths  sP    
	>
 $'t{{'>'>@W@W#X 	QK,]KPM	Q r*   c                   t        |      \  }}}t        | j                  dd      s|S |t        j                  t        j
                  |ddddt        j                  f   t        j                        | j                  t        j                  t        j                  ddf   |      }n| j                  j                  dkD  rt        ||f| j                  j                  | j                  j                  d      }t        j                  t        j
                  |ddddt        j                  f   t        j                        | j                  t        j                  t        j                  ddf   |      }| j                  j                  dkD  rgt        ||f| j                  j                  | j                  j                        }t        j                  |ddt        j                  ddf   |d      }|S )z
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://arxiv.org/abs/1904.08779).
        apply_spec_augmentTNr   r   )rb   rc   rd   )rb   rc   )r   r"  r   r-   whererV   r_   r   r  r  rl   mask_time_lengthr   mask_feature_length)r   r"   mask_time_indicesre   rf   r@  mask_feature_indicess          r+   _mask_hidden_statesz'TFWav2Vec2MainLayer._mask_hidden_states  s   
 4>m3L0
O[ t{{$8$?  (HH)!Q

*:;RWWE&&rzz2::q'@AM [[''!+ 5_-++44 KK88	! HH)!Q

*:;RWWE&&rzz2::q'@AM ;;((1,#8[)++77 KK;;$ 
 HH%9!RZZ:J%K]\]^Mr*   c                &   | j                  t        j                  |t        j                        |
      }|S| j	                  t        j
                  |d            }t        j                  |t        |      d   |j                        }| j                  ||
      \  }}|j                  dd       }|
r| j                  ||      }| j                  |||||	|
      }|d   }|	s
||f|dd  z   S t        |||j                  |j                  	      S )
Nro  r;   r   )maxlenrR   r  )r  r  r  r  r  rp  r   )r    r!   r"   r#   )r  r-   rV   rW   r
  r   sequence_maskr   rR   r  r   r  r  r   r"   r#   )r   rU  r  token_type_idsposition_ids	head_maskinputs_embedsr  r  r  rp  r   r!   output_lengthsr"   r  encoder_outputss                    r+   r   zTFWav2Vec2MainLayer.call  s3     11"'',

2S^f1g %!BB2==Q_acCdeN--z2B'CA'FN^NdNdN +/*A*ABR]e*A*f''"JJ':DA 44]Vg4hM,,)/!5# ' 
 (*!#34qr7JJJ(+-)77&11	
 	
r*   rs  r   )r	  r   )r"   r   r  r  	NNNNNNNNF)rU  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rp  r   r   r   )r$   r%   r&   r   config_classrz   r   r
  r  r   r   r   r   s   @r+   r  r    s    !L	E)$*X  ,0+/)-&**.,0/3&*1
1
 )1
 )	1

 '1
 $1
 (1
 *1
 -1
 $1
 1
 1
 1
r*   r  c                  h     e Zd ZdZeZdZdZed        Z	ed        Z
 fdZd	dZ	 d		 	 	 d
dZ xZS )TFWav2Vec2PreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    wav2vec2rU  c                    t        j                  dt         j                  d      t        j                  dt         j                  d      dS )N)NNrU  r  r  rU  r  )r-   
TensorSpecrW   r   s    r+   input_signaturez)TFWav2Vec2PreTrainedModel.input_signature:  s7     MM,

X mmL"**K[\
 	
r*   c                    t         j                  j                  dt         j                        t        j                  dt         j                        dS )N)r   i  )ra   rR   r#  )r-   r0   r1   rW   r]   r   s    r+   dummy_inputsz&TFWav2Vec2PreTrainedModel.dummy_inputsA  s;     II--HBJJ-O ggHBJJG
 	
r*   c                    t        |   |g|i | t        j                  d| j                  j
                   d       y )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPU)ry   rz   loggerwarningr   r$   r   r   r   r   r   s       r+   rz   z"TFWav2Vec2PreTrainedModel.__init__H  sD    3&3F3(() *E E	
r*   c                T   || j                   j                  n|}d }t        | j                   j                  | j                   j                        D ]  \  }} ||||      } |rBt        | j                   j                        D ]   } ||d| j                   j                        }" |S )r  c                N    t         j                  j                  | |z
  |      dz   S r   )r-   r.   floordivr  s      r+   r  zTTFWav2Vec2PreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_lengthU  s#    77##L;$>G!KKr*   r   )r   add_adapterr  r  r  rA   num_adapter_layersadapter_stride)r   r	  r0  r  r   r  r7   s          r+   r
  z:TFWav2Vec2PreTrainedModel._get_feat_extract_output_lengthsO  s     2=1Ddkk--+	L $'t{{'>'>@W@W#X 	QK,]KPM	Q 4;;99: _ 04;;C]C] ^_r*   c                   t         j                  j                  |d      d d df   }| j                  ||      }t        j                  |t         j
                        }t        j                  |      d   }t        j                  ||f|j                  d      }t        j                  |t        j                  t        j                  |      |dz
  gd      t        j                  |g|j                        	      }t        j                  |dg      }t        j                  |d      }t        j                  |dg      }t        j                  |t         j                        }|S )
Nr;   r<   )r0  r   r  )rR   r   r   rQ   )r8   updates)r-   r.   cumsumr
  rV   rY   ra   r\   rR   tensor_scatter_nd_updater   rA   r]   reverser   )r   feature_vector_lengthr  r0  non_padded_lengthsr  re   s          r+   "_get_feature_vector_attention_maskz<TFWav2Vec2PreTrainedModel._get_feature_vector_attention_mask`  s      WW^^N^DQUK>>?Q_j>k:XXn-a0
./~7K7KRb
 44HHbhhz2NQ4FGaPGGZL0D0DE

 N">>;N">9r*   r   )r8  r   r  r   )r$   r%   r&   r'   r   r  base_model_prefixmain_input_namepropertyr%  r'  rz   r
  r:  r   r   s   @r+   r   r   0  sf    
 "L"$O
 
 
 

$ RV%(:Cr*   r   a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_values` only and nothing else: `model(input_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_values, attention_mask])` or `model([input_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_values": input_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`Wav2Vec2Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a	  
    Args:
        input_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]` `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`np.ndarray` or `tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_values` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `input_values` indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False``):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zcThe bare TFWav2Vec2 Model transformer outputing raw hidden-states without any specific head on top.c                       e Zd Zd fdZ ee       eee      e		 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Z
ddZ xZS )	TFWav2Vec2Modelc                ^    t        |   |g|i | || _        t        |d      | _        y )Nr!  r  )ry   rz   r   r  r!  r,  s       r+   rz   zTFWav2Vec2Model.__init__  s/    3&3F3+FDr*   output_typer  c                    |r|n| j                   j                  }|r|n| j                   j                  }|	r|	n| j                   j                  }	| j	                  |||||||||	|

      }|S )a\  

        Returns:

        Example:

        ```python
        >>> from transformers import AutoProcessor, TFWav2Vec2Model
        >>> from datasets import load_dataset
        >>> import soundfile as sf

        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        >>> model = TFWav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")


        >>> def map_to_array(batch):
        ...     speech, _ = sf.read(batch["file"])
        ...     batch["speech"] = speech
        ...     return batch


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> hidden_states = model(input_values).last_hidden_state
        ```
rU  r  r  r  r  r  r  r  r  rp  )r   r  r  r  r!  )r   rU  r  r  r  r  r  r  r  r  rp  r   s               r+   r   zTFWav2Vec2Model.call  s~    X 8L3QUQ\Q\QqQq1B-HeHe%0kdkk6M6M--%))%'/!5#   
 r*   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr!  )r   r"  r-   r#  r!  r   r   r   s     r+   r   zTFWav2Vec2Model.build#  si    ::
4T*6t}}112 *##D)* * 7* *s   A11A:rs  r  )rU  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rp  r   r&  r  r   )r$   r%   r&   rz   r   WAV_2_VEC_2_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr   r   r   r   r   s   @r+   r?  r?    s    
E
 ++GH+<?[ ,0+/)-&**.,0/3&*:: ): )	:
 ': $: (: *: -: $: : 
4:  \ I:x*r*   r?  zhTFWav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                       e Zd Zd fdZd Zd Ze ee       e	e
e      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFWav2Vec2ForCTCc                   t        |   |g|i | t        |d      | _        t        j
                  j                  |j                        | _        t        j
                  j                  |j                  d      | _        t        |d      r|j                  r|j                  | _        y |j                  | _        y )Nr!  r  lm_headr0  )ry   rz   r  r!  r   r   rj  final_dropoutrl  rh  
vocab_sizerK  hasattrr0  output_hidden_sizer@  r,  s       r+   rz   zTFWav2Vec2ForCTC.__init__1  s    3&3F3+FD||++F,@,@A||))&*;*;))L)0)GFL^L^F%% 	djdvdv 	r*   c                X    t        j                  dt               | j                          yz
        Calling this function will disable the gradient computation for the feature encoder so that its parameters will
        not be updated during training.
        zThe method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.Nr[  r\  r^  freeze_feature_encoderr   s    r+   freeze_feature_extractorz)TFWav2Vec2ForCTC.freeze_feature_extractor;  '    
 	Q	

 	##%r*   c                :    d| j                   j                  _        yz
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        FNr!  r  r  r   s    r+   rS  z'TFWav2Vec2ForCTC.freeze_feature_encoderG      
 5:''1r*   rA  c                2   |Nt        j                  |      | j                  j                  k\  r"t	        d| j                  j                         | j                  ||||||||	|
|
      }|d   }| j                  ||      }| j                  |      }|C||n$t        j                  |t         j                        }| j
                  j                  t        j                  |d            }t        j                  |dk\  t         j                        }t        j                  |d      }t         j                  j                  ||||| j                  j                   d	
      }| j                  j"                  dk(  rt        j                  |      }| j                  j"                  dk(  rt        j$                  |      }t        j&                  |d      }nd}|
s|f|t(        d z   }||f|z   S |S t+        |||j,                  |j.                        S )a  
        labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_values` docstring) Tokens with indices set to `-100` are ignored (masked),
            the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Returns:

        Example:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoProcessor, TFWav2Vec2ForCTC
        >>> from datasets import load_dataset
        >>> import soundfile as sf

        >>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
        >>> model = TFWav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")


        >>> def map_to_array(batch):
        ...     speech, _ = sf.read(batch["file"])
        ...     batch["speech"] = speech
        ...     return batch


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
        >>> logits = model(input_values).logits
        >>> predicted_ids = tf.argmax(logits, axis=-1)

        >>> transcription = processor.decode(predicted_ids[0])

        >>> # compute loss
        >>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST"

        >>> # Pass transcription as `text` to encode labels
        >>> labels = processor(text=transcription, return_tensors="tf").input_ids

        >>> loss = model(input_values, labels=labels).loss
        ```Nz$Label values must be <= vocab_size: rD  r   ro  rQ   r;   r<   F)logitslabelslogit_lengthlabel_lengthblank_indexlogits_time_majorsumr   rP   lossr[  r"   r#   )r-   
reduce_maxr   rM  rS   r!  rl  rK  r`   rW   r
  r   rV   rY   r2   ctc_losspad_token_idctc_loss_reductionreduce_meanr>   _HIDDEN_STATES_START_POSITIONr   r"   r#   )r   rU  r  r  r  r  r  r  r\  r  r  rp  r   r"   r[  r	  labels_masktarget_lengthsrc  r  s                       r+   r   zTFWav2Vec2ForCTC.callN  s   x "--"74;;;Q;Q"QCDKKDZDZC[\]]--%))%'/!5#   
  
]XFm,"0"<",,|cecmcmBn  !MMJJ2==YgnpKqrM ''&A+rxx8K]];R@N55>>*+ KK44"' " D {{--6}}T*{{--7~~d+::dD)DDY)F)G!HHF)-)9TGf$EvE!//))	
 	
r*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   rxY w# 1 sw Y   y xY w)NTr!  rK  )	r   r"  r-   r#  r!  r   r   rK  rO  r   s     r+   r   zTFWav2Vec2ForCTC.build  s    ::
4T*6t}}112 *##D)*4D)5t||001 J""D$0G0G#HIJ J 6* *J Js   C%)C$C!$C-rs  )
NNNNNNNNNF)rU  r   r  r  r  r  r  r  r  r  r  r  r  r  r\  r  r  r  r  r  rp  r  r&  z)Union[TFCausalLMOutput, Tuple[tf.Tensor]]r   )r$   r%   r&   rz   rT  rS  r   r   rF  r   r   rG  r   r   r   r   s   @r+   rI  rI  ,  s    


&: *+GH+;/Z ,0+/)-&**.,0#'/3&*#(s
s
 )s
 )	s

 's
 $s
 (s
 *s
 !s
 -s
 $s
 !s
 
3s
 [ I s
j	Jr*   rI  c                  v     e Zd Z fdZd Zd Zd Ze	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zd	dZ	 xZ
S )
#TFWav2Vec2ForSequenceClassificationc                   t         |   |       t        |d      | _        |j                  dz   | _        t        j                  | j                               5  |j                  r%| j                  | j
                  fddd      | _        d d d        || _        t        j                  j                  |j                   d	      | _        t        j                  j                  |j$                  d d
      | _        y # 1 sw Y   rxY w)Nr!  r  r   r]   Tlayer_weightsr  	projector)re  r   
classifier)re  r  r   )ry   rz   r  r!  r  
num_layersr-   r#  _name_scopeuse_weighted_layer_sumr   rp  r   r   r   rh  classifier_proj_sizerq  
num_labelsrr  )r   r   r   s     r+   rz   z,TFWav2Vec2ForSequenceClassification.__init__  s     +FD 22Q6]]4++-. 	,,%)__??,&DWf &5 &"	
 ++&2M2MT_+`,,,,63D3DQU\h,i	 	s   2C==Dc                X    t        j                  dt               | j                          yrQ  rR  r   s    r+   rT  z<TFWav2Vec2ForSequenceClassification.freeze_feature_extractor  rU  r*   c                :    d| j                   j                  _        yrW  rX  r   s    r+   rS  z:TFWav2Vec2ForSequenceClassification.freeze_feature_encoder  rY  r*   c                H    | j                   j                  D ]	  }d|_         y)z
        Calling this function will disable the gradient computation for the base model so that its parameters will not
        be updated during training. Only the classification head will be updated.
        FN)r!  r   r  )r   rP  s     r+   freeze_base_modelz5TFWav2Vec2ForSequenceClassification.freeze_base_model  s$    
 ]])) 	$E#EO	$r*   c           	     
   ||n| j                   j                  }| j                   j                  rdn|}| j                  ||||||      }| j                   j                  r||t           }	t        j                  |	d      }	t
        j                  j                  | j                  d      }
t        j                  |	t        j                  |
g d      z  d      }	n|d   }	| j                  |	      }	|t        j                  |	d      }n| j                  t        |	      d   |      }t        j                   ||	j"                        }t        j$                  |	t        j&                  |d            }	t        j(                  t        j                  |	d      t        j&                  t        j                  |d      d            }| j+                  |      }d }|ht,        j.                  j1                  d      } |t        j                  |dg      t        j                  |d| j                   j2                  g            }|s|f|t        d  z   }||f|z   S |S t5        |||j6                  |j8                  	      S )
NTr  r   r<   r;   )r;   r   r   r   )from_logitsrb  )r   use_return_dictru  r!  ri  r-   r   r2   softmaxrp  r   r>   rq  rh  r:  r   rV   rR   multiplyr@   dividerr  r   lossesSparseCategoricalCrossentropyrw  r   r"   r#   )r   rU  r  r  r  r  r\  rp  r   r"   norm_weightspooled_outputpadding_maskpadding_mask_floatr[  rc  loss_fnr  s                     r+   r   z(TFWav2Vec2ForSequenceClassification.call  s+    &1%<k$++B]B]'+{{'I'ItOc--)/!5#   
 ;;--#$ABMHH];M55==););"=ELMM-"**\S]:^*^efgM#AJM}5!NN=qAMBB:mC\]^C_aopL!#}7J7J!KKKr~~FX_a7bcMIIm!4bnnR]]SelmEnuv6wM /ll@@T@RG2::frd3RZZT[[McMcHd5efDY)F)G!HHF)-)9TGf$EvE)!//))	
 	
r*   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr!  rq  rr  )r   r"  r-   r#  r!  r   r   rq  r   r@  rr  rv  r   s     r+   r   z)TFWav2Vec2ForSequenceClassification.build3  s,   ::
4T*6t}}112 *##D)*4d+7t~~223 L$$dD$++2I2I%JKL4t,8t334 V%%tT4;;3S3S&TUV V 9* *L LV Vs$   E%3E3E+EE(+E4)NNNNNF)rU  r   r  r  r  bool | Noner  r  r  r  r\  r  rp  r   r&  z-TFSequenceClassifierOutput | Tuple[tf.Tensor]r   )r$   r%   r&   rz   rT  rS  r{  r   r   r   r   r   s   @r+   rn  rn    s    j
&:$  ,0)-,0#'#'5
5
 )5
 '	5

 *5
 !5
 !5
 5
 
75
 5
nVr*   rn  r$  )
ra   zTuple[int, int]rb   r   rc   r   rd   r   r&  r   r   )rp   r   rq   zOptional[int])Lr'   
__future__r   r[  dataclassesr   typingr   r   r   r   numpyr  
tensorflowr-   activations_tfr
   modeling_tf_outputsr   r   r   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   r   r   r   r   configuration_wav2vec2r   
get_loggerr$   r*  ri  _CHECKPOINT_FOR_DOCrG  ro   r   r9   rK   rl   ru   r   Layerrw   r  r   r  r+  r9  r>  rC  rM  rY  r`  rw  r  r  r  r  r  r  r   WAV_2_VEC_2_START_DOCSTRINGrF  r?  rI  rn  r)   r*   r+   <module>r     s   ! "  ! . .   / b b  3  3 
		H	% !" 3 "  / / /8O& 	GGG G 	G
 GV
6U%,,,, Up5!4!4 5p@U\\%7%7 @:G5<<#5#5 GD!G5<<#5#5 !GHG(:(: G:U\\// !+u||11 !+H
!9 
N%,,"4"4 NBgB%,,,, gBT)UELL.. )UX8SU\\// 8Sv6SELL,>,> 6SrM&** M&`O&u||'9'9 O&d M
%,,,, M
 M
`E 1 EP( T5  p iK*/ K*	K*\ r_J0 _J	_JDmV*C mVr*   