
    sg                   ^   d Z ddlmZ ddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZmZmZmZmZmZmZ ddlZddlmZ dd	lmZmZmZmZmZmZ dd
lm Z  ddl!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' ddl(m)Z)  e&jT                  e+      Z,dZ-dZ.g dZ/dZ0dZ1e G d de"             Z2e G d de"             Z3e G d de"             Z4e G d de"             Z5dKdZ6dLdZ7	 dM	 	 	 	 	 	 	 	 	 dNdZ8 G d dejr                  jt                        Z; G d d ejr                  jt                        Z< G d! d"ejr                  jt                        Z= G d# d$ejr                  jt                        Z> G d% d&ejr                  jt                        Z? G d' d(ejr                  jt                        Z@ G d) d*ejr                  jt                        ZA G d+ d,ejr                  jt                        ZB G d- d.ejr                  jt                        ZC G d/ d0ejr                  jt                        ZD G d1 d2ejr                  jt                        ZE G d3 d4ejr                  jt                        ZF G d5 d6e      ZGd7ZHd8ZIdOd9ZJ G d: d;ejr                  jt                        ZKe G d< d=ejr                  jt                               ZL e$d>eH       G d? d@eG             ZM G dA dBejr                  jt                        ZN G dC dDejr                  jt                        ZO e$dEeH       G dF dGeG             ZP e$dHeH       G dI dJeGe             ZQy)PzTF 2.0 Swin Transformer model.    )annotationsN)	dataclass)partial)AnyCallableDictIterableListOptionalTupleUnion   )ACT2FN)TFPreTrainedModelTFSequenceClassificationLossget_initializerkeraskeras_serializableunpack_inputs)
shape_list)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
SwinConfigr   z&microsoft/swin-tiny-patch4-window7-224)r   1   i   ztabby, tabby catc                  J    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   y)	TFSwinEncoderOutputaH  
    Swin encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    N	tf.Tensorlast_hidden_stateTuple[tf.Tensor, ...] | Nonehidden_states
attentionsreshaped_hidden_states)	__name__
__module____qualname____doc__r#   __annotations__r%   r&   r'        \/var/www/html/venv/lib/python3.12/site-packages/transformers/models/swin/modeling_tf_swin.pyr!   r!   C   s6    2 $(y'26M/6/3J,3;?8?r.   r!   c                  X    e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	ded
<   y)TFSwinModelOutputa  
    Swin model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`tf.Tensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
            Average pooling of the last layer hidden-state.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nr"   r#   tf.Tensor | Nonepooler_outputr$   r%   r&   r'   )
r(   r)   r*   r+   r#   r,   r3   r%   r&   r'   r-   r.   r/   r1   r1   d   sA    6 $(y'&*M#*26M/6/3J,3;?8?r.   r1   c                  h    e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	ded
<   e
d        Zy)TFSwinMaskedImageModelingOutputa  
    Swin masked image model outputs.

    Args:
        loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `bool_masked_pos` is provided):
            Masked image modeling (MLM) loss.
        reconstruction (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Reconstructed pixel values.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nr2   lossr"   reconstructionr$   r%   r&   r'   c                N    t        j                  dt               | j                  S )Nzlogits attribute is deprecated and will be removed in version 5 of Transformers. Please use the reconstruction attribute to retrieve the final output instead.)warningswarnFutureWarningr7   selfs    r/   logitsz&TFSwinMaskedImageModelingOutput.logits   s%    ]	

 """r.   )r(   r)   r*   r+   r6   r,   r7   r%   r&   r'   propertyr>   r-   r.   r/   r5   r5      sR    6 "D
! $NI$26M/6/3J,3;?8?# #r.   r5   c                  X    e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	ded
<   y)TFSwinImageClassifierOutputa  
    Swin outputs for image classification.

    Args:
        loss (`tf.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`tf.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each stage) of shape
            `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nr2   r6   r"   r>   r$   r%   r&   r'   )
r(   r)   r*   r+   r6   r,   r>   r%   r&   r'   r-   r.   r/   rA   rA      s@    6 "D
!FI26M/6/3J,3;?8?r.   rA   c           	         t        |       \  }}}}t        j                  | |||z  |||z  ||f      } t        j                  | d      }t        j                  |d|||f      }|S )z2
    Partitions the given input into windows.
    r   r   r            )r   tfreshape	transpose)input_featurewindow_size
batch_sizeheightwidthnum_channelswindowss          r/   window_partitionrR      st     /9.G+J|JJ	V{*K+9M{\hiM ll=*<=Gjj2{K"NOGNr.   c           	     t   t        j                  |       d   }t        j                  ||z  ||z  z  t         j                        }t         j                  j                  ||      }t        j                  | |||z  ||z  ||df      } t        j                  | d      } t        j                  | |||df      } | S )z?
    Merges windows to produce higher resolution features.
    r   rG   rC   )rH   shapecastint32mathfloordivrI   rJ   )rQ   rL   rN   rO   xyrM   s          r/   window_reverser[      s     	!A
+";<bhhGA!!!Q'Jjj*f3Uk5I;XceghG ll7$67Gjj:vub"ABGNr.   c                    |dk(  s|s| S d|z
  }t        |       }t        |      }|d   gdg|dz
  z  z   }t        j                  j	                  |      }t        j
                  ||k  dd      }|dkD  r|r||z  }| |z  S )zb
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
            r   r         ?)r   lenrH   randomuniformwhere)	input	drop_probtrainingscale_by_keep	keep_probinput_shapendimrT   random_tensors	            r/   	drop_pathrk      s     CxIIU#K{D^sdQh//EII%%e,MHH]i7cBM3="=  r.   c                  H     e Zd ZdZdd fdZddZ	 d	 	 	 	 	 	 	 d	dZ xZS )
TFSwinEmbeddingszW
    Construct the patch and position embeddings. Optionally, also the mask token.
    c                   t        |   di | t        |d      | _        | j                  j                  | _        | j                  j
                  | _        |j                  | _        || _        |j                  | _	        t        j                  j                  dd      | _        t        j                  j                  |j                  d      | _        || _        y )Npatch_embeddingsnamenormh㈵>)rq   epsilondropoutr-   )super__init__TFSwinPatchEmbeddingsro   num_patches	grid_size
patch_grid	embed_dimuse_mask_tokenuse_absolute_embeddingsr   layersLayerNormalizationrr   Dropouthidden_dropout_probru   config)r=   r   r}   kwargs	__class__s       r/   rw   zTFSwinEmbeddings.__init__  s    "6" 5fCU V00<<//99)),'-'E'E$LL333N	||++F,F,FY+Wr.   c                   | j                   r'| j                  dd| j                  fdd      | _        nd | _        | j                  r4| j                  d| j
                  dz   | j                  fdd      | _        nd | _        | j                  ry d| _        t        | dd       Mt        j                  | j                  j                        5  | j                  j                  d        d d d        t        | d	d       dt        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        t        | d
d       Nt        j                  | j                   j                        5  | j                   j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)Nr   zeros
mask_tokenrT   initializerrq   positional_embeddings)r   rq   Tro   rr   ru   )r}   
add_weightr|   r   r~   ry   position_embeddingsbuiltgetattrrH   
name_scopero   rq   buildrr   r   ru   r=   rh   s     r/   r   zTFSwinEmbeddings.build  s   "ooQ4>>4JX_frosDO"DO'''+D$$q($..9wUl (7 (D$ (,D$::
4+T2>t4499: 2%%++D124&2tyy~~. E		tT[[-B-B CDE4D)5t||001 )""4() ) 62 2E E) )s$   F=&3G	G=G	GGc                   | j                  ||      \  }}| j                  ||      }t        |      \  }}}||t        j                  | j
                  |d      }	t        j                  |	|d      }	t        j                  |d      }
t        j                  |
|	j                        }
|d|
z
  z  |	|
z  z   }| j                  || j                  z   }| j                  ||      }||fS )Nre   r   r   rG   r^   )ro   rr   r   rH   repeatr   expand_dimsrU   dtyper   ru   )r=   pixel_valuesbool_masked_posre   
embeddingsoutput_dimensionsrM   seq_len_mask_tokensmasks              r/   callzTFSwinEmbeddings.call5  s     )-(=(=lU](=(^%
%YYzHY=
!+J!7
GQ&))DOOZCK))K!<K>>/26D774!2!23D#sTz2[45GGJ##/#d&>&>>J\\*x\@
,,,r.   F)r   r   r}   boolreturnNonerh   tf.TensorShaper   r   )NF)r   r"   r   r   re   r   r   !Tuple[tf.Tensor, Tuple[int, int]])r(   r)   r*   r+   rw   r   r   __classcell__r   s   @r/   rm   rm   	  s>    )6 W\-%-8<-OS-	*-r.   rm   c                  <     e Zd ZdZ fdZddZdddZd	dZ xZS )
rx   z#
    Image to Patch Embedding.
    c                B   t        |   di | |j                  |j                  }}|j                  |j
                  }}t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _        || _
        |d   |d   z  |d   |d   z  f| _        t        j                  j                  || j                  | j                  dd      | _        y )Nr   r   valid
projection)filterskernel_sizestridespaddingrq   r-   )rv   rw   
image_size
patch_sizerP   r|   
isinstancecollectionsabcr	   ry   rz   r   r   Conv2Dr   )	r=   r   r   r   r   rP   hidden_sizery   r   s	           r/   rw   zTFSwinPatchEmbeddings.__init__R  s   "6"!'!2!2F4E4EJ
$*$7$79I9Ik#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&$Q-:a=8*Q-:VW=:XY,,--OO . 
r.   c                R   || j                   d   z  dk7  r>dddd| j                   d   || j                   d   z  z
  ff}t        j                  ||      }|| j                   d   z  dk7  r>ddd| j                   d   || j                   d   z  z
  fdf}t        j                  ||      }|S )Nr   r   r   r   )r   rH   pad)r=   r   rN   rO   
pad_valuess        r/   	maybe_padzTFSwinPatchEmbeddings.maybe_padg  s    4??1%%* &&1dooa6H5SWSbSbcdSeKe6e2fgJ66,
;LDOOA&&!+ &1dooa.@6DOO\]L^C^.^*_aghJ66,
;Lr.   c                   t        |      \  }}}}t        j                         r|| j                  k7  rt	        d      | j                  |||      }t        j                  |d      }| j                  ||      }t        j                  |d      }t        |      \  }}	}}||f}
t        j                  |||	df      }t        j                  |d      }||
fS )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   rD   r   r   r   r   r   r   rD   rG   r   rD   r   )	r   rH   executing_eagerlyrP   
ValueErrorr   rJ   r   rI   )r=   r   re   r   rP   rN   rO   r   rM   channelsr   s              r/   r   zTFSwinPatchEmbeddings.callp  s    )3L)A&<!ld6G6G&Gw  ~~lFEB ||L,?__\H_E
 \\*l;
.8.D+
Hfe#UOZZ
Z2,FG
\\*i8
,,,r.   c                   | j                   ry d| _         t        | dd       \t        j                  | j                  j
                        5  | j                  j                  d d d | j                  g       d d d        y y # 1 sw Y   y xY w)NTr   )r   r   rH   r   r   rq   r   rP   r   s     r/   r   zTFSwinPatchEmbeddings.build  s}    ::
4t,8t334 M%%tT49J9J&KLM M 9M Ms   *A??B)r   r"   rN   intrO   r   r   r"   r   )r   r"   re   r   r   r   N	r(   r)   r*   r+   rw   r   r   r   r   r   s   @r/   rx   rx   M  s    
*-0Mr.   rx   c                  P     e Zd ZdZ	 d	 	 	 	 	 	 	 d fdZddZd	d
dZddZ xZS )TFSwinPatchMergingaB  
    Patch Merging Layer.

    Args:
        input_resolution (`Tuple[int]`):
            Resolution of input feature.
        dim (`int`):
            Number of input channels.
        norm_layer (`keras.layer.Layer`, *optional*, defaults to `keras.layers.LayerNormalization`):
            Normalization layer class.
    c                   t        |   d	i | || _        || _        t        j
                  j                  d|z  dd      | _        |'t        j
                  j                  dd      | _	        y  |d      | _	        y )
NrD   F	reduction)use_biasrq   rs   rr   rt   rq   rp   r-   )
rv   rw   input_resolutiondimr   r   Denser   r   rr   )r=   r   r   
norm_layerr   r   s        r/   rw   zTFSwinPatchMerging.__init__  sr     	"6" 0++AGe++V7767RDI"/DIr.   c                z    |dz  dk(  xs |dz  dk(  }|r&dd|dz  fd|dz  fdf}t        j                  ||      }|S )NrD   r   r   r   )rH   r   )r=   rK   rN   rO   
should_padr   s         r/   r   zTFSwinPatchMerging.maybe_pad  sS    qjAo:519>
 1fqj/Auqy>6JJFF=*=Mr.   c                   |\  }}t        |      \  }}}t        j                  |||||f      }| j                  |||      }|d d dd ddd dd d f   }	|d d dd ddd dd d f   }
|d d dd ddd dd d f   }|d d dd ddd dd d f   }t        j                  |	|
||gd      }t        j                  ||dd|z  f      }| j                  ||      }| j                  ||      }|S )Nr   rD   r   rG   rE   r   )r   rH   rI   r   concatrr   r   )r=   rK   input_dimensionsre   rN   rO   rM   r   rP   input_feature_0input_feature_1input_feature_2input_feature_3s                r/   r   zTFSwinPatchMerging.call  s)   (&0&?#
A|

=:vul2[\}feD'14a4Aq(89'14a4Aq(89'14a4Aq(89'14a4Aq(89		?O_Ve"fhjk

JA,<=
 		-(	C}xHr.   c                   | j                   ry d| _         t        | dd       ]t        j                  | j                  j
                        5  | j                  j                  d d d| j                  z  g       d d d        t        | dd       ^t        j                  | j                  j
                        5  | j                  j                  d d d| j                  z  g       d d d        y y # 1 sw Y   uxY w# 1 sw Y   y xY w)NTr   rE   rr   )	r   r   rH   r   r   rq   r   r   rr   r   s     r/   r   zTFSwinPatchMerging.build  s    ::
4d+7t~~223 A$$dD!dhh,%?@A4&2tyy~~. <		tQ\ :;< < 3A A< <s   ,C+5,C7+C47D r   )r   Tuple[int, int]r   r   r   Optional[Callable]r   r   )rK   r"   rN   r   rO   r   r   r"   r   )rK   r"   r   r   re   r   r   r"   r   r   s   @r/   r   r     sC    
 ]a0 /0690GY0	06	<r.   r   c                  0     e Zd ZdZdd fdZdddZ xZS )TFSwinDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).c                H    t        t        | 
  di | || _        || _        y Nr-   )rv   r   rw   rd   rf   )r=   rd   rf   r   r   s       r/   rw   zTFSwinDropPath.__init__  s$    nd,6v6"*r.   c                F    t        || j                  || j                        S r   )rk   rd   rf   )r=   rc   re   s      r/   r   zTFSwinDropPath.call  s    $:L:LMMr.   )NT)rd   floatrf   r   r   r   r   )rc   r"   re   r   r   r"   r(   r)   r*   r+   rw   r   r   r   s   @r/   r   r     s    b+
N Nr.   r   c                  X     e Zd Zd fdZddZddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d	dZ xZS )
TFSwinSelfAttentionc                t   t        |   d	i | ||z  dk7  rt        d| d| d      || _        t	        ||z        | _        | j                  | j
                  z  | _        |j                  }t        |t        j                  j                        r|n||f| _        t        j                  j                  | j                  t        |j                         |j"                  d      | _        t        j                  j                  | j                  t        |j                         |j"                  d      | _        t        j                  j                  | j                  t        |j                         |j"                  d      | _        t        j                  j+                  |j,                        | _        y )
Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()query)kernel_initializerr   rq   keyvaluer-   )rv   rw   r   num_attention_headsr   attention_head_sizeall_head_sizerL   r   r   r   r	   r   r   r   r   initializer_rangeqkv_biasr   r   r   r   attention_probs_dropout_probru   )r=   r   r   	num_headsr   rL   r   s         r/   rw   zTFSwinSelfAttention.__init__  s   "6"?a#C5(^_h^iijk  $- #&sY#7 !558P8PP((%k;??3K3KLKS^`kRl 	 \\''.v/G/GH__	 ( 

 <<%%.v/G/GH__	 & 
 \\''.v/G/GH__	 ( 

 ||++F,O,OPr.   c                "   | j                  d| j                  d   z  dz
  d| j                  d   z  dz
  z  | j                  fdd      | _        | j                  | j                  d   dz  | j                  d   dz  fdt        j
                  d	      | _        t	        j                  | j                  d         }t	        j                  | j                  d         }t	        j                  t	        j                  ||d
            }t	        j                  |t        |      d   df      }|d d d d d f   |d d d d d f   z
  }t	        j                  |d      }t	        j                  |d      \  }}|| j                  d   dz
  z  }|d| j                  d   z  dz
  z  }|| j                  d   dz
  z  }t	        j                  ||gd      }| j                  j                  t	        j                  t	        j                   |d      t        j
                               | j"                  ry d| _        t%        | dd       Zt	        j&                  | j(                  j*                        5  | j(                  j-                  d d | j.                  g       d d d        t%        | dd       Zt	        j&                  | j0                  j*                        5  | j0                  j-                  d d | j.                  g       d d d        t%        | dd       [t	        j&                  | j2                  j*                        5  | j2                  j-                  d d | j.                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NrD   r   r   r   relative_position_bias_tabler   Frelative_position_index)rT   	trainabler   rq   ij)indexingrG   )r   rD   r   axisTr   r   r   )r   rL   r   r   rH   rV   r   rangestackmeshgridrI   r   rJ   unstackassignrU   
reduce_sumr   r   r   r   rq   r   r   r   r   )	r=   rh   coords_hcoords_wcoordscoords_flattenrelative_coordsstack_0stack_1s	            r/   r   zTFSwinSelfAttention.build  s!   ,0OO))!,,q0Q9I9I!9L5Lq5PQTXTlTlm/ -< -
)
 (,##A&!+T-=-=a-@A-EF((*	 (7 (
$ 88D,,Q/088D,,Q/0"++h4HIFZ-?-BB,GH(At4~aqj7QQ,,	B::oA>4##A&**1t''**Q..4##A&**((GW#5A>$$++BGGBMM/XZ4[]_]e]e,fg::
4$'3tzz/ C

  $d.@.@!ABC4%1txx}}- AdD,>,>?@A4$'3tzz/ C

  $d.@.@!ABC C 4C CA AC Cs$   ,)M-)M9:)N-M69NNc                    t        |      d d | j                  | j                  gz   }t        j                  ||      }t        j
                  |d      S )NrG   r   rD   r   r   )r   r   r   rH   rI   rJ   )r=   rY   new_x_shapes      r/   transpose_for_scoresz(TFSwinSelfAttention.transpose_for_scores5  sI     mCR(D,D,DdF^F^+__JJq+&||A|,,r.   c                b   t        |      \  }}}| j                  |      }	| j                  | j                  |            }
| j                  | j	                  |            }| j                  |	      }t        j                  |t        j                  |
d            }|t        j                  | j                        z  }t        j                  | j                  t        j                  | j                  d            }t        j                  || j                  d   | j                  d   z  | j                  d   | j                  d   z  df      }t        j                  |d      }|t        j                   |d      z   }|t        |      d   }t        j                  |||z  || j"                  ||f      }t        j                   |d      }t        j                   |d      }||z   }t        j                  |d| j"                  ||f      }t
        j$                  j'                  |d      }| j)                  ||      }|||z  }t        j                  ||      }t        j                  |d	      }t        |      d d
 | j*                  gz   }t        j                  ||      }|r||f}|S |f}|S )N)r   r   r   rD   rG   r   r   rG   )rD   r   r   r   r   r  )r   r   r  r   r   rH   matmulrJ   rW   sqrtr   gatherr   rI   r   rL   r   r   nnsoftmaxru   r   )r=   r%   attention_mask	head_maskoutput_attentionsre   rM   r   r   mixed_query_layer	key_layervalue_layerquery_layerattention_scoresrelative_position_bias
mask_shapeattention_probscontext_layernew_context_layer_shapeoutputss                       r/   r   zTFSwinSelfAttention.call:  s    (6
C JJ}5--dhh}.EF	//

=0IJ//0AB 99[",,y,2WX+dii8P8P.QQ!#--rzz$:V:VX]/^"
 "$"a 4#3#3A#668H8H8KdN^N^_`Na8acef"

 "$.Di!P+bnn=SUV.WW%#N3A6J!zz :#;ZIaIacfhk"l   ^^NA>N^^NA>N/.@!zz*:RAYAY[^`c<de %%--(8r-B ,,,J  -	9O		/;?]LA",]";CR"@D
 #
 

=2IJ6G=/2 O\M]r.   r   r   r   r   r   r   r   r   r   rY   r"   r   r"   NNFF)r%   r"   r  r2   r  r2   r  r   re   r   r   Tuple[tf.Tensor, ...])r(   r)   r*   rw   r   r  r   r   r   s   @r/   r   r     sc    "QH(CT- ,0&*"'= = )= $	=
  = = 
=r.   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFSwinSelfOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                  d      | _        || _	        y Ndenserp   ru   r-   )
rv   rw   r   r   r   r,  r   r   ru   r   r=   r   r   r   r   s       r/   rw   zTFSwinSelfOutput.__init__{  sW    "6"\\'''':
||++F,O,OV_+`r.   c                N    | j                  |      }| j                  ||      }|S Nr   r,  ru   )r=   r%   input_tensorre   s       r/   r   zTFSwinSelfOutput.call  (    

=1]XFr.   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr,  ru   )	r   r   rH   r   r,  rq   r   r   ru   r   s     r/   r   zTFSwinSelfOutput.build  s    ::
4$'3tzz/ 9

  $dhh!7894D)5t||001 )""4() ) 69 9) )s   )C2C$C!$C-r   r   r   r   r   r   r   )r%   r"   r1  r"   re   r   r   r"   r   r(   r)   r*   rw   r   r   r   r   s   @r/   r)  r)  z  s    
	)r.   r)  c                  V     e Zd Zd fdZd Z	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )	TFSwinAttentionc                    t        |   di | t        |||d      | _        t	        ||d      | _        t               | _        y )Nr=   rp   outputr-   )rv   rw   r   r=   r)  self_outputsetpruned_heads)r=   r   r   r   r   r   s        r/   rw   zTFSwinAttention.__init__  s@    "6"'YVL	+FChGEr.   c                    t         )z
        Prunes heads of the model. See base class PreTrainedModel heads: dict of {layer_num: list of heads to prune in
        this layer}
        )NotImplementedError)r=   headss     r/   prune_headszTFSwinAttention.prune_heads  s
    
 "!r.   c                r    | j                  |||||      }| j                  |d   ||      }|f|dd  z   }|S )Nr   r   r   )r=   r:  )	r=   r%   r  r  r  re   self_outputsattention_outputr#  s	            r/   r   zTFSwinAttention.call  sT     yy	K\goyp++LO]U]+^#%QR(88r.   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr=   r:  )r   r   rH   r   r=   rq   r   r:  r   s     r/   r   zTFSwinAttention.build  s    ::
4&2tyy~~. &		%&4-9t//445 -  &&t,- - :& &- -   C%CCC r$  r&  )r%   r"   r  r2   r  r2   r  r   re   r   r   r"   r   )r(   r)   r*   rw   r@  r   r   r   r   s   @r/   r7  r7    s_    "" ,0&*"'  ) $	
    
	-r.   r7  c                  0     e Zd Zd fdZddZddZ xZS )TFSwinIntermediatec                @   t        |   di | t        j                  j	                  t        |j                  |z        d      | _        t        |j                  t              r t        |j                     | _        || _        y |j                  | _        || _        y )Nr,  rp   r-   )rv   rw   r   r   r   r   	mlp_ratior,  r   
hidden_actstrr   intermediate_act_fnr   r-  s       r/   rw   zTFSwinIntermediate.__init__  s    "6"\\''F,<,<s,B(C''R
f''-'-f.?.?'@D$  (.'8'8D$r.   c                J    | j                  |      }| j                  |      }|S r   )r,  rL  )r=   r%   s     r/   r   zTFSwinIntermediate.call  s&    

=100?r.   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wNTr,  )r   r   rH   r   r,  rq   r   r   r   s     r/   r   zTFSwinIntermediate.build  sr    ::
4$'3tzz/ 9

  $dhh!789 9 49 9s   )A>>Br4  )r%   r"   r   r"   r   r5  r   s   @r/   rG  rG    s    
9r.   rG  c                  2     e Zd Zd fdZdddZddZ xZS )TFSwinOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                  d      | _        || _	        || _
        y r+  )rv   rw   r   r   r   r,  r   r   ru   r   r   r-  s       r/   rw   zTFSwinOutput.__init__  sZ    "6"\\'''':
||++F,F,F	Rr.   c                N    | j                  |      }| j                  ||      }|S r/  r0  )r=   r%   re   s      r/   r   zTFSwinOutput.call  r2  r.   c           	     T   | j                   ry d| _         t        | dd       {t        j                  | j                  j
                        5  | j                  j                  d d t        | j                  j                  | j                  z        g       d d d        y y # 1 sw Y   y xY wrO  )r   r   rH   r   r,  rq   r   r   r   rI  r   r   s     r/   r   zTFSwinOutput.build  s    ::
4$'3tzz/ V

  $c$++2G2G$((2R.S!TUV V 4V Vs   A	BB'r4  r   )r%   r"   re   r   r   r"   r   r5  r   s   @r/   rQ  rQ    s    
Vr.   rQ  c                       e Zd Z	 	 d	 	 	 	 	 	 	 	 	 d fdZddZ	 	 	 	 	 	 	 	 	 	 d	dZ	 	 	 d
	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFSwinLayerc                   t        	|   di | |j                  | _        t        j                  |      }||j
                  k  r|n|j
                  | _        || j
                  k  rdn|| _        || _        t        j                  j                  |j                  d      | _        t        |||d      | _        |dkD  rt        |d      n t        j                  j!                  dd      | _        t        j                  j                  |j                  d	      | _        t'        ||d
      | _        t+        ||d      | _        || _        y )Nr   layernorm_beforer   	attentionrp   r]   rk   linearlayernorm_afterintermediater9  r-   )rv   rw   chunk_size_feed_forwardrH   
reduce_minrL   
shift_sizer   r   r   r   layer_norm_epsrX  r7  rY  r   
Activationrk   r[  rG  r\  rQ  swin_outputr   )
r=   r   r   r   r   drop_path_rater_  r   min_resr   s
            r/   rw   zTFSwinLayer.__init__  s$    	"6"'-'E'E$-- 01&-1C1C&C7I[I[&$*:*::!
 0 % ? ?H]H]dv ? w(ikR # ><(((D 	
  %||>>vG\G\ct>u.vsP'(Cr.   c           
        t        j                  ||f      }d| f| | f| dff}d| f| | f| dff}|dkD  rd}|D ]  }	|D ]  }
t        j                  |	d   |z  |	d   |z  dz         }t        j                  |
d   |z  |
d   |z  dz         }t        j                  t        j                  t        j
                  ||      d      d      }t        |      dk\  rEt        j                  t        |      f|j                        |z  }t        j                  |||      }|dz  }  t        j                  |d      }t        j                  |d      }t        ||      }t        j                  |d||z  f      }t        j                  |d      t        j                  |d      z
  }t        j                  |dk7  t        d      |      }t        j                  |dk(  t        d	      |      }|S )
Nr   rG   r   r   )rG   rD   )r   rD   g      Yr]   )rH   r   r   rI   r   r   r_   onesr   tensor_scatter_nd_updater   rR   rb   r   )r=   rN   rO   rL   r_  img_maskheight_sliceswidth_slicescountheight_slicewidth_sliceheight_inds
width_indsindicesupdatesmask_windows	attn_masks                    r/   get_attn_maskzTFSwinLayer.get_attn_mask  s   88VUO,k\*k\J;,G:+WYIZ[[L)[L:++F*VXHYZ >E - #/ K"$((<?V+C\RS_W]E]`aEa"bK!#+a.5*@+a.SXBX[\B\!]J jj"++k:2V]_)`bijG7|q("$''3w</"PSX"X#%#>#>xRY#ZQJE >>(B/>>(A.'+>zz,[;5N0OPNN<3bnn\ST6UU	HHY!^U6]IF	HHY!^U3ZC	r.   c                    |||z  z
  |z  }|||z  z
  |z  }ddgd|gd|gddgg}t        j                  ||      }t        j                  |d      }||fS )Nr   r  )rH   r   rI   )r=   r%   rL   rN   rO   	pad_right
pad_bottomr   s           r/   r   zTFSwinLayer.maybe_pad!  su     !5;#66+E	!F[$88KG
!fq*o9~1vF
}j9ZZ
E2
j((r.   c                B   t        j                  |      }|| j                  k  rdn| j                  }|| j                  k  r|n| j                  }|\  }	}
t	        |      \  }}}|}| j                  ||      }t        j                  |||	|
|f      }| j                  |||	|
      \  }}t	        |      \  }}}}|dkD  rt        j                  || | fd      }n|}t        ||      }t        j                  |d||z  |f      }| j                  ||||      }| j                  |||||      }|d   }t        j                  |d|||f      }t        ||||      }|dkD  rt        j                  |||fd      }n|}|d   dkD  xs |d	   dkD  }|r|d d d |	d |
d d f   }t        j                  |||	|
z  |f      }|| j                  ||      z   }| j                  ||      }| j                  |      }|| j!                  ||      z   }|r	||d
   f}|S |f}|S )Nr   r   )r   rD   )shiftr   rG   )rN   rO   rL   r_  )r  re   r   rF   r   )rH   r^  rL   r_  r   rX  rI   r   rollrR   rt  rY  r[   rk   r[  r\  rb  )r=   r%   r   r  r  re   rd  r_  rL   rN   rO   rM   r   r   shortcutr   
height_pad	width_padshifted_hidden_stateshidden_states_windowsrs  attention_outputsrC  attention_windowsshifted_windows
was_paddedlayer_outputlayer_outputss                               r/   r   zTFSwinLayer.call+  s    -- 01!T%5%55Q4??
!(D,<,<!<g$BRBR(",]";
Ax --mh-O

=:vuh2WX$(NN=+vW\$]!z&0&?#:y!>$&GGM:+PZ{A[bh$i!$1! !11F T "

+@2{U`G`bjBk l&&YKT^ ' 
	 !NN!9iK\go + 
 -Q/JJ'7"k;X`9ab():KU^_ > "
J?W^d e /]Q&;*Q-!*;
 1!WfWfufa2G HJJ'8:vPU~W_:`a 4>>2Ch>#WW++MH+M((6$t'7'7x'7'XX@Q'8';< YeWfr.   c                0   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTrX  rY  rk   r[  r\  rb  )r   r   rH   r   rX  rq   r   r   rY  rk   r[  r\  rb  r   s     r/   r   zTFSwinLayer.buildm  s   ::
4+T2>t4499: D%%++T4,BCD4d+7t~~223 +$$T*+4d+7t~~223 +$$T*+4*D1=t33889 C$$**D$+ABC4.:t00556 .!!''-.4-9t//445 -  &&t,- - :D D+ ++ +C C. .- -sH   )I2II'&)I4J 'JII$'I14I= J	J)r]   r   )
r   r   r   r   rc  r   r_  r   r   r   )
rN   r   rO   r   rL   r   r_  r   r   r2   )
r%   r"   rL   r   rN   r   rO   r   r   zTuple[tf.Tensor, tf.Tensor]NFF)r%   r"   r   r   r  r2   r  r   re   r   r   r"   r   )	r(   r)   r*   rw   rt  r   r   r   r   r   s   @r/   rV  rV    s     !$ *	
    
:8)&)58)BE)NQ)	$) '+"'@ @ *@ $	@
  @ @ 
@D-r.   rV  c                  n     e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFSwinStagec                x   t        
|   d
i | || _        || _        t	        |      D 	cg c]1  }	t        |||||	dz  dk(  rdn|j                  dz  ||	   d|	       3 c}	| _        |< |||t        t        j                  j                  d      d      | _        d	| _        y d | _        d	| _        y c c}	w )NrD   r   zblocks.)r   r   r   r   r_  rc  rq   rs   )rt   
downsample)r   r   rq   Fr-   )rv   rw   r   r   r   rV  rL   blocksr   r   r   r   r  pointing)r=   r   r   r   depthr   rk   r  r   ir   s             r/   rw   zTFSwinStage.__init__  s     	"6" 5\
  !1#!"Q!1&2D2D2I(|qc]
 !( "5<<#B#BDQ!	DO  #DO1
s   6B7c                   |\  }}t        | j                        D ]   \  }}	|||   nd }
 |	|||
||      }|d   }" | j                  .|dz   dz  |dz   dz  }}||||f}| j                  d   ||      }n||||f}||f}|r|dd  z  }|S )Nr   r   r   rD   )	enumerater  r  )r=   r%   r   r  r  re   rN   rO   r  layer_modulelayer_head_maskr  height_downsampledwidth_downsampledr   stage_outputss                   r/   r   zTFSwinStage.call  s     )(5 	-OA|.7.CilO(/BS^fM *!,M	- ??&5;aZA4EPQ	VWGW 1!'0BDU V OOM!,<>NYaObM!' >&(9:]12..Mr.   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   bxY w# 1 sw Y   UxY w)NTr  r  )r   r   rH   r   r  rq   r   r  r=   rh   layers      r/   r   zTFSwinStage.build  s    ::
4t,8t334 ,%%d+,44(4 &]]5::. &KK%& && 5, ,& &s   C*CCC	)r   r   r   r   r   r   r  r   r   r   rk   zList[float]r  r   r   r   r  )r%   r"   r   r   r  r2   r  Optional[bool]re   r   r   r'  r   r5  r   s   @r/   r  r    s    && & *	&
 & & & '& 
&X '+,1  * $	
 *  
>
&r.   r  c                  Z     e Zd Zd fdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFSwinEncoderc                   t        |   di | t        |j                        | _        || _        t        t        j                  ddt        |j                              |j                  z  j                               }t        | j                        D cg c]  }t        |t        |j                  d|z  z        |d   d|z  z  |d   d|z  z  f|j                  |   |j                   |   |t        |j                  d |       t        |j                  d |dz           || j                  dz
  k  rt"        nd d|        c}| _        d| _        y c c}w )Nr   r   rD   zlayers.)r   r   r   r  r   rk   r  rq   Fr-   )rv   rw   r_   depths
num_layersr   listrH   linspacesumrc  numpyr   r  r   r|   r   r   r   gradient_checkpointing)r=   r   rz   r   dpri_layerr   s         r/   rw   zTFSwinEncoder.__init__  sD   "6"fmm,BKK1c&--&89F<Q<QQXXZ[ !1
  ((1g:56"+A,1g:">	!QRT[Q[@\!]mmG, **73c&--"9:S}QX[\Q\A]=^_29DOOa<O2O-VZwi(	
 ',#
s   B.Ec                b   d}|rdnd }	|rdnd }
|rdnd }|rMt        |      \  }}}t        j                  ||g||      }t        j                  |d      }|	|fz  }	|
|fz  }
t	        | j
                        D ]  \  }}|||   nd } ||||||      }|d   }|d   }|d   |d   f}||fz  }|rMt        |      \  }}}t        j                  ||g||      }t        j                  |d      }|	|fz  }	|
|fz  }
|s||dd  z  } |st        d	 ||	|fD              S t        ||	||

      S )Nr-   r   r   r   r   r  rG   rD   c              3  &   K   | ]	  }||  y wr   r-   ).0vs     r/   	<genexpr>z%TFSwinEncoder.call.<locals>.<genexpr>   s     mq_`_lms   )r#   r%   r&   r'   )r   rH   rI   rJ   r  r   tupler!   )r=   r%   r   r  r  output_hidden_statesreturn_dictre   all_input_dimensionsall_hidden_statesall_reshaped_hidden_statesall_self_attentionsrM   r   r   reshaped_hidden_stater  r  r  r  r   s                        r/   r   zTFSwinEncoder.call  s     ""6BD+?RT"$5b4)3M)B&J;$&JJ}z>jL\>j^i>j$k!$&LL1F$U!-!11&+@*BB&(5 	9OA|.7.CilO(/BS^fM *!,M -a 0 1" 57H7LM %5$77 #-7-F*
A{(*

=:BnP`BnbmBn(o%(*5JL(Y%!m%55!*/D.FF* #}QR'88#-	90 m]4EGZ$[mmm"++*#=	
 	
r.   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY w)NTr   )r   r   r   rH   r   rq   r   r  s      r/   r   zTFSwinEncoder.build)  sp    ::
44(4 &]]5::. &KK%& && 5& &s   A..A7	)r   r   rz   r   )NFFTF)r%   r"   r   r   r  r2   r  r   r  r   r  r   re   r   r   z1Union[Tuple[tf.Tensor, ...], TFSwinEncoderOutput]r   r5  r   s   @r/   r  r    sr    ,2 '+"'%* 7
 7
 *7
 $	7

  7
 #7
 7
 7
 
;7
r&r.   r  c                      e Zd ZdZeZdZdZy)TFSwinPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    swinr   N)r(   r)   r*   r+   r   config_classbase_model_prefixmain_input_namer-   r.   r/   r  r  3  s    
 L$Or.   r  a`  
    This model is a Tensorflow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) sub-class. Use it as a
    regular Tensorflow Module and refer to the Tensorflow documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`SwinConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a:  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`ViTImageProcessor.__call__`]
            for details.
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                    | t         j                  j                         } | j                         }|dvrt	        dt        |       z         |S )z
    From tensorflow addons
    https://github.com/tensorflow/addons/blob/8cec33fcaaf1cf90aec7bdd55a0fcdbb251ce5c2/tensorflow_addons/utils/keras_utils.py#L71
    >   channels_lastchannels_firstzWThe `data_format` argument must be one of "channels_first", "channels_last". Received: )r   backendimage_data_formatlowerr   rK  )r   data_formats     r/   normalize_data_formatr  `  sS    
 }//1++-K==ehklqhrr
 	
 r.   c                  h     e Zd ZdZej
                  df	 	 	 	 	 	 	 d fdZddZd	dZd
 fdZ	 xZ
S )AdaptiveAveragePooling1Da|  
    Args:
    Average 1D Pooling with adaptive kernel size.
      output_size: An integer or tuple/list of a single integer, specifying pooled_features.
        The new size of output channels.
      data_format: A string,
        one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs.
        `channels_last` corresponds to inputs with shape `(batch, steps, channels)` while `channels_first` corresponds
        to inputs with shape `(batch, channels, steps)`.
    Input shape:
      - If `data_format='channels_last'`: 3D tensor with shape `(batch, steps, channels)`.
      - If `data_format='channels_first'`: 3D tensor with shape `(batch, channels, steps)`.
    Output shape:
      - If `data_format='channels_last'`: 3D tensor with shape `(batch_size, pooled_steps, channels)`.
      - If `data_format='channels_first'`: 3D tensor with shape `(batch_size, channels, pooled_steps)`.

    Adapted from [tensorflow-addon's adaptive pooling.py](
        https://github.com/tensorflow/addons/blob/8cec33fcaaf1cf90aec7bdd55a0fcdbb251ce5c2/tensorflow_addons/layers/adaptive_pooling.py#L90-L120
    )
    Nc                    t        |      | _        || _        t        |t              r|fn
t        |      | _        t        |    di | y r   )	r  r  reduce_functionr   r   r  output_sizerv   rw   )r=   r  r  r  r   r   s        r/   rw   z!AdaptiveAveragePooling1D.__init__  sE     1=.-7S-IK>uU`Oa"6"r.   c                N   | j                   d   }| j                  dk(  rDt        j                  ||d      }t        j                  |d      }| j                  |d      }|S t        j                  ||d      }t        j                  |d      }| j                  |d      }|S )Nr   r  r   r   rD   r   )r  r  rH   splitr   r  )r=   inputsargsbinssplitsout_vects         r/   r   zAdaptiveAveragePooling1D.call  s    ".XXfd3FXXf1-F++F+;H
  XXfd3FXXf1-F++F+;Hr.   c                   t        j                  |      j                         }| j                  dk(  r-t        j                  |d   | j                  d   |d   g      }|S t        j                  |d   |d   | j                  d   g      }|S )Nr  r   rD   r   )rH   TensorShapeas_listr  r  )r=   rh   rT   s      r/   compute_output_shapez-AdaptiveAveragePooling1D.compute_output_shape  s    nn[199;.NNKND4D4DQ4GUV#XYE  NNKNKNDDTDTUVDW#XYEr.   c                ^    | j                   | j                  d}t        |          }i ||S )N)r  r  )r  r  rv   
get_config)r=   r   base_configr   s      r/   r  z#AdaptiveAveragePooling1D.get_config  s;    ++++
 g(*(+(((r.   )r  zUnion[int, Iterable[int]]r  r   r  zOptional[str]r   r   )r  r"   r   r   )rh   zIterable[int]r   r   )r   zDict[str, Any])r(   r)   r*   r+   rH   reduce_meanrw   r   r  r  r   r   s   @r/   r  r  o  sS    0 %'NN%)	
#.
# "
# #	
# 

#
) )r.   r  c                       e Zd ZeZ	 d	 	 	 	 	 	 	 d fdZd	dZd
dZddZe		 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z
ddZ xZS )TFSwinMainLayerc                   t        |   di | || _        t        |j                        | _        t        |j                  d| j
                  dz
  z  z        | _        t        ||d      | _
        t        || j                  j                  d      | _        t        j                  j!                  |j"                  d      | _        |rt'        d	
      | _        y d | _        y )NrD   r   r   )r}   rq   encoderrp   	layernormr   r   )r  r-   )rv   rw   r   r_   r  r  r   r|   num_featuresrm   r   r  r{   r  r   r   r   r`  r  r  poolerr=   r   add_pooling_layerr}   r   r   s        r/   rw   zTFSwinMainLayer.__init__  s     	"6"fmm, 0 0119L3M MN*6.Wcd$VT__-G-GiX88AVAV]h8iDU.4@[_r.   c                .    | j                   j                  S r   )r   ro   r<   s    r/   get_input_embeddingsz$TFSwinMainLayer.get_input_embeddings  s    ///r.   c                    |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r  rY  r@  )r=   heads_to_pruner  r?  s       r/   _prune_headszTFSwinMainLayer._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr.   c                X    |t         d gt        | j                  j                        z  S r   )r>  r_   r   r  )r=   r  s     r/   get_head_maskzTFSwinMainLayer.get_head_mask  s*     %%vDKK..///r.   c           	     l   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|t	        d      | j                  |      }| j                  |||      \  }}	| j                  ||	|||||      }
|
d   }| j                  ||      }d }| j                  8t        |      \  }}}| j                  |      }t        j                  |||f      }|s||f|
dd  z   }|S t        |||
j                  |
j                  |
j                         S )N You have to specify pixel_values)r   re   r  r  r  r  re   r   r   r   )r#   r3   r%   r&   r'   )r   r  r  use_return_dictr   r  r   r  r  r  r   rH   rI   r1   r%   r&   r'   )r=   r   r   r  r  r  r  re   embedding_outputr   encoder_outputssequence_outputpooled_outputrM   r   r  r9  s                    r/   r   zTFSwinMainLayer.call  sn    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@ &&y1	-1__/H .= .
** ,,/!5# ' 
 *!,..8.L;;"*4_*E'J< KK8MJJ}z<6PQM%}58KKFM -')77&11#2#I#I
 	
r.   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r  r  )
r   r   rH   r   r   rq   r   r  r  r  r   s     r/   r   zTFSwinMainLayer.build  s   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()4d+7t~~223 F$$dD$2C2C%DEF F 8, ,) )F Fs$   D2%D>?)E
2D;>E
ETFr   r   r  r   r}   r   r   r   )r   rx   )r  zDict[int, List])r  zOptional[Any]r   r
   NNNNNNFr   r2   r   r2   r  r2   r  r  r  r  r  r  re   r   r   z/Union[TFSwinModelOutput, Tuple[tf.Tensor, ...]]r   )r(   r)   r*   r   r  rw   r  r  r  r   r   r   r   r   s   @r/   r  r    s    L Z_` `59`RV`	`0C0
  *.,0&*,0/3&*:
&:
 *:
 $	:

 *:
 -:
 $:
 :
 
9:
 :
xFr.   r  z^The bare Swin Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Z	 d	 	 	 	 	 	 	 d fdZ ee       eeee	de
      e	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFSwinModelc                X    t        |   |fi | || _        t        |d      | _        y )Nr  rp   )rv   rw   r   r  r  r  s        r/   rw   zTFSwinModel.__init__!  s,     	*6*#F8	r.   vision)
checkpointoutput_typer  modalityexpected_outputc           	         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|t	        d      | j                  |||||||      }|S )z
        bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`, *optional*):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
        r  )r   r   r  r  r  r  re   )r   r  r  r  r   r  )	r=   r   r   r  r  r  r  re   swin_outputss	            r/   r   zTFSwinModel.call(  s    . 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@yy%+/!5# ! 
 r.   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )r   r   rH   r   r  rq   r   r   s     r/   r   zTFSwinModel.buildT  se    ::
4&2tyy~~. &		%& & 3& &s   A11A:r  r  r  r  r   )r(   r)   r*   rw   r   SWIN_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr1   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r   r   r   r   s   @r/   r  r    s     Z_9 9599RV9	9 ++@A&%$.  *.,0&*,0/3&*!&! *! $	!
 *! -! $! ! 
9!  B!F&r.   r  c                  ,     e Zd ZdZd fdZddZ xZS )TFSwinPixelShufflez0TF layer implementation of torch.nn.PixelShufflec                x    t        |   di | t        |t              r|dk  rt	        d|       || _        y )NrD   z1upscale_factor must be an integer value >= 2 got r-   )rv   rw   r   r   r   upscale_factor)r=   r  r   r   s      r/   rw   zTFSwinPixelShuffle.__init__`  sA    "6".#..12DPQ_P`abb,r.   c           
        |}t        |      \  }}}}| j                  dz  }t        ||z        }t        j                  t        |      D 	cg c]  }t        |      D ]
  }	||	|z  z     c}	}g      }
t        j                  |t        j                  |
|dg      d      }t        j                  j                  || j                  d      }|S c c}	}w )NrD   r   rG   )paramsrp  
batch_dimsNHWC)
block_sizer  )
r   r  r   rH   constantr   r  tiler  depth_to_space)r=   rY   r%   rM   r   num_input_channelsblock_size_squaredoutput_depthr  jpermutations              r/   r   zTFSwinPixelShuffle.callf  s    /9-/H,
Aq,!00!3-0BBC
 kk278J2KiQUZ[gUhiPQa!(((i(ij
 		V`bcUd@ertu,,]tGZGZhn,o	 js   C
)r  r   r   r   r%  r   r   s   @r/   r  r  ]  s    :-r.   r  c                  0     e Zd Zd fdZddZddZ xZS )TFSwinDecoderc                    t        |   di | t        j                  j	                  |j
                  dz  |j                  z  ddd      | _        t        |j
                  d      | _	        || _
        y )NrD   r   0)r   r   r   rq   1rp   r-   )rv   rw   r   r   r   encoder_striderP   conv2dr  pixel_shuffler   )r=   r   r   r   s      r/   rw   zTFSwinDecoder.__init__x  sn    "6"ll))))1,v/B/BBPQ[\cf * 
 00E0ECPr.   c                    |}t        j                  |d      }| j                  |      }| j                  |      }t        j                  |d      }|S )Nr   r   )rH   rJ   r  r  )r=   rY   r%   s      r/   r   zTFSwinDecoder.call  sK    ]LAM2**=9]LAr.   c                   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )
r   r   rH   r   r  rq   r   r   r   r  r   s     r/   r   zTFSwinDecoder.build  s    ::
44(4t{{//0 O!!4tT[[5L5L"MNO4$/;t11667 /""((./ / <O O/ /s   4C#=C/#C,/C8r   r   r%  r   r5  r   s   @r/   r  r  w  s    	/r.   r  zvSwin Model with a decoder on top for masked image modeling, as proposed in [SimMIM](https://arxiv.org/abs/2111.09886).c                       e Zd Zd fdZ ee       eee      e		 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Z
ddZ xZS )	TFSwinForMaskedImageModelingc                p    t         |   |       t        |ddd      | _        t	        |d      | _        y )NFTr  )r  r}   rq   decoderrp   )rv   rw   r  r  r  r$  r=   r   r   s     r/   rw   z%TFSwinForMaskedImageModeling.__init__  s2     #FeTX_ef	$V)<r.   )r  r  c           	        ||n| j                   j                  }| j                  |||||||      }|d   }	t        j                  |	d      }	t        |	      \  }
}}t        |dz        x}}t        j                  |	|
|||f      }	| j                  |	      }d}|| j                   j                  | j                   j                  z  }t        j                  |d||f      }t        j                  || j                   j                  d      }t        j                  || j                   j                  d      }t        j                  |d      }t        j                  |t        j                        }t        j                   j#                  t        j                  |d	      t        j                  |d	            }t        j                  |d      }t        j$                  ||z        }t        j$                  |      d
z   | j                   j&                  z  }||z  }t        j                  |d      }|s|f|dd z   }||f|z   S |S t)        |||j*                  |j,                  |j.                        S )aA  
        bool_masked_pos (`tf.Tensor` of shape `(batch_size, num_patches)`):
            Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

        Returns:

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, TFSwinForMaskedImageModeling
        >>> import tensorflow as tf
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
        >>> model = TFSwinForMaskedImageModeling.from_pretrained("microsoft/swin-tiny-patch4-window7-224")

        >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
        >>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
        >>> # create random boolean mask of shape (batch_size, num_patches)
        >>> bool_masked_pos = tf.random.uniform((1, num_patches)) >= 0.5

        >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
        >>> loss, reconstructed_pixel_values = outputs.loss, outputs.reconstruction
        >>> list(reconstructed_pixel_values.shape)
        [1, 3, 224, 224]
        ```N)r   r  r  r  r  re   r   r   g      ?rG   r   rD   )r   rD   r   r   rs   r  )r6   r7   r%   r&   r'   )r   r  r  rH   rJ   r   r   rI   r$  r   r   r   r   rU   float32r   lossesmean_absolute_errorr  rP   r5   r%   r&   r'   )r=   r   r   r  r  r  r  re   r#  r  rM   rP   sequence_lengthrN   rO   reconstructed_pixel_valuesmasked_im_losssizer   reconstruction_loss
total_lossnum_masked_pixelsr9  s                          r/   r   z!TFSwinForMaskedImageModeling.call  s@   T &1%<k$++B]B]))+/!5#  
 "!*,,	B4>4O1
L/_c122**_z<QWY^6_` &*\\/%B"&;;))T[[-C-CCD jj2tT:JKO99_dkk.D.DaHD99T4;;#9#91=D>>$*D774,D"',,"B"B\<87F#
 #%..1Da"H':T'ABJ!#t!4t!;t{{?W?W W'*;;NZZ=N02WQR[@F3A3M^%.YSYY.5!//))#*#A#A
 	
r.   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r$  )r   r   rH   r   r  rq   r   r$  r   s     r/   r   z"TFSwinForMaskedImageModeling.build  s    ::
4&2tyy~~. &		%&4D)5t||001 )""4() ) 6& &) )rE  r   r  )r   r2   r   r2   r  r2   r  r  r  r  r  r  re   r   r   z-Union[Tuple, TFSwinMaskedImageModelingOutput]r   )r(   r)   r*   rw   r   r  r   r5   r  r   r   r   r   r   s   @r/   r"  r"    s    = ++@A+JYhi *.,0&*,0/3&*[
&[
 *[
 $	[

 *[
 -[
 $[
 [
 
7[
  j B[
z	)r.   r"  z
    Swin Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                       e Zd Zd fdZ ee       eeee	e
      e	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	TFSwinForImageClassificationc                6   t         |   |       |j                  | _        t        |d      | _        |j                  dkD  r1t
        j                  j                  |j                  d      | _	        y t
        j                  j                  dd      | _	        y )Nr  rp   r   
classifierrZ  )
rv   rw   
num_labelsr  r  r   r   r   ra  r5  r%  s     r/   rw   z%TFSwinForImageClassification.__init__  s      ++#F8	
   1$ LLv00|D 	 (((E 	r.   )r  r  r  r  c                F   ||n| j                   j                  }| j                  ||||||      }|d   }	| j                  |	|      }
|dn| j	                  ||
      }|s|
f|dd z   }||f|z   S |S t        ||
|j                  |j                  |j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   rD   )r6   r>   r%   r&   r'   )	r   r  r  r5  hf_compute_lossrA   r%   r&   r'   )r=   r   r  labelsr  r  r  re   r#  r  r>   r6   r9  s                r/   r   z!TFSwinForImageClassification.call$  s    0 &1%<k$++B]B]))/!5#  
  
B~t4+?+?+OY,F)-)9TGf$EvE*!//))#*#A#A
 	
r.   c                "   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       |t        | j                  d      ret        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTr  r5  rq   )
r   r   rH   r   r  rq   r   hasattrr5  r  r   s     r/   r   z"TFSwinForImageClassification.buildY  s    ::
4&2tyy~~. &		%&4t,8t/]]4??#7#78 POO))4tyy7M7M*NOP P 0 9& &P Ps   C9;3D9DDr   r  )r   r2   r  r2   r9  r2   r  r  r  r  r  r  re   r   r   z9Union[Tuple[tf.Tensor, ...], TFSwinImageClassifierOutput]r   )r(   r)   r*   rw   r   r  r   _IMAGE_CLASS_CHECKPOINTrA   r  _IMAGE_CLASS_EXPECTED_OUTPUTr   r   r   r   r   s   @r/   r3  r3    s    
 ++@A*/$4	  *.&*#',0/3&*+
&+
 $+
 !	+

 *+
 -+
 $+
 +
 
C+
  B+
Z
Pr.   r3  )rK   r"   rL   r   r   r"   )
rQ   r"   rL   r   rN   r   rO   r   r   r"   )r]   FT)
rc   r"   rd   r   re   r   rf   r   r   r"   )r   rK  r   rK  )Rr+   
__future__r   collections.abcr   rW   r9   dataclassesr   	functoolsr   typingr   r   r   r	   r
   r   r   r   
tensorflowrH   activations_tfr   modeling_tf_utilsr   r   r   r   r   r   tf_utilsr   utilsr   r   r   r   r   r   configuration_swinr   
get_loggerr(   loggerr  r  r  r<  r=  r!   r1   r5   rA   rR   r[   rk   r   Layerrm   rx   r   r   r   r)  r7  rG  rQ  rV  r  r  r  SWIN_START_DOCSTRINGr  r  r  r  r  r  r  r"  r3  r-   r.   r/   <module>rM     sv   % "    !  N N N  $  #  + 
		H	%  ? %  C 1  @+ @ @@  @  @  @F )#k )# )#X  @+  @  @F  ]a!!!&!8<!UY!!&A-u||)) A-HAMELL.. AMHF<++ F<R	NU\\'' 	NQ%,,,, Qh)u||)) )0$-ell(( $-N9++ 90V5<<%% V,[-%,,$$ [-|R&%,,$$ R&jV&ELL&& V&r%- %
  ,<)u||11 <)~ jFell(( jF jFZ d:&' :&	:&z++ 4/ELL&& /> 3
q)#8 q)
q)h  MP#8:V MPMPr.   