# coding=utf-8
# Copyright 2022 SHI Labs and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PyTorch Neighborhood Attention Transformer model."""

import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ....activations import ACT2FN
from ....modeling_outputs import BackboneOutput
from ....modeling_utils import PreTrainedModel
from ....pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ....utils import (
    ModelOutput,
    OptionalDependencyNotAvailable,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_natten_available,
    logging,
    replace_return_docstrings,
    requires_backends,
)
from ....utils.backbone_utils import BackboneMixin
from .configuration_nat import NatConfig


if is_natten_available():
    from natten.functional import natten2dav, natten2dqkrpb
else:

    def natten2dqkrpb(*args, **kwargs):
        raise OptionalDependencyNotAvailable()

    def natten2dav(*args, **kwargs):
        raise OptionalDependencyNotAvailable()


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "NatConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "shi-labs/nat-mini-in1k-224"
_EXPECTED_OUTPUT_SHAPE = [1, 7, 7, 512]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "shi-labs/nat-mini-in1k-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tiger cat"


@dataclass
class NatEncoderOutput(ModelOutput):
    """
    Nat encoder's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nlast_hidden_state.hidden_states
attentionsreshaped_hidden_states)__name__
__module____qualname____doc__r,   torchFloatTensor__annotations__r-   r   r   r.   r/    r'   r%   r+   r+   I   sx    2 ,0u((/=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr'   r+   c                       e Zd ZU dZdZej                  ed<   dZe	ej                     ed<   dZ
e	eej                  df      ed<   dZe	eej                  df      ed<   dZe	eej                  df      ed<   y)	NatModelOutputaS  
    Nat model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
            Average pooling of the last layer hidden-state.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nr,   pooler_output.r-   r.   r/   )r0   r1   r2   r3   r,   r4   r5   r6   r:   r   r-   r   r.   r/   r7   r'   r%   r9   r9   j   s    6 ,0u((/15M8E--.5=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr'   r9   c                       e Zd ZU dZdZeej                     ed<   dZ	ej                  ed<   dZ
eeej                  df      ed<   dZeeej                  df      ed<   dZeeej                  df      ed<   y)	NatImageClassifierOutputa   
    Nat outputs for image classification.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
            shape `(batch_size, hidden_size, height, width)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
            include the spatial dimensions.
    Nlosslogits.r-   r.   r/   )r0   r1   r2   r3   r=   r   r4   r5   r6   r>   r-   r   r.   r/   r7   r'   r%   r<   r<      s    6 )-D(5$$
%, $FE$=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr'   r<   c                   f     e Zd ZdZ fdZdeej                     deej                     fdZ
 xZS )NatEmbeddingsz6
    Construct the patch and position embeddings.
    """

    def __init__(self, config):
        super().__init__()

        self.patch_embeddings = NatPatchEmbeddings(config)

        self.norm = nn.LayerNorm(config.embed_dim)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor]:
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings
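
# Illustrative shape walk-through (a sketch, not part of the original module):
# the two stride-2 convolutions in `NatPatchEmbeddings` (defined next) together
# implement the patch size of 4, so a hypothetical 224x224 RGB input becomes a
# 56x56 grid of channels-last token embeddings:
#
#     pixel_values = torch.randn(1, 3, 224, 224)
#     embeddings = NatEmbeddings(config)(pixel_values)
#     embeddings.shape  # torch.Size([1, 56, 56, config.embed_dim])
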
S )rD   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        patch_size = config.patch_size
        num_channels, hidden_size = config.num_channels, config.embed_dim
        self.num_channels = num_channels

        if patch_size == 4:
            pass
        else:
            # TODO: Support arbitrary patch sizes.
            raise ValueError("Dinat only supports patch size of 4 at the moment.")

        self.projection = nn.Sequential(
            nn.Conv2d(self.num_channels, hidden_size // 2, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
            nn.Conv2d(hidden_size // 2, hidden_size, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)),
        )

    def forward(self, pixel_values: Optional[torch.FloatTensor]) -> torch.Tensor:
        _, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.projection(pixel_values)
        embeddings = embeddings.permute(0, 2, 3, 1)

        return embeddings


class NatDownsampler(nn.Module):
    """
    Convolutional Downsampling Layer.

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    """

    def __init__(self, dim: int, norm_layer: nn.Module = nn.LayerNorm) -> None:
        super().__init__()
        self.dim = dim
        self.reduction = nn.Conv2d(dim, 2 * dim, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.norm = norm_layer(2 * dim)

    def forward(self, input_feature: torch.Tensor) -> torch.Tensor:
        input_feature = self.reduction(input_feature.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
        input_feature = self.norm(input_feature)
        return input_feature
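
# Sketch of the downsampler's effect (illustrative values): the stride-2
# convolution halves each spatial dimension while doubling the channel count,
# and the permutes convert between the channels-last layout used by the
# transformer blocks and the NCHW layout expected by `nn.Conv2d`:
#
#     stage_output = torch.randn(1, 56, 56, 64)     # (batch, height, width, channels)
#     NatDownsampler(dim=64)(stage_output).shape    # torch.Size([1, 28, 28, 128])
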
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class NatDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)
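
# Stochastic-depth behavior in brief (an illustrative sketch, not part of the
# original module): during training each sample's residual branch is either
# zeroed out entirely or rescaled by 1 / keep_prob, which keeps the expected
# output equal to the identity pass used at inference time:
#
#     x = torch.ones(4, 2, 2)
#     out = drop_path(x, drop_prob=0.5, training=True)
#     # each of the 4 samples in `out` is now either all 0.0 or all 2.0
#     drop_path(x, drop_prob=0.5, training=False) is x  # True: eval is a no-op
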
 xZS )NeighborhoodAttentionc                    t         |           ||z  dk7  rt        d| d| d      || _        t	        ||z        | _        | j                  | j
                  z  | _        || _        t        j                  t        j                  |d| j                  z  dz
  d| j                  z  dz
              | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j$                  |j&                        | _        y )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()rZ   r   )rt   )rB   rC   rd   num_attention_headsrx   attention_head_sizeall_head_sizer_   r   	Parameterr4   zerosrpbLinearqkv_biasquerykeyvaluerI   attention_probs_dropout_probrK   rM   rN   rq   	num_headsr_   rO   s        r%   rC   zNeighborhoodAttention.__init__&  s=   ?a#C5(^_h^iijk  $- #&sY#7 !558P8PP& <<ID<L<L8Lq8PTUX\XhXhThklTl noYYt1143E3EFOO\
99T//1C1C&//ZYYt1143E3EFOO\
zz&"E"EFr'   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  ddddd      S )Nr   r\   r   rZ   r   )sizer   r   viewrk   )rM   xnew_x_shapes      r%   transpose_for_scoresz*NeighborhoodAttention.transpose_for_scores;  sN    ffhsmt'?'?AYAY&ZZFF;yyAq!Q''r'   r-   output_attentionsrQ   c                    | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|t	        j
                  | j                        z  }t        ||| j                  | j                  d      }t        j                  j                  |d      }| j                  |      }t        ||| j                  d      }|j                  ddddd      j!                         }|j#                         d d | j$                  fz   }	|j'                  |	      }|r||f}
|
S |f}
|
S )	Nr   r   rq   r   rZ   r\   r   )r   r   r   r   mathsqrtr   r   r   r_   r   
functionalsoftmaxrK   r   rk   
contiguousr   r   r   )rM   r-   r   query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss              r%   rT   zNeighborhoodAttention.forward@  sE   
 //

=0IJ--dhh}.EF	//

=0IJ
 "DIId.F.F$GG )i4K[K[]^_ --//0@b/I ,,7"?KAQAQSTU%--aAq!<GGI"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]r'   F)r0   r1   r2   rC   r   r4   rU   r   boolr   rT   rV   rW   s   @r%   r   r   %  sE    G*( -2|| $D> 
u||		r'   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )NeighborhoodAttentionOutputc                     t         |           t        j                  ||      | _        t        j
                  |j                        | _        y r    )rB   rC   r   r   denserI   r   rK   rM   rN   rq   rO   s      r%   rC   z$NeighborhoodAttentionOutput.__init__c  s6    YYsC(
zz&"E"EFr'   r-   input_tensorrQ   c                 J    | j                  |      }| j                  |      }|S r    r   rK   )rM   r-   r   s      r%   rT   z#NeighborhoodAttentionOutput.forwardh  s$    

=1]3r'   r0   r1   r2   rC   r4   rU   rT   rV   rW   s   @r%   r   r   b  s2    G
U\\  RWR^R^ r'   r   c                   p     e Zd Z fdZd Z	 ddej                  dee   de	ej                     fdZ
 xZS )NeighborhoodAttentionModulec                     t         |           t        ||||      | _        t	        ||      | _        t               | _        y r    )rB   rC   r   rM   r   r   setpruned_headsr   s        r%   rC   z$NeighborhoodAttentionModule.__init__p  s8    )&#y+N	1&#>Er'   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )lenr   rM   r   r   r   r   r   r   r   r   r   r   union)rM   headsindexs      r%   prune_headsz'NeighborhoodAttentionModule.prune_headsv  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r'   r-   r   rQ   c                 f    | j                  ||      }| j                  |d   |      }|f|dd  z   }|S Nr   r   )rM   r   )rM   r-   r   self_outputsattention_outputr   s         r%   rT   z#NeighborhoodAttentionModule.forward  sC    
 yy0AB;;|AF#%QR(88r'   r   )r0   r1   r2   rC   r   r4   rU   r   r   r   rT   rV   rW   s   @r%   r   r   o  sD    ";* -2|| $D> 
u||		r'   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )NatIntermediatec                    t         |           t        j                  |t	        |j
                  |z              | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r    )rB   rC   r   r   rx   	mlp_ratior   
isinstance
hidden_actr   r   intermediate_act_fnr   s      r%   rC   zNatIntermediate.__init__  sa    YYsC(8(83(>$?@
f''-'-f.?.?'@D$'-'8'8D$r'   r-   rQ   c                 J    | j                  |      }| j                  |      }|S r    )r   r   r   s     r%   rT   zNatIntermediate.forward  s&    

=100?r'   r   rW   s   @r%   r   r     s#    9U\\ ell r'   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )	NatOutputc                     t         |           t        j                  t	        |j
                  |z        |      | _        t        j                  |j                        | _	        y r    )
rB   rC   r   r   rx   r   r   rI   rJ   rK   r   s      r%   rC   zNatOutput.__init__  sF    YYs6#3#3c#9:C@
zz&"<"<=r'   r-   rQ   c                 J    | j                  |      }| j                  |      }|S r    r   r   s     r%   rT   zNatOutput.forward  s$    

=1]3r'   r   rW   s   @r%   r   r     s#    >
U\\ ell r'   r   c            	            e Zd Zd fd	Zd Z	 ddej                  dee   de	ej                  ej                  f   fdZ
 xZS )	NatLayerc                 z   t         |           |j                  | _        |j                  | _        t	        j
                  ||j                        | _        t        |||| j                        | _	        |dkD  rt        |      nt	        j                         | _        t	        j
                  ||j                        | _        t        ||      | _        t!        ||      | _        |j$                  dkD  r?t	        j&                  |j$                  t)        j*                  d|f      z  d      | _        y d | _        y )Neps)r_   r~   r   rZ   T)requires_grad)rB   rC   chunk_size_feed_forwardr_   r   rF   layer_norm_epslayernorm_beforer   	attentionr   Identityr   layernorm_afterr   intermediater   r   layer_scale_init_valuer   r4   oneslayer_scale_parameters)rM   rN   rq   r   drop_path_raterO   s        r%   rC   zNatLayer.__init__  s    '-'E'E$!-- "Sf6K6K L4VS)Y]YiYij8F8L^4RTR]R]R_!||CV5J5JK+FC8, ,,q0 LL66QH9MM]ab 	#  	#r'   c                     | j                   }d}||k  s||k  rJdx}}t        d||z
        }t        d||z
        }	dd||||	f}t        j                  j	                  ||      }||fS )N)r   r   r   r   r   r   r   )r_   maxr   r   pad)
rM   r-   rm   rn   window_size
pad_valuespad_lpad_tpad_rpad_bs
             r%   	maybe_padzNatLayer.maybe_pad  s    &&'
K5;#6EE;./E;/0EQueU;JMM--mZHMj((r'   r-   r   rQ   c                    |j                         \  }}}}|}| j                  |      }| j                  |||      \  }}|j                  \  }	}
}}	| j	                  ||      }|d   }|d   dkD  xs |d   dkD  }|r|d d d |d |d d f   j                         }| j                  | j                  d   |z  }|| j                  |      z   }| j                  |      }| j                  | j                  |            }| j                  | j                  d   |z  }|| j                  |      z   }|r	||d   f}|S |f}|S )N)r   r   r\      r   )r   r   r   rj   r   r   r   r   r   r   r   )rM   r-   r   
batch_sizerm   rn   channelsshortcutr   rl   
height_pad	width_padattention_outputsr   
was_paddedlayer_outputlayer_outputss                    r%   rT   zNatLayer.forward  s|   
 /<.@.@.B+
FE8 --m<$(NN=&%$P!z&3&9&9#:y! NN=L]N^,Q/]Q&;*Q-!*;
/7F7FUFA0EFQQS&&2#::1=@PP 4>>2B#CC++M:{{4#4#4\#BC&&266q9LHL$t~~l'CC@Q'8';< YeWfr'   )r~   r   )r0   r1   r2   rC   r   r4   rU   r   r   r   rT   rV   rW   s   @r%   r   r     sM    
 	) -2$||$ $D>$ 
u||U\\)	*	$r'   r   c                   j     e Zd Z fdZ	 ddej
                  dee   deej
                     fdZ	 xZ
S )NatStagec                 4   t         |           || _        || _        t	        j
                  t        |      D cg c]  }t        |||||          c}      | _        |% ||t        j                        | _
        d| _        y d | _
        d| _        y c c}w )N)rN   rq   r   r   )rq   rr   F)rB   rC   rN   rq   r   
ModuleListranger   layersrF   
downsamplepointing)	rM   rN   rq   depthr   r   r  irO   s	           r%   rC   zNatStage.__init__  s    mm u  !'#1!#4	

 !(SR\\JDO  #DO#s   Br-   r   rQ   c                     |j                         \  }}}}t        | j                        D ]  \  }} |||      }|d   } |}	| j                  | j                  |	      }||	f}
|r|
dd  z  }
|
S r   )r   	enumerater  r  )rM   r-   r   rl   rm   rn   r  layer_moduler	  !hidden_states_before_downsamplingstage_outputss              r%   rT   zNatStage.forward
  s    
 ,00265!(5 	-OA|(8IJM)!,M	- -:)??& OO,MNM&(IJ]12..Mr'   r   )r0   r1   r2   rC   r4   rU   r   r   r   rT   rV   rW   s   @r%   r  r    s?    6 -2|| $D> 
u||		r'   r  c                   ~     e Zd Z fdZ	 	 	 	 d	dej
                  dee   dee   dee   dee   dee	e
f   fdZ xZS )

NatEncoderc                    t         |           t        |j                        | _        || _        t        j                  d|j                  t        |j                              D cg c]  }|j                          }}t        j                  t        | j                        D cg c]  }t        |t        |j                   d|z  z        |j                  |   |j"                  |   |t        |j                  d |       t        |j                  d |dz           || j                  dz
  k  rt$        nd        c}      | _        y c c}w c c}w )Nr   rZ   r   )rN   rq   r  r   r   r  )rB   rC   r   depths
num_levelsrN   r4   linspacer   sumitemr   r  r  r  rx   rG   r   rp   levels)rM   rN   r   dpri_layerrO   s        r%   rC   zNatEncoder.__init__   s   fmm,!&63H3H#fmmJ\!]^Aqvvx^^mm  %T__5
  !F,,q'z9: --0$..w7#&s6=='+B'Cc&--XeZadeZeJfFg#h29DOOa<O2O~VZ

 _
s   'E&BEr-   r   output_hidden_states(output_hidden_states_before_downsamplingreturn_dictrQ   c                    |rdnd }|rdnd }|rdnd }|r |j                  dddd      }	||fz  }||	fz  }t        | j                        D ]l  \  }
} |||      }|d   }|d   }|r#|r!|j                  dddd      }	||fz  }||	fz  }n$|r"|s |j                  dddd      }	||fz  }||	fz  }|se||dd  z  }n |st        d |||fD              S t	        ||||      S )Nr7   r   r\   r   rZ   c              3   &   K   | ]	  }||  y wr    r7   ).0vs     r%   	<genexpr>z%NatEncoder.forward.<locals>.<genexpr>Z  s     mq_`_lms   )r,   r-   r.   r/   )rk   r  r!  tupler+   )rM   r-   r   r$  r%  r&  all_hidden_statesall_reshaped_hidden_statesall_self_attentionsreshaped_hidden_stater  r  r	  r  s                 r%   rT   zNatEncoder.forward3  s]    #7BD+?RT"$5b4$1$9$9!Q1$E!-!11&+@*BB&(5 	9OA|(8IJM)!,M0=a0@-#(P(I(Q(QRSUVXY[\(]%!&G%II!*/D.FF*%.V(5(=(=aAq(I%!m%55!*/D.FF* #}QR'88#%	9( m]4EGZ$[mmm++*#=	
 	
r'   )FFFT)r0   r1   r2   rC   r4   rU   r   r   r   r   r+   rT   rV   rW   s   @r%   r  r    st    
, -2/4CH&*.
||.
 $D>.
 'tn	.

 3;4..
 d^.
 
u&&	'.
r'   r  c                   "    e Zd ZdZeZdZdZd Zy)NatPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    natrP   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                        rJ|j                  j
                  j                          |j                  j
                  j                  d       yy)zInitialize the weightsr~   )meanstdNg      ?)r   r   r   rf   weightdatanormal_rN   initializer_rangert   zero_rF   fill_)rM   modules     r%   _init_weightsz NatPreTrainedModel._init_weightsn  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( '-KK""$MM$$S) .r'   N)	r0   r1   r2   r3   r   config_classbase_model_prefixmain_input_namer>  r7   r'   r%   r2  r2  d  s    
 L$O
*r'   r2  aF  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`NatConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

NAT_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`ViTImageProcessor.__call__`]
            for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare Nat Model transformer outputting raw hidden-states without any specific head on top.",
    NAT_START_DOCSTRING,
)
class NatModel(NatPreTrainedModel):
    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.config = config
        self.num_levels = len(config.depths)
        self.num_features = int(config.embed_dim * 2 ** (self.num_levels - 1))

        self.embeddings = NatEmbeddings(config)
        self.encoder = NatEncoder(config)

        self.layernorm = nn.LayerNorm(self.num_features, eps=config.layer_norm_eps)
        self.pooler = nn.AdaptiveAvgPool1d(1) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune):
        """
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=NatModelOutput,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, NatModelOutput]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = encoder_outputs[0]
        sequence_output = self.layernorm(sequence_output)

        pooled_output = None
        if self.pooler is not None:
            pooled_output = self.pooler(sequence_output.flatten(1, 2).transpose(1, 2))
            pooled_output = torch.flatten(pooled_output, 1)

        if not return_dict:
            output = (sequence_output, pooled_output) + encoder_outputs[1:]
            return output

        return NatModelOutput(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
            reshaped_hidden_states=encoder_outputs.reshaped_hidden_states,
        )


@add_start_docstrings(
    """
    Nat Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    """,
    NAT_START_DOCSTRING,
)
class NatForImageClassification(NatPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        requires_backends(self, ["natten"])

        self.num_labels = config.num_labels
        self.nat = NatModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(self.nat.num_features, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=NatImageClassifierOutput,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, NatImageClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.nat(
            pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return NatImageClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
            reshaped_hidden_states=outputs.reshaped_hidden_states,
        )


@add_start_docstrings(
    "NAT backbone, to be used with frameworks like DETR and MaskFormer.",
    NAT_START_DOCSTRING,
)
class NatBackbone(NatPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        requires_backends(self, ["natten"])

        self.embeddings = NatEmbeddings(config)
        self.encoder = NatEncoder(config)
        self.num_features = [config.embed_dim] + [int(config.embed_dim * 2**i) for i in range(len(config.depths))]

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = nn.LayerNorm(num_channels)
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.patch_embeddings

    @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=BackboneOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        """
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=True,
            output_hidden_states_before_downsampling=True,
            return_dict=True,
        )

        hidden_states = outputs.reshaped_hidden_states

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                # TODO can we simplify this?
                batch_size, num_channels, height, width = hidden_state.shape
                hidden_state = hidden_state.permute(0, 2, 3, 1).contiguous()
                hidden_state = hidden_state.view(batch_size, height * width, num_channels)
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                hidden_state = hidden_state.view(batch_size, height, width, num_channels)
                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (outputs.hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )
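

# Minimal usage sketches (mirroring the generated code-sample docstrings above;
# both require the `natten` package, and `image` is any PIL image as in the
# NatBackbone docstring example):
#
#     from transformers import AutoImageProcessor, NatModel, NatForImageClassification
#
#     processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
#     inputs = processor(images=image, return_tensors="pt")
#
#     # Bare model: channels-last features from the last stage.
#     model = NatModel.from_pretrained("shi-labs/nat-mini-in1k-224")
#     model(**inputs).last_hidden_state.shape  # torch.Size([1, 7, 7, 512])
#
#     # Classification head fine-tuned on ImageNet-1k.
#     classifier = NatForImageClassification.from_pretrained("shi-labs/nat-mini-in1k-224")
#     logits = classifier(**inputs).logits
#     classifier.config.id2label[logits.argmax(-1).item()]  # e.g. "tiger cat"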