
    sgR              	       L   d Z ddlZddlmZ ddlmZmZmZ ddlZddl	Zddlm
Z
 ddlmZmZmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZmZ ddlmZ  ej<                  e      Z dZ!dZ"g dZ#dZ$dZ%d5dejL                  de'de(dejL                  fdZ) G d de
jT                        Z+ G d de
jT                        Z, G d de
jT                        Z- G d de
jT                        Z. G d d e
jT                        Z/ G d! d"e
jT                        Z0 G d# d$e
jT                        Z1 G d% d&e
jT                        Z2 G d' d(e
jT                        Z3 G d) d*e
jT                        Z4 G d+ d,e      Z5d-Z6d.Z7 ed/e6       G d0 d1e5             Z8 ed2e6       G d3 d4e5             Z9y)6z-PyTorch Visual Attention Network (VAN) model.    N)OrderedDict)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )	VanConfigr   z!Visual-Attention-Network/van-base)r   i      r   ztabby, tabby catinput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr    random_tensoroutputs          b/var/www/html/venv/lib/python3.12/site-packages/transformers/models/deprecated/van/modeling_van.py	drop_pathr*   3   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
VanDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r)   r1   zVanDropPath.__init__J   s    "r+   hidden_statesc                 D    t        || j                  | j                        S r/   )r*   r   r   )r2   r4   s     r)   forwardzVanDropPath.forwardN   s    FFr+   c                 8    dj                  | j                        S )Nzp={})formatr   )r2   s    r)   
extra_reprzVanDropPath.extra_reprQ   s    }}T^^,,r+   r/   )__name__
__module____qualname____doc__r   floatr1   r"   Tensorr6   strr9   __classcell__r3   s   @r)   r-   r-   G   sG    b#(5/ #T #GU\\ Gell G-C -r+   r-   c            	       n     e Zd ZdZd
dedededef fdZdej                  dej                  fd	Z xZ	S )VanOverlappingPatchEmbedderz
    Downsamples the input using a patchify operation with a `stride` of 4 by default making adjacent windows overlap by
    half of the area. From [PVTv2: Improved Baselines with Pyramid Vision
    Transformer](https://arxiv.org/abs/2106.13797).
    in_channelshidden_size
patch_sizestridec                     t         |           t        j                  |||||dz        | _        t        j
                  |      | _        y )N   )kernel_sizerH   padding)r0   r1   r   Conv2dconvolutionBatchNorm2dnormalization)r2   rE   rF   rG   rH   r3   s        r)   r1   z$VanOverlappingPatchEmbedder.__init__\   sD    99*VU_cdUd
  ^^K8r+   r   r   c                 J    | j                  |      }| j                  |      }|S r/   )rN   rP   )r2   r   hidden_states      r)   r6   z#VanOverlappingPatchEmbedder.forwardc   s(    ''.)),7r+   )r   r   
r:   r;   r<   r=   intr1   r"   r?   r6   rA   rB   s   @r)   rD   rD   U   sE    9C 9c 9s 9X[ 9U\\ ell r+   rD   c                   v     e Zd ZdZ	 	 ddededededef
 fdZdej                  d	ej                  fd
Z
 xZS )VanMlpLayerz
    MLP with depth-wise convolution, from [PVTv2: Improved Baselines with Pyramid Vision
    Transformer](https://arxiv.org/abs/2106.13797).
    rE   rF   out_channels
hidden_actdropout_ratec                 X   t         |           t        j                  ||d      | _        t        j                  ||dd|      | _        t        |   | _        t        j                  |      | _	        t        j                  ||d      | _
        t        j                  |      | _        y )Nr   rK      rK   rL   groups)r0   r1   r   rM   in_dense
depth_wiser   
activationDropoutdropout1	out_densedropout2)r2   rE   rF   rW   rX   rY   r3   s         r)   r1   zVanMlpLayer.__init__o   s~     			+{J))K!UV_jk ,

<0;!L

<0r+   rR   r   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }|S r/   )r_   r`   ra   rc   rd   re   r2   rR   s     r)   r6   zVanMlpLayer.forward   s\    }}\2|4|4}}\2~~l3}}\2r+   )gelu      ?)r:   r;   r<   r=   rT   r@   r>   r1   r"   r?   r6   rA   rB   s   @r)   rV   rV   i   s`     !!11 1 	1
 1 1 ELL U\\ r+   rV   c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )VanLargeKernelAttentionz-
    Basic Large Kernel Attention (LKA).
    rF   c                     t         |           t        j                  ||dd|      | _        t        j                  ||ddd|      | _        t        j                  ||d	      | _        y )
N   rJ   r]   r   r\   	   )rK   dilationrL   r^   r   r[   )r0   r1   r   rM   r`   depth_wise_dilated
point_wiser2   rF   r3   s     r)   r1   z VanLargeKernelAttention.__init__   s]    ))K!UV_jk"$))!aS^#
 ))K!Lr+   rR   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r/   )r`   rp   rq   rg   s     r)   r6   zVanLargeKernelAttention.forward   s4    |4..|<|4r+   rS   rB   s   @r)   rk   rk      s1    MC MELL U\\ r+   rk   c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )VanLargeKernelAttentionLayerzV
    Computes attention using Large Kernel Attention (LKA) and attends the input.
    rF   c                 B    t         |           t        |      | _        y r/   )r0   r1   rk   	attentionrr   s     r)   r1   z%VanLargeKernelAttentionLayer.__init__   s    0=r+   rR   r   c                 2    | j                  |      }||z  }|S r/   )rw   )r2   rR   rw   attendeds       r)   r6   z$VanLargeKernelAttentionLayer.forward   s    NN<0	)+r+   rS   rB   s   @r)   ru   ru      s/    >C >ELL U\\ r+   ru   c                   f     e Zd ZdZddedef fdZdej                  dej                  fdZ	 xZ
S )	VanSpatialAttentionLayerz
    Van spatial attention layer composed by projection (via conv) -> act -> Large Kernel Attention (LKA) attention ->
    projection (via conv) + residual connection.
    rF   rX   c           
         t         |           t        j                  t	        dt        j
                  ||d      fdt        |   fg            | _        t        |      | _	        t        j
                  ||d      | _
        y )Nconvr   r[   act)r0   r1   r   
Sequentialr   rM   r   pre_projectionru   attention_layerpost_projection)r2   rF   rX   r3   s      r)   r1   z!VanSpatialAttentionLayer.__init__   sr     mmRYY{KQOPF:./
  <KH!yykqQr+   rR   r   c                 z    |}| j                  |      }| j                  |      }| j                  |      }||z   }|S r/   )r   r   r   r2   rR   residuals      r)   r6   z VanSpatialAttentionLayer.forward   sG    **<8++L9++L9#h.r+   )rh   )r:   r;   r<   r=   rT   r@   r1   r"   r?   r6   rA   rB   s   @r)   r{   r{      s9    
RC RS RELL U\\ r+   r{   c                   f     e Zd ZdZddedef fdZdej                  dej                  fdZ	 xZ
S )	VanLayerScalingzT
    Scales the inputs by a learnable parameter initialized by `initial_value`.
    rF   initial_valuec                     t         |           t        j                  |t	        j
                  |      z  d      | _        y )NT)requires_grad)r0   r1   r   	Parameterr"   onesweight)r2   rF   r   r3   s      r)   r1   zVanLayerScaling.__init__   s/    ll=5::{3L#L\`ar+   rR   r   c                 `    | j                   j                  d      j                  d      |z  }|S )N)r   	unsqueezerg   s     r)   r6   zVanLayerScaling.forward   s,    {{,,R0::2>Mr+   )g{Gz?)r:   r;   r<   r=   rT   r>   r1   r"   r?   r6   rA   rB   s   @r)   r   r      s9    bC b bELL U\\ r+   r   c            	       r     e Zd ZdZ	 	 d
dedededef fdZdej                  dej                  fd	Z
 xZS )VanLayerzv
    Van layer composed by normalization layers, large kernel attention (LKA) and a multi layer perceptron (MLP).
    configrF   	mlp_ratiodrop_path_ratec                    t         |           |dkD  rt        |      nt        j                         | _        t        j                  |      | _        t        ||j                        | _
        t        ||j                        | _        t        j                  |      | _        t        |||z  ||j                  |j                         | _        t        ||j                        | _        y )Nr   )r0   r1   r-   r   Identityr*   rO   pre_normomalizationr{   rX   rw   r   layer_scale_init_valueattention_scalingpost_normalizationrV   rY   mlpmlp_scaling)r2   r   rF   r   r   r3   s        r)   r1   zVanLayer.__init__   s     	8F8L^4RTR]R]R_#%>>+#> 1+v?P?PQ!0f>[>[!\"$.."=y0+v?P?PRXReRe
 +;8U8UVr+   rR   r   c                 2   |}| j                  |      }| j                  |      }| j                  |      }| j                  |      }||z   }|}| j	                  |      }| j                  |      }| j                  |      }| j                  |      }||z   }|S r/   )r   rw   r   r*   r   r   r   r   s      r)   r6   zVanLayer.forward   s    //=~~l3--l;~~l3,...|<xx-''5~~l3,.r+   )r   ri   r:   r;   r<   r=   r   rT   r>   r1   r"   r?   r6   rA   rB   s   @r)   r   r      s[      #WW W 	W
 W$ELL U\\ r+   r   c                        e Zd ZdZ	 	 ddededededededed	ef fd
Zdej                  dej                  fdZ
 xZS )VanStagez2
    VanStage, consisting of multiple layers.
    r   rE   rF   rG   rH   depthr   r   c	                    t         
|           t        ||||      | _        t	        j
                  t        |      D 	cg c]  }	t        ||||       c}	 | _        t	        j                  ||j                        | _        y c c}	w )N)r   r   eps)r0   r1   rD   
embeddingsr   r   ranger   layers	LayerNormlayer_norm_epsrP   )r2   r   rE   rF   rG   rH   r   r   r   _r3   s             r)   r1   zVanStage.__init__  s     	5k;PZ\bcmm u  '#1	

  \\+6;P;PQs   BrR   r   c                    | j                  |      }| j                  |      }|j                  \  }}}}|j                  d      j	                  dd      }| j                  |      }|j                  ||||      j                  dddd      }|S )NrJ   r   r   r\   )r   r   r    flatten	transposerP   viewpermute)r2   rR   
batch_sizerF   heightwidths         r)   r6   zVanStage.forward!  s    |4{{<01=1C1C.
K#++A.88A>)),7#((VUKPXXYZ\]_`bcdr+   )r   r   r   rB   s   @r)   r   r     s      #RR R 	R
 R R R R R4	ELL 	U\\ 	r+   r   c                   p     e Zd ZdZdef fdZ	 	 d	dej                  dee	   dee	   de
eef   fdZ xZS )

VanEncoderz4
    VanEncoder, consisting of multiple stages.
    r   c                 T   t         |           t        j                  g       | _        |j
                  }|j                  }|j                  }|j                  }|j                  }t        j                  d|j                  t        |j                              D cg c]  }|j                          }}t        t!        ||||||            D ]S  \  }	\  }
}}}}}|	dk(  }||	dz
     }|r|j"                  }| j                  j%                  t'        ||||
||||             U y c c}w )Nr   r   )rG   rH   r   r   r   )r0   r1   r   
ModuleListstagespatch_sizesstrideshidden_sizesdepths
mlp_ratiosr"   linspacer   sumitem	enumeratezipnum_channelsappendr   )r2   r   r   r   r   r   r   xdrop_path_rates	num_stagerG   rH   rF   r   mlp_expantionr   is_first_stagerE   r3   s                     r)   r1   zVanEncoder.__init__2  s"   mmB'((..**&&
-2^^Av?T?TVYZ`ZgZgVh-ij1668jjbkWlFJXc
 	^I^
FK~ '!^N&y1}5K$11KK)!+#1		 ks   D%rR   output_hidden_statesreturn_dictr   c                     |rdnd }t        | j                        D ]  \  }} ||      }|s||fz   } |st        d ||fD              S t        ||      S )N c              3   &   K   | ]	  }||  y wr/   r   ).0vs     r)   	<genexpr>z%VanEncoder.forward.<locals>.<genexpr>_  s     WqWs   )last_hidden_stater4   )r   r   tupler   )r2   rR   r   r   all_hidden_statesr   stage_modules          r)   r6   zVanEncoder.forwardP  sp     #7BD(5 	HOA|'5L#$5$G!		H W\3D$EWWW-\mnnr+   )FT)r:   r;   r<   r=   r   r1   r"   r?   r   boolr   r   r   r6   rA   rB   s   @r)   r   r   -  sd    y B 05&*	ollo 'tno d^	o
 
u44	5or+   r   c                   &    e Zd ZdZeZdZdZdZd Z	y)VanPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    vanpixel_valuesTc                    t        |t        j                        rt        j                  j	                  |j
                  | j                  j                         t        |t        j                        r8|j                  +t        j                  j                  |j                  d       yyyt        |t        j                        rUt        j                  j                  |j                  d       t        j                  j                  |j
                  d       yt        |t        j                        r|j                  d   |j                  d   z  |j                  z  }||j                  z  }|j
                  j                  j!                  dt#        j$                  d|z               |j                  %|j                  j                  j'                          yyy)zInitialize the weights)stdNr   g      ?r   g       @)
isinstancer   Linearinittrunc_normal_r   r   initializer_rangebias	constant_r   rM   rK   rW   r^   datanormal_mathsqrtzero_)r2   modulefan_outs      r)   _init_weightsz VanPreTrainedModel._init_weightso  sC   fbii(GG!!&--T[[5R5R!S&")),1H!!&++q1 2I,-GGfkk1-GGfmmS1		*((+f.@.@.CCfFYFYYG%GMM&&q$))C'M*BC{{&  &&( '	 +r+   N)
r:   r;   r<   r=   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointingr   r   r+   r)   r   r   d  s$    
 L$O&*#)r+   r   aE  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`VanConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aF  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all stages. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zxThe bare VAN model outputting raw features without any specific head on top. Note, VAN does not have an embedding layer.c                        e Zd Z fdZ ee       eeee	de
      	 	 d	deej                     dee   dee   deeef   fd              Z xZS )
VanModelc                     t         |   |       || _        t        |      | _        t        j                  |j                  d   |j                        | _	        | j                          y )Nr   r   )r0   r1   r   r   encoderr   r   r   r   	layernorm	post_initr2   r   r3   s     r)   r1   zVanModel.__init__  sP     !&)f&9&9"&=6CXCXYr+   vision)
checkpointoutput_typer   modalityexpected_outputr   r   r   r   c                    ||n| j                   j                  }||n| j                   j                  }| j                  |||      }|d   }|j	                  ddg      }|s
||f|dd  z   S t        |||j                        S )Nr   r   r   r   )dimr   )r   pooler_outputr4   )r   r   use_return_dictr   meanr   r4   )r2   r   r   r   encoder_outputsr   pooled_outputs          r)   r6   zVanModel.forward  s     %9$D $++JjJj 	 &1%<k$++B]B],,!5# ' 

 ,A.)..B8.<%}58KKK7/')77
 	
r+   )NN)r:   r;   r<   r1   r   VAN_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r"   FloatTensorr   r   r   r6   rA   rB   s   @r)   r   r     s     ++?@&<$. 04&*	
u001
 'tn
 d^	

 
u>>	?
 A
r+   r   z
    VAN Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee       eeee	e
      	 	 	 	 d	deej                     deej                     dee   dee   deeef   f
d              Z xZS )
VanForImageClassificationc                    t         |   |       t        |      | _        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _	        | j                          y )Nr   r   )r0   r1   r   r   
num_labelsr   r   r   r   
classifierr   r   s     r)   r1   z"VanForImageClassification.__init__  sl     F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r+   )r   r   r   r   r   labelsr   r   r   c                 h   ||n| j                   j                  }| j                  |||      }|r|j                  n|d   }| j	                  |      }d}|| j                   j
                  | j                   j                  dk(  rd| j                   _        nv| j                   j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rSt               }	| j                   j                  dk(  r& |	|j                         |j                               }n |	||      }n| j                   j
                  dk(  rGt               }	 |	|j                  d| j                   j                        |j                  d            }n,| j                   j
                  dk(  rt               }	 |	||      }|s|f|dd z   }
||f|
z   S |
S t!        |||j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   
regressionsingle_label_classificationmulti_label_classificationr   rJ   )losslogitsr4   )r   r  r   r   r  problem_typer  r   r"   longrT   r
   squeezer	   r   r   r   r4   )r2   r   r  r   r   outputsr  r  r  loss_fctr(   s              r)   r6   z!VanForImageClassification.forward  s   ( &1%<k$++B]B]((<>R`k(l1<--'!*/{{''/;;))Q./;DKK,[[++a/V\\UZZ5OSYS_S_chclclSl/LDKK,/KDKK,{{''<7"9;;))Q.#FNN$4fnn6FGD#FF3D))-JJ+-B0F0F GUWY))-II,./Y,F)-)9TGf$EvE3f\c\q\qrrr+   )NNNN)r:   r;   r<   r1   r   r  r   _IMAGE_CLASS_CHECKPOINTr   r  _IMAGE_CLASS_EXPECTED_OUTPUTr   r"   r	  
LongTensorr   r   r   r6   rA   rB   s   @r)   r  r    s    	 ++?@*8$4	 59-1/3&*0su0010s ))*0s 'tn	0s
 d^0s 
u::	;0s A0sr+   r  )r   F):r=   r   collectionsr   typingr   r   r   r"   torch.utils.checkpointr   torch.nnr   r	   r
   activationsr   modeling_outputsr   r   r   modeling_utilsr   utilsr   r   r   r   configuration_vanr   
get_loggerr:   loggerr  r  r  r  r  r?   r>   r   r*   Moduler-   rD   rV   rk   ru   r{   r   r   r   r   r   VAN_START_DOCSTRINGr  r   r  r   r+   r)   <module>r+     s   4  # ) )    A A " 
 / v v ( 
		H	%  : '  > 1 U\\ e T V[VbVb (-")) -")) (")) @bii (299 ryy 8bii (ryy (V(ryy (V4o 4on) )8	   
-
! -

-
`  Cs 2 CsCsr+   