""" PyTorch ConvNext model."""

from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from ...utils.backbone_utils import BackboneMixin
from .configuration_convnext import ConvNextConfig


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "ConvNextConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/convnext-tiny-224"
_EXPECTED_OUTPUT_SHAPE = [1, 768, 7, 7]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "facebook/convnext-tiny-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    # work with tensors of any rank, not just 4D ConvNet inputs
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class ConvNextDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)


class ConvNextLayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(f"Unsupported data format: {self.data_format}")
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            x = torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            # normalize over the channel dimension, upcasting to float32 for stability
            input_dtype = x.dtype
            x = x.float()
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = x.to(dtype=input_dtype)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x


class ConvNextEmbeddings(nn.Module):
    """This class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    """

    def __init__(self, config):
        super().__init__()
        self.patch_embeddings = nn.Conv2d(
            config.num_channels, config.hidden_sizes[0], kernel_size=config.patch_size, stride=config.patch_size
        )
        self.layernorm = ConvNextLayerNorm(config.hidden_sizes[0], eps=1e-6, data_format="channels_first")
        self.num_channels = config.num_channels

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.layernorm(embeddings)
        return embeddings


class ConvNextLayer(nn.Module):
    """This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    """

    def __init__(self, config, dim, drop_path=0):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.layernorm = ConvNextLayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = ACT2FN[config.hidden_act]
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.layer_scale_parameter = (
            nn.Parameter(config.layer_scale_init_value * torch.ones((dim)), requires_grad=True)
            if config.layer_scale_init_value > 0
            else None
        )
        self.drop_path = ConvNextDropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        input = hidden_states
        x = self.dwconv(hidden_states)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.layernorm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.layer_scale_parameter is not None:
            x = self.layer_scale_parameter * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x


class ConvNextStage(nn.Module):
    """ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates (`List[float]`): Stochastic depth rates for each layer.
    """

    def __init__(self, config, in_channels, out_channels, kernel_size=2, stride=2, depth=2, drop_path_rates=None):
        super().__init__()

        if in_channels != out_channels or stride > 1:
            self.downsampling_layer = nn.Sequential(
                ConvNextLayerNorm(in_channels, eps=1e-6, data_format="channels_first"),
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
            )
        else:
            self.downsampling_layer = nn.Identity()
        drop_path_rates = drop_path_rates or [0.0] * depth
        self.layers = nn.Sequential(
            *[ConvNextLayer(config, dim=out_channels, drop_path=drop_path_rates[j]) for j in range(depth)]
        )

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        hidden_states = self.downsampling_layer(hidden_states)
        hidden_states = self.layers(hidden_states)
        return hidden_states


class ConvNextEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.stages = nn.ModuleList()
        # linearly increasing stochastic depth rate, split per stage
        drop_path_rates = [
            x.tolist() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths)).split(config.depths)
        ]
        prev_chs = config.hidden_sizes[0]
        for i in range(config.num_stages):
            out_chs = config.hidden_sizes[i]
            stage = ConvNextStage(
                config,
                in_channels=prev_chs,
                out_channels=out_chs,
                stride=2 if i > 0 else 1,
                depth=config.depths[i],
                drop_path_rates=drop_path_rates[i],
            )
            self.stages.append(stage)
            prev_chs = out_chs

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithNoAttention]:
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.stages):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            hidden_states = layer_module(hidden_states)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
        )


class ConvNextPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = ConvNextConfig
    base_model_prefix = "convnext"
    main_input_name = "pixel_values"
    _no_split_modules = ["ConvNextLayer"]

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


CONVNEXT_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`ConvNextConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

CONVNEXT_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare ConvNext model outputting raw features without any specific head on top.",
    CONVNEXT_START_DOCSTRING,
)
class ConvNextModel(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)

        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndNoAttention,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: torch.FloatTensor = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_state = encoder_outputs[0]

        # global average pooling, (N, C, H, W) -> (N, C)
        pooled_output = self.layernorm(last_hidden_state.mean([-2, -1]))

        if not return_dict:
            return (last_hidden_state, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@add_start_docstrings(
    """
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """,
    CONVNEXT_START_DOCSTRING,
)
class ConvNextForImageClassification(ConvNextPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.convnext = ConvNextModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=ImageClassifierOutputWithNoAttention,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: torch.FloatTensor = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, ImageClassifierOutputWithNoAttention]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.convnext(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )


@add_start_docstrings(
    """
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    """,
    CONVNEXT_START_DOCSTRING,
)
class ConvNextBackbone(ConvNextPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.embeddings = ConvNextEmbeddings(config)
        self.encoder = ConvNextEncoder(config)
        self.num_features = [config.hidden_sizes[0]] + config.hidden_sizes

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = ConvNextLayerNorm(num_channels, data_format="channels_first")
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXT_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=BackboneOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        """
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_hidden_states=True,
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=hidden_states if output_hidden_states else None,
            attentions=None,
        )
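

# ---------------------------------------------------------------------------
# Minimal usage sketch — an addition for illustration, not part of the upstream
# transformers module. It classifies one COCO test image with the
# `facebook/convnext-tiny-224` checkpoint referenced by the docstrings above,
# and assumes network access plus the `PIL` and `requests` packages. Because
# this file uses relative imports, run it as a module, e.g.:
#     python -m transformers.models.convnext.modeling_convnext
if __name__ == "__main__":
    import requests
    from PIL import Image

    from transformers import AutoImageProcessor

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image = Image.open(requests.get(url, stream=True).raw)

    processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
    model = ConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")

    with torch.no_grad():
        logits = model(**processor(image, return_tensors="pt")).logits

    # should print the `_IMAGE_CLASS_EXPECTED_OUTPUT` label, "tabby, tabby cat"
    print(model.config.id2label[logits.argmax(-1).item()])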