
    sgLM                        d Z ddlZddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZmZmZmZ ddlmZ ddlmZ  ej>                  e       Z!dZ"dZ#g dZ$dZ%dZ& G d dejN                        Z( G d dejN                        Z) G d dejN                        Z* G d dejN                        Z+ G d dejN                        Z, G d dejN                        Z- G d dejN                        Z. G d  d!e      Z/d"Z0d#Z1 ed$e0       G d% d&e/             Z2 ed'e0       G d( d)e/             Z3 ed*e0       G d+ d,e/e             Z4y)-zPyTorch ResNet model.    N)Optional)Tensornn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings)BackboneMixin   )ResNetConfigr   zmicrosoft/resnet-50)r   i      r   z	tiger catc                   H     e Zd Z	 d
dededededef
 fdZdedefd	Z xZS )ResNetConvLayerin_channelsout_channelskernel_sizestride
activationc                     t         |           t        j                  |||||dz  d      | _        t        j
                  |      | _        |t        |   | _	        y t        j                         | _	        y )N   F)r   r   paddingbias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr
   Identityr   )selfr   r   r   r   r   	__class__s         ]/var/www/html/venv/lib/python3.12/site-packages/transformers/models/resnet/modeling_resnet.pyr%   zResNetConvLayer.__init__;   sf     	99;vWbfgWgns
  ^^L90:0F&,BKKM    inputreturnc                 l    | j                  |      }| j                  |      }| j                  |      }|S N)r'   r)   r   r+   r/   hidden_states      r-   forwardzResNetConvLayer.forwardE   s6    ''.)),7|4r.   )r	   r   relu)	__name__
__module____qualname__intstrr%   r   r5   __classcell__r,   s   @r-   r   r   :   sL    lrZZ.1Z@CZQTZfiZV  r.   r   c                   8     e Zd ZdZdef fdZdedefdZ xZS )ResNetEmbeddingszO
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    configc                     t         |           t        |j                  |j                  dd|j
                        | _        t        j                  ddd      | _	        |j                  | _        y )Nr   r!   )r   r   r   r	   r   )r   r   r"   )
r$   r%   r   num_channelsembedding_size
hidden_actembedderr   	MaxPool2dpoolerr+   r@   r,   s     r-   r%   zResNetEmbeddings.__init__Q   s\    '!6!6Aa\b\m\m
 llqAF"//r.   pixel_valuesr0   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaperB   
ValueErrorrE   rG   )r+   rI   rB   	embeddings       r-   r5   zResNetEmbeddings.forwardY   sT    #))!,4,,,w  MM,/	KK	*	r.   )	r7   r8   r9   __doc__r   r%   r   r5   r<   r=   s   @r-   r?   r?   L   s'    0| 0F v r.   r?   c                   B     e Zd ZdZd	dededef fdZdedefdZ xZS )
ResNetShortCutz
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   c                     t         |           t        j                  ||d|d      | _        t        j
                  |      | _        y )Nr   F)r   r   r#   )r$   r%   r   r&   r'   r(   r)   )r+   r   r   r   r,   s       r-   r%   zResNetShortCut.__init__j   s:    99[,AV\chi^^L9r.   r/   r0   c                 J    | j                  |      }| j                  |      }|S r2   )r'   r)   r3   s      r-   r5   zResNetShortCut.forwardo   s(    ''.)),7r.   )r!   )	r7   r8   r9   rN   r:   r%   r   r5   r<   r=   s   @r-   rP   rP   d   s5    
:C :s :C :
V  r.   rP   c            	       <     e Zd ZdZddedededef fdZd Z xZS )	ResNetBasicLayerzO
    A classic ResNet's residual layer composed by two `3x3` convolutions.
    r   r   r   r   c                    t         |           ||k7  xs |dk7  }|rt        |||      nt        j                         | _        t        j                  t        |||      t        ||d             | _        t        |   | _
        y )Nr   r   r   r$   r%   rP   r   r*   shortcut
Sequentialr   layerr
   r   )r+   r   r   r   r   should_apply_shortcutr,   s         r-   r%   zResNetBasicLayer.__init__z   s{     +| ; Jv{H]N;VDcecncncp 	 ]]KfEL,4H

 !,r.   c                 z    |}| j                  |      }| j                  |      }||z  }| j                  |      }|S r2   r[   rY   r   r+   r4   residuals      r-   r5   zResNetBasicLayer.forward   A    zz,/==* |4r.   )r   r6   )	r7   r8   r9   rN   r:   r;   r%   r5   r<   r=   s   @r-   rT   rT   u   s/    
-C 
-s 
-C 
-Y\ 
-r.   rT   c                   L     e Zd ZdZ	 	 	 	 d
dedededededef fdZd	 Z xZ	S )ResNetBottleNeckLayera  
    A classic ResNet's bottleneck layer composed by three `3x3` convolutions.

    The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`. If
    `downsample_in_bottleneck` is true, downsample will be in the first layer instead of the second layer.
    r   r   r   r   	reductiondownsample_in_bottleneckc           
      F   t         	|           ||k7  xs |dk7  }||z  }|rt        |||      nt        j                         | _        t        j                  t        ||d|r|nd      t        |||s|nd      t        ||dd             | _        t        |   | _
        y )Nr   rV   )r   r   )r   r   rX   )
r+   r   r   r   r   rd   re   r\   reduces_channelsr,   s
            r-   r%   zResNetBottleNeckLayer.__init__   s     	 +| ; Jv{'94H]N;VDcecncncp 	 ]]-1OgVmn ,.>Umvstu,lVZ[

 !,r.   c                 z    |}| j                  |      }| j                  |      }||z  }| j                  |      }|S r2   r^   r_   s      r-   r5   zResNetBottleNeckLayer.forward   ra   r.   )r   r6      F)
r7   r8   r9   rN   r:   r;   boolr%   r5   r<   r=   s   @r-   rc   rc      sZ      ).-- - 	-
 - - #'-0r.   rc   c                   N     e Zd ZdZ	 	 ddededededef
 fdZded	efd
Z xZ	S )ResNetStagez4
    A ResNet stage composed by stacked layers.
    r@   r   r   r   depthc                    t         	|           |j                  dk(  rt        nt        }|j                  dk(  r" |||||j
                  |j                        }n |||||j
                        }t        j                  |gt        |dz
        D cg c]  } ||||j
                         c} | _
        y c c}w )N
bottleneck)r   r   re   )r   r   r   rW   )r$   r%   
layer_typerc   rT   rD   re   r   rZ   rangelayers)
r+   r@   r   r   r   rm   r[   first_layer_r,   s
            r-   r%   zResNetStage.__init__   s     	)/):):l)J%P`,!,,)/)H)HK  \&U[UfUfgKmm
dijorsjsdtu_`5|HYHYZu
us   B;
r/   r0   c                 <    |}| j                   D ]
  } ||      } |S r2   )rr   )r+   r/   r4   r[   s       r-   r5   zResNetStage.forward   s*    [[ 	/E .L	/r.   )r!   r!   )
r7   r8   r9   rN   r   r:   r%   r   r5   r<   r=   s   @r-   rl   rl      sX     

 
 	

 
 
4V  r.   rl   c            	       @     e Zd Zdef fdZ	 ddedededefdZ xZ	S )	ResNetEncoderr@   c           
         t         |           t        j                  g       | _        | j                  j                  t        ||j                  |j                  d   |j                  rdnd|j                  d                t        |j                  |j                  dd        }t        ||j                  dd        D ]0  \  \  }}}| j                  j                  t        ||||             2 y )Nr   r!   r   )r   rm   )rm   )r$   r%   r   
ModuleListstagesappendrl   rC   hidden_sizesdownsample_in_first_stagedepthszip)r+   r@   in_out_channelsr   r   rm   r,   s         r-   r%   zResNetEncoder.__init__   s    mmB'%%##A&"<<q!mmA&	
 f1163F3Fqr3JK25ov}}UVUWGX2Y 	\.'[,KK{6;TYZ[	\r.   r4   output_hidden_statesreturn_dictr0   c                     |rdnd }| j                   D ]  }|r||fz   } ||      } |r||fz   }|st        d ||fD              S t        ||      S )N c              3   &   K   | ]	  }||  y wr2   r   ).0vs     r-   	<genexpr>z(ResNetEncoder.forward.<locals>.<genexpr>   s     SqQ]Ss   )last_hidden_statehidden_states)rz   tupler   )r+   r4   r   r   r   stage_modules         r-   r5   zResNetEncoder.forward   sv     3 KK 	6L# - ?'5L		6  )\O;MS\=$ASSS-*'
 	
r.   )FT)
r7   r8   r9   r   r%   r   rj   r   r5   r<   r=   s   @r-   rw   rw      s=    \| \$ ]a
"
:>
UY
	'
r.   rw   c                   *    e Zd ZdZeZdZdZddgZd Z	y)ResNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    resnetrI   r   rP   c                 J   t        |t        j                        r-t        j                  j	                  |j
                  dd       y t        |t        j                        rt        j                  j                  |j
                  t        j                  d             |j                  xt        j                  j                  |j
                        \  }}|dkD  rdt        j                  |      z  nd}t        j                  j                  |j                  | |       y y t        |t        j                  t        j                  f      rUt        j                  j                  |j
                  d       t        j                  j                  |j                  d       y y )Nfan_outr6   )modenonlinearity   )ar   r   )
isinstancer   r&   initkaiming_normal_weightLinearkaiming_uniform_mathsqrtr#   _calculate_fan_in_and_fan_outuniform_r(   	GroupNorm	constant_)r+   modulefan_inrt   bounds        r-   _init_weightsz#ResNetPreTrainedModel._init_weights  s   fbii(GG##FMM	PV#W		*GG$$V]]diil$C{{&GGAA&--P	17!DIIf--  ufe< '  >?GGfmmQ/GGfkk1- @r.   N)
r7   r8   r9   rN   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r   r.   r-   r   r     s*    
  L $O*,<=.r.   r   aH  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`ResNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aF  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zOThe bare ResNet model outputting raw features without any specific head on top.c                   |     e Zd Z fdZ ee       eeee	de
      	 d	dedee   dee   defd              Z xZS )
ResNetModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        j                  d      | _	        | j                          y )N)r   r   )r$   r%   r@   r?   rE   rw   encoderr   AdaptiveAvgPool2drG   	post_initrH   s     r-   r%   zResNetModel.__init__@  sK     (0$V,**62r.   vision)
checkpointoutput_typer   modalityexpected_outputrI   r   r   r0   c                 (   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |||      }|d   }| j                  |      }|s
||f|dd  z   S t        |||j                        S )Nr   r   r   r   )r   pooler_outputr   )r@   r   use_return_dictrE   r   rG   r   r   )r+   rI   r   r   embedding_outputencoder_outputsr   pooled_outputs           r-   r5   zResNetModel.forwardI  s     %9$D $++JjJj 	 &1%<k$++B]B]==6,,3GU` ' 
 ,A.$56%}58KKK7/')77
 	
r.   NN)r7   r8   r9   r%   r   RESNET_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr   r   rj   r5   r<   r=   s   @r-   r   r   ;  sp    
 ++BC&<$. pt
"
:B4.
^fgk^l
	1
 D
r.   r   z
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee       eeee	e
      	 	 	 	 d	deej                     deej                     dee   dee   def
d              Z xZS )
ResNetForImageClassificationc                 |   t         |   |       |j                  | _        t        |      | _        t        j                  t        j                         |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                               | _        | j                          y )Nr   )r$   r%   
num_labelsr   r   r   rZ   Flattenr   r|   r*   
classifierr   rH   s     r-   r%   z%ResNetForImageClassification.__init__u  s      ++!&)--JJLEKEVEVYZEZBIIf))"-v/@/@A`b`k`k`m

 	r.   )r   r   r   r   rI   labelsr   r   r0   c                    ||n| j                   j                  }| j                  |||      }|r|j                  n|d   }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }	| j                  dk(  r& |	|j                         |j                               }n |	||      }n| j                   j
                  dk(  r=t               }	 |	|j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               }	 |	||      }|s|f|dd z   }
||f|
z   S |
S t!        |||j"                  	      S )
a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   
regressionsingle_label_classificationmulti_label_classificationr   r!   )losslogitsr   )r@   r   r   r   r   problem_typer   dtypetorchlongr:   r   squeezer   viewr   r   r   )r+   rI   r   r   r   outputsr   r   r   loss_fctoutputs              r-   r5   z$ResNetForImageClassification.forward  s   & &1%<k$++B]B]++lAUcn+o1<--'!*/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F'+'7D7V#CVC3f\c\q\qrrr.   )NNNN)r7   r8   r9   r%   r   r   r   _IMAGE_CLASS_CHECKPOINTr   r   _IMAGE_CLASS_EXPECTED_OUTPUTr   r   FloatTensor
LongTensorrj   r5   r<   r=   s   @r-   r   r   m  s    
 ++BC*8$4	 59-1/3&*/su001/s ))*/s 'tn	/s
 d^/s 
./s D/sr.   r   zO
    ResNet backbone, to be used with frameworks like DETR and MaskFormer.
    c                   v     e Zd Z fdZ ee       eee      	 dde	de
e   de
e   defd              Z xZS )	ResNetBackbonec                     t         |   |       t         | 	  |       |j                  g|j                  z   | _        t        |      | _        t        |      | _	        | j                          y r2   )r$   r%   _init_backbonerC   r|   num_featuresr?   rE   rw   r   r   rH   s     r-   r%   zResNetBackbone.__init__  s]     v&#223f6I6II(0$V, 	r.   )r   r   rI   r   r   r0   c                    ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |dd      }|j
                  }d}t        | j                        D ]  \  }}	|	| j                  v s|||   fz  } |s|f}
|r|
|j
                  fz  }
|
S t        ||r|j
                  d      S dd      S )a3  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        >>> model = AutoBackbone.from_pretrained(
        ...     "microsoft/resnet-50", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 2048, 7, 7]
        ```NTr   r   )feature_mapsr   
attentions)
r@   r   r   rE   r   r   	enumeratestage_namesout_featuresr   )r+   rI   r   r   r   r   r   r   idxstager   s              r-   r5   zResNetBackbone.forward  s    > &1%<k$++B]B]$8$D $++JjJj 	  ==6,,/dX\,]--#D$4$45 	6JC)))s!3 55	6 "_F#70022M%3G'//
 	
MQ
 	
r.   r   )r7   r8   r9   r%   r   r   r   r   r   r   r   rj   r5   r<   r=   s   @r-   r   r     s`    	 ++BC>Xos7
"7
:B4.7
^fgk^l7
	7
 Y D7
r.   r   )5rN   r   typingr   r   torch.utils.checkpointr   r   torch.nnr   r   r   activationsr
   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   r   r   utils.backbone_utilsr   configuration_resnetr   
get_loggerr7   loggerr   r   r   r   r   Moduler   r?   rP   rT   rc   rl   rw   r   RESNET_START_DOCSTRINGr   r   r   r   r   r.   r-   <module>r      s         A A !  .  2 . 
		H	% ! , (  0 * bii $ryy 0RYY "ryy 4'BII 'T#")) #L&
BII &
R.O .4	   U+
' +
	+
\  Cs#8 CsCsL  	E
*M E
E
r.   