
    sg?_                        d Z ddlZddlmZmZmZmZmZmZm	Z	 erddl
mZ ddlZddlmZmZmZ ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z&m'Z'm(Z(m)Z)m*Z*  e       rddl+Z+ e(       rddl,Z, e)jZ                  e.      Z/	 dd
ej`                  de	e1ee1   f   de2de1dee	e3ef      dee1e1f   fdZ4 G d de      Z5y)zImage processor class for DPT.    N)TYPE_CHECKINGDictIterableListOptionalTupleUnion   )DepthEstimatorOutput)BaseImageProcessorBatchFeatureget_size_dict)padresizeto_channel_dimension_format)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imageis_torch_availableis_torch_tensormake_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_vision_availableloggingrequires_backendsinput_imageoutput_sizekeep_aspect_ratiomultipleinput_data_formatreturnc                     dd}t        |t              r||fn|}t        | |      \  }}|\  }}	||z  }
|	|z  }|r"t        d|z
        t        d|
z
        k  r|}
n|
} ||
|z  |      } |||z  |      }||fS )Nc                     t        | |z        |z  }| ||kD  rt        j                  | |z        |z  }||k  rt        j                  | |z        |z  }|S N)roundmathfloorceil)valr(   min_valmax_valxs        _/var/www/html/venv/lib/python3.12/site-packages/transformers/models/dpt/image_processing_dpt.pyconstrain_to_multiple_ofz>get_resize_output_image_size.<locals>.constrain_to_multiple_ofF   s[    #.!H,1w;

3>*X5Aw;		#.)H4A       )r(   )r   N)
isinstanceintr   abs)r%   r&   r'   r(   r)   r7   input_heightinput_widthoutput_heightoutput_widthscale_heightscale_width
new_height	new_widths                 r6   get_resize_output_image_sizerE   ?   s    	 1;;0L;,R]K .{<M NL+"-M< !</L,Kq;#a,&6"77&L 'K),*EPXYJ({)BXVI	""r8   c            %           e Zd ZdZdgZddej                  dddddddddfded	ee	e
f   d
edede
dedee
ef   dedeeeee   f      deeeee   f      dede
ddf fdZddej                  ddfdej"                  d	ee	e
f   dede
d
edeee	ef      deee	ef      dej"                  fdZ	 	 d#dej(                  de
deee	ef      deee	ef      fdZ e       dddddddddddddej.                  dfdeded	e
dede
d
ededededeeeee   f      deeeee   f      dede
deee	ef      dedeee	ef      dej6                  j6                  f"d       Zd$dee   fdZ	 d$d d!deeeeee
e
f      df      deee	ef      fd"Z xZ S )%DPTImageProcessora
  
    Constructs a DPT image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions. Can be overidden by `do_resize` in `preprocess`.
        size (`Dict[str, int]` *optional*, defaults to `{"height": 384, "width": 384}`):
            Size of the image after resizing. Can be overidden by `size` in `preprocess`.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Defines the resampling filter to use if resizing the image. Can be overidden by `resample` in `preprocess`.
        keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
            If `True`, the image is resized to the largest possible size such that the aspect ratio is preserved. Can
            be overidden by `keep_aspect_ratio` in `preprocess`.
        ensure_multiple_of (`int`, *optional*, defaults to 1):
            If `do_resize` is `True`, the image is resized to a size that is a multiple of this value. Can be overidden
            by `ensure_multiple_of` in `preprocess`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overidden by `do_rescale` in
            `preprocess`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overidden by `rescale_factor` in `preprocess`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_pad (`bool`, *optional*, defaults to `False`):
            Whether to apply center padding. This was introduced in the DINOv2 paper, which uses the model in
            combination with DPT.
        size_divisor (`int`, *optional*):
            If `do_pad` is `True`, pads the image dimensions to be divisible by this value. This was introduced in the
            DINOv2 paper, which uses the model in combination with DPT.
    pixel_valuesTNFr9   gp?	do_resizesizeresampler'   ensure_multiple_of
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padsize_divisorr*   c                    t        |   di | ||nddd}t        |      }|| _        || _        || _        || _        || _        || _        || _	        || _
        |	|	nt        | _        |
|
nt        | _        || _        || _        y )Ni  )heightwidth )super__init__r   rI   rJ   r'   rL   rK   rM   rN   rO   r   rP   r   rQ   rR   rS   )selfrI   rJ   rK   r'   rL   rM   rN   rO   rP   rQ   rR   rS   kwargs	__class__s                 r6   rY   zDPTImageProcessor.__init__   s      	"6"'tc-JT""	!2"4 $,((2(>*DZ&/&;AV(r8   imagedata_formatr)   c                     t        |      }d|vsd|vrt        d|j                                t        ||d   |d   f|||      }	t	        |f|	|||d|S )aE  
        Resize an image to target size `(size["height"], size["width"])`. If `keep_aspect_ratio` is `True`, the image
        is resized to the largest possible size such that the aspect ratio is preserved. If `ensure_multiple_of` is
        set, the image is resized to a size that is a multiple of this value.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Target size of the output image.
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.
            ensure_multiple_of (`int`, *optional*, defaults to 1):
                The image is resized to a size that is a multiple of this value.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Defines the resampling filter to use if resizing the image. Otherwise, the image is resized to size
                specified in `size`.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resiizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        rU   rV   zDThe size dictionary must contain the keys 'height' and 'width'. Got )r&   r'   r(   r)   )rJ   rK   r^   r)   )r   
ValueErrorkeysrE   r   )
rZ   r]   rJ   r'   rL   rK   r^   r)   r[   r&   s
             r6   r   zDPTImageProcessor.resize   s    F T"47$#6cdhdmdmdocpqrr2hg7/'/
 
#/
 
 	
r8   c                     d }|t        |      }t        ||      \  }} |||      \  }}	 |||      \  }
}t        |||	f|
|ff|      S )a)  
        Center pad an image to be a multiple of `multiple`.

        Args:
            image (`np.ndarray`):
                Image to pad.
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        c                 ^    t        j                  | |z        |z  }|| z
  }|dz  }||z
  }||fS )N   )r/   r1   )rJ   rS   new_sizepad_sizepad_size_leftpad_size_rights         r6   _get_padz-DPTImageProcessor.pad_image.<locals>._get_pad  sA    yy!45DH$H$MM%5N .00r8   )r^   )r   r   r   )rZ   r]   rS   r^   r)   ri   rU   rV   rg   rh   pad_size_toppad_size_bottoms               r6   	pad_imagezDPTImageProcessor.pad_image   sl    8	1 $ >u E&u.?@(0(F%~(0(E%o5M>:\?<[\juvvr8   imagesreturn_tensorsc                 R   ||n| j                   }||n| j                  }t        |      }||n| j                  }||n| j                  }||n| j
                  }||n| j                  }||n| j                  }|	|	n| j                  }	|
|
n| j                  }
||n| j                  }||n| j                  }||n| j                  }t        |      }t        |      st        d      t!        |||	|
||||||
       |D cg c]  }t#        |       }}t%        |d         r|rt&        j)                  d       |t+        |d         }|r$|D cg c]  }| j-                  ||||||       }}|r!|D cg c]  }| j/                  |||       }}|	r"|D cg c]  }| j1                  ||
||       }}|r!|D cg c]  }| j3                  |||       }}|D cg c]  }t5        |||	       }}d
|i}t7        ||      S c c}w c c}w c c}w c c}w c c}w c c}w )a  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the image after reszing. If `keep_aspect_ratio` is `True`, the image is resized to the largest
                possible size such that the aspect ratio is preserved. If `ensure_multiple_of` is set, the image is
                resized to a size that is a multiple of this value.
            keep_aspect_ratio (`bool`, *optional*, defaults to `self.keep_aspect_ratio`):
                Whether to keep the aspect ratio of the image. If False, the image will be resized to (size, size). If
                True, the image will be resized to keep the aspect ratio and the size will be the maximum possible.
            ensure_multiple_of (`int`, *optional*, defaults to `self.ensure_multiple_of`):
                Ensure that the image size is a multiple of this value.
            resample (`int`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`, Only
                has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                    - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)
rM   rN   rO   rP   rQ   rR   size_divisibilityrI   rJ   rK   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r]   rJ   rK   r'   rL   r)   )r]   scaler)   )r]   meanstdr)   )r]   rS   r)   )input_channel_dimrH   )datatensor_type)rI   rJ   r   r'   rL   rK   rM   rN   rO   rP   rQ   rR   rS   r   r   r`   r   r   r   loggerwarning_oncer   r   rescale	normalizerl   r   r   )rZ   rm   rI   rJ   r'   rL   rK   rM   rN   rO   rP   rQ   rR   rS   rn   r^   r)   r]   ru   s                      r6   
preprocesszDPTImageProcessor.preprocess  s   J "+!6IDNN	'tTYYT"1B1N-TXTjTj3E3Q/W[WnWn'38#-#9Zt
+9+E4K^K^'3'?|TEVEV#-#9Zt
!*!6IDNN	!-4;;'3'?|TEVEV$V,F#:  	&!)%!*	
 6<<E.'<<6!9%*s
 $ >vay I $
  %&7'9&7  
F 
  $ 5RcdF 
  $ U^opF 
  $ UYjkF  ou
ej'{N_`
 
 '>BBa =


s$   HH;HHH#H$target_sizesc                 &   |j                   }|t        |      t        |      k7  rt        d      t        |      r|j	                         }g }t        t        |            D ]k  }t        j                  j                  j                  ||   j                  d      ||   dd      }|d   j                  d      }|j                  |       m |S |j                  d      }t        |j                  d         D cg c]  }||   	 }}|S c c}w )a4  
        Converts the output of [`DPTForSemanticSegmentation`] into semantic segmentation maps. Only supports PyTorch.

        Args:
            outputs ([`DPTForSemanticSegmentation`]):
                Raw outputs of the model.
            target_sizes (`List[Tuple]` of length `batch_size`, *optional*):
                List of tuples corresponding to the requested final size (height, width) of each prediction. If unset,
                predictions will not be resized.

        Returns:
            semantic_segmentation: `List[torch.Tensor]` of length `batch_size`, where each item is a semantic
            segmentation map of shape (height, width) corresponding to the target_sizes entry (if `target_sizes` is
            specified). Each entry of each `torch.Tensor` correspond to a semantic class id.
        zTMake sure that you pass in as many target sizes as the batch dimension of the logitsr   )dimbilinearFrJ   modealign_cornersr9   )logitslenr`   r   numpyrangetorchnn
functionalinterpolate	unsqueezeargmaxappendshape)	rZ   outputsr|   r   semantic_segmentationidxresized_logitssemantic_mapis	            r6   "post_process_semantic_segmentationz4DPTImageProcessor.post_process_semantic_segmentation  s(   "  #6{c,// j  |,+113$&!S[) ;!&!4!4!@!@3K))a)0|C7Hzin "A "  .a077A7>%,,\:; %$ %+MMaM$8!GLMbMhMhijMkGl$m!%:1%=$m!$m$$ %ns   >Dr   r   c                    t        | d       |j                  }|"t        |      t        |      k7  rt        d      g }|dgt        |      z  n|}t	        ||      D ]s  \  }}|Yt
        j                  j                  j                  |j                  d      j                  d      |dd      j                         }|j                  d	|i       u |S )
a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
        r   Nz]Make sure that you pass in as many target sizes as the batch dimension of the predicted depthr   r9   bicubicFr   predicted_depth)r$   r   r   r`   zipr   r   r   r   r   squeezer   )rZ   r   r|   r   resultsdepthtarget_sizes          r6   post_process_depth_estimationz/DPTImageProcessor.post_process_depth_estimation  s    ( 	$(!11$3+?3|CT+To  8D8LvO 44R^"%o|"D 	7E;&++77OOA&003+Iej 8 ')  NN-u56	7 r8   )NNr-   )!__name__
__module____qualname____doc__model_input_namesr   BICUBICboolr   strr;   r	   floatr   r   rY   npndarrayr   r   arrayrl   r!   FIRSTr   r    PILImager{   r   r   r   __classcell__)r\   s   @r6   rG   rG   i   s   $L (( #'9'A'A"'"#,3!:>9= )) 38n) %	)
  )  ) ) c5j)) ) U5$u+#567) E%e"456) ) ) 
)H #("#'9'A'A>BDH5
zz5
 38n5
  	5

  5
 %5
 eC)9$9:;5
 $E#/?*?$@A5
 
5
v ?CDH+wxx+w +w eC)9$9:;	+w
 $E#/?*?$@A+wZ %& "&"&'+ $!:>9= ;?(8(>(>DH#VCVC VC 	VC
  VC  VC %VC VC VC VC U5$u+#567VC E%e"456VC VC VC !sJ!78VC  &!VC" $E#/?*?$@A#VC$ 
%VC 'VCr)%U )%\ RV''' uZeCHo1F%LMN' 
d3
?#	$	'r8   rG   r-   )6r   r/   typingr   r   r   r   r   r   r	   modeling_outputsr   r   r   image_processing_utilsr   r   r   image_transformsr   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   utilsr    r!   r"   r#   r$   r   r   
get_loggerr   rw   r   r;   r   r   rE   rG   rW   r8   r6   <module>r      s    %  N N N 8  U U H H       
		H	% AE'#'#sHSM)*'# '# 	'#
  c+;&; <='# 38_'#TY* Yr8   