"""Image processor class for SAM."""

import math
from copy import deepcopy
from itertools import product
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np

from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ...image_transforms import convert_to_rgb, pad, resize, to_channel_dimension_format
from ...image_utils import (
    IMAGENET_DEFAULT_MEAN,
    IMAGENET_DEFAULT_STD,
    ChannelDimension,
    ImageInput,
    PILImageResampling,
    get_image_size,
    infer_channel_dimension_format,
    is_scaled_image,
    make_list_of_images,
    to_numpy_array,
    valid_images,
    validate_preprocess_arguments,
)
from ...utils import (
    TensorType,
    filter_out_non_signature_kwargs,
    is_tf_available,
    is_torch_available,
    is_torchvision_available,
    logging,
    requires_backends,
)


if is_torch_available():
    import torch
    import torch.nn.functional as F

if is_torchvision_available():
    from torchvision.ops.boxes import batched_nms

if is_tf_available():
    import tensorflow as tf
    from tensorflow.experimental import numpy as tnp

    from ...tf_utils import flatten, shape_list


logger = logging.get_logger(__name__)


class SamImageProcessor(BaseImageProcessor):
    r"""
    Constructs a SAM image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in the `preprocess` method.
        size (`dict`, *optional*, defaults to `{"longest_edge": 1024}`):
            Size of the output image after resizing. Resizes the longest edge of the image to match
            `size["longest_edge"]` while maintaining the aspect ratio. Can be overridden by the `size` parameter in the
            `preprocess` method.
        mask_size (`dict`, *optional*, defaults to `{"longest_edge": 256}`):
            Size of the output segmentation map after resizing. Resizes the longest edge of the image to match
            `size["longest_edge"]` while maintaining the aspect ratio. Can be overridden by the `mask_size` parameter
            in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in the
            `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
            `do_rescale` parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
            overridden by the `rescale_factor` parameter in the `preprocess` method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to the specified `pad_size`. Can be overridden by the `do_pad` parameter in the
            `preprocess` method.
        pad_size (`dict`, *optional*, defaults to `{"height": 1024, "width": 1024}`):
            Size of the output image after padding. Can be overridden by the `pad_size` parameter in the `preprocess`
            method.
        mask_pad_size (`dict`, *optional*, defaults to `{"height": 256, "width": 256}`):
            Size of the output segmentation map after padding. Can be overridden by the `mask_pad_size` parameter in
            the `preprocess` method.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
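
    Example (an illustrative sketch added for this docstring, not from the upstream
    reference; assumes `image` is any PIL image):

    ```python
    >>> from transformers import SamImageProcessor

    >>> processor = SamImageProcessor()
    >>> inputs = processor(images=image, return_tensors="pt")
    >>> list(inputs["pixel_values"].shape)  # with the default size and pad_size
    [1, 3, 1024, 1024]
    ```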
    """

    model_input_names = ["pixel_values"]

    def __init__(
        self,
        do_resize: bool = True,
        size: Dict[str, int] = None,
        mask_size: Dict[str, int] = None,
        resample: PILImageResampling = PILImageResampling.BILINEAR,
        do_rescale: bool = True,
        rescale_factor: Union[int, float] = 1 / 255,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: bool = True,
        pad_size: Dict[str, int] = None,
        mask_pad_size: Dict[str, int] = None,
        do_convert_rgb: bool = True,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        size = size if size is not None else {"longest_edge": 1024}
        size = get_size_dict(max_size=size, default_to_square=False) if not isinstance(size, dict) else size

        pad_size = pad_size if pad_size is not None else {"height": 1024, "width": 1024}
        pad_size = get_size_dict(pad_size, default_to_square=True)

        mask_size = mask_size if mask_size is not None else {"longest_edge": 256}
        mask_size = (
            get_size_dict(max_size=mask_size, default_to_square=False)
            if not isinstance(mask_size, dict)
            else mask_size
        )

        mask_pad_size = mask_pad_size if mask_pad_size is not None else {"height": 256, "width": 256}
        mask_pad_size = get_size_dict(mask_pad_size, default_to_square=True)

        self.do_resize = do_resize
        self.size = size
        self.mask_size = mask_size
        self.resample = resample
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else IMAGENET_DEFAULT_MEAN
        self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
        self.do_pad = do_pad
        self.pad_size = pad_size
        self.mask_pad_size = mask_pad_size
        self.do_convert_rgb = do_convert_rgb
    def pad_image(
        self,
        image: np.ndarray,
        pad_size: Dict[str, int],
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Pad an image to `(pad_size["height"], pad_size["width"])` with zeros to the right and bottom.

        Args:
            image (`np.ndarray`):
                Image to pad.
            pad_size (`Dict[str, int]`):
                Size of the output image after padding.
            data_format (`str` or `ChannelDimension`, *optional*):
                The data format of the image. Can be either "channels_first" or "channels_last". If `None`, the
                `data_format` of the `image` will be used.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        """
        output_height, output_width = pad_size["height"], pad_size["width"]
        input_height, input_width = get_image_size(image, channel_dim=input_data_format)

        pad_width = output_width - input_width
        pad_height = output_height - input_height

        padded_image = pad(
            image,
            ((0, pad_height), (0, pad_width)),
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )
        return padded_image
    def _get_preprocess_shape(self, old_shape: Tuple[int, int], longest_edge: int):
        """
        Compute the output size given input size and target long side length.
        """
        oldh, oldw = old_shape
        scale = longest_edge * 1.0 / max(oldh, oldw)
        newh, neww = oldh * scale, oldw * scale
        newh = int(newh + 0.5)
        neww = int(neww + 0.5)
        return (newh, neww)
    def resize(
        self,
        image: np.ndarray,
        size: Dict[str, int],
        resample: PILImageResampling = PILImageResampling.BICUBIC,
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        **kwargs,
    ) -> np.ndarray:
        """
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Dictionary in the format `{"longest_edge": int}` specifying the size of the output image. The longest
                edge of the image will be resized to the specified size, while the other edge will be resized to
                maintain the aspect ratio.
            resample:
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.

        Returns:
            `np.ndarray`: The resized image.
        """
        size = get_size_dict(size)
        if "longest_edge" not in size:
            raise ValueError(f"The `size` dictionary must contain the key `longest_edge`. Got {size.keys()}")
        input_size = get_image_size(image, channel_dim=input_data_format)
        output_height, output_width = self._get_preprocess_shape(input_size, size["longest_edge"])
        return resize(
            image,
            size=(output_height, output_width),
            resample=resample,
            data_format=data_format,
            input_data_format=input_data_format,
            **kwargs,
        )

    def _preprocess(
        self,
        image: ImageInput,
        do_resize: bool,
        do_rescale: bool,
        do_normalize: bool,
        size: Optional[Dict[str, int]] = None,
        resample: PILImageResampling = None,
        rescale_factor: Optional[float] = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: Optional[bool] = None,
        pad_size: Optional[Dict[str, int]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ):
        if do_resize:
            image = self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
        reshaped_input_size = get_image_size(image, channel_dim=input_data_format)

        if do_rescale:
            image = self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)

        if do_normalize:
            image = self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)

        if do_pad:
            image = self.pad_image(image=image, pad_size=pad_size, input_data_format=input_data_format)

        return image, reshaped_input_size

    def _preprocess_image(
        self,
        image: ImageInput,
        do_resize: Optional[bool] = None,
        size: Optional[Dict[str, int]] = None,
        resample: PILImageResampling = None,
        do_rescale: Optional[bool] = None,
        rescale_factor: Optional[float] = None,
        do_normalize: Optional[bool] = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: Optional[bool] = None,
        pad_size: Optional[Dict[str, int]] = None,
        do_convert_rgb: Optional[bool] = None,
        data_format: Optional[Union[str, ChannelDimension]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]:
        image = to_numpy_array(image)

        # PIL RGBA images are converted to RGB
        if do_convert_rgb:
            image = convert_to_rgb(image)

        # All transformations expect numpy arrays.
        image = to_numpy_array(image)

        if is_scaled_image(image) and do_rescale:
            logger.warning_once(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )

        if input_data_format is None:
            input_data_format = infer_channel_dimension_format(image)

        original_size = get_image_size(image, channel_dim=input_data_format)

        image, reshaped_input_size = self._preprocess(
            image=image,
            do_resize=do_resize,
            size=size,
            resample=resample,
            do_rescale=do_rescale,
            rescale_factor=rescale_factor,
            do_normalize=do_normalize,
            image_mean=image_mean,
            image_std=image_std,
            do_pad=do_pad,
            pad_size=pad_size,
            input_data_format=input_data_format,
        )

        if data_format is not None:
            image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)

        return image, original_size, reshaped_input_size

    def _preprocess_mask(
        self,
        segmentation_map: ImageInput,
        do_resize: Optional[bool] = None,
        mask_size: Optional[Dict[str, int]] = None,
        do_pad: Optional[bool] = None,
        mask_pad_size: Optional[Dict[str, int]] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ) -> np.ndarray:
        segmentation_map = to_numpy_array(segmentation_map)

        # Add an axis to the segmentation maps for transformations.
        if segmentation_map.ndim == 2:
            added_channel_dim = True
            segmentation_map = segmentation_map[None, ...]
            input_data_format = ChannelDimension.FIRST
        else:
            added_channel_dim = False
            if input_data_format is None:
                input_data_format = infer_channel_dimension_format(segmentation_map, num_channels=1)

        original_size = get_image_size(segmentation_map, channel_dim=input_data_format)

        segmentation_map, _ = self._preprocess(
            image=segmentation_map,
            do_resize=do_resize,
            size=mask_size,
            resample=PILImageResampling.NEAREST,
            do_rescale=False,
            do_normalize=False,
            do_pad=do_pad,
            pad_size=mask_pad_size,
            input_data_format=input_data_format,
        )

        # Remove extra channel axis if added for processing
        if added_channel_dim:
            segmentation_map = segmentation_map.squeeze(0)
        segmentation_map = segmentation_map.astype(np.int64)

        return segmentation_map, original_size
    @filter_out_non_signature_kwargs()
    def preprocess(
        self,
        images: ImageInput,
        segmentation_maps: Optional[ImageInput] = None,
        do_resize: Optional[bool] = None,
        size: Optional[Dict[str, int]] = None,
        mask_size: Optional[Dict[str, int]] = None,
        resample: Optional["PILImageResampling"] = None,
        do_rescale: Optional[bool] = None,
        rescale_factor: Optional[Union[int, float]] = None,
        do_normalize: Optional[bool] = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_pad: Optional[bool] = None,
        pad_size: Optional[Dict[str, int]] = None,
        mask_pad_size: Optional[Dict[str, int]] = None,
        do_convert_rgb: Optional[bool] = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: ChannelDimension = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ):
        """
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            segmentation_maps (`ImageInput`, *optional*):
                Segmentation map to preprocess.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Controls the size of the image after `resize`. The longest edge of the image is resized to
                `size["longest_edge"]` whilst preserving the aspect ratio.
            mask_size (`Dict[str, int]`, *optional*, defaults to `self.mask_size`):
                Controls the size of the segmentation map after `resize`. The longest edge of the image is resized to
                `size["longest_edge"]` whilst preserving the aspect ratio.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image pixel values by rescaling factor.
            rescale_factor (`int` or `float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to apply to the image pixel values.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to normalize the image by if `do_normalize` is set to `True`.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
            do_pad (`bool`, *optional*, defaults to `self.do_pad`):
                Whether to pad the image.
            pad_size (`Dict[str, int]`, *optional*, defaults to `self.pad_size`):
                Controls the size of the padding applied to the image. The image is padded to `pad_size["height"]` and
                `pad_size["width"]` if `do_pad` is set to `True`.
            mask_pad_size (`Dict[str, int]`, *optional*, defaults to `self.mask_pad_size`):
                Controls the size of the padding applied to the segmentation map. The image is padded to
                `mask_pad_size["height"]` and `mask_pad_size["width"]` if `do_pad` is set to `True`.
            do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
                Whether to convert the image to RGB.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        """
        do_resize = do_resize if do_resize is not None else self.do_resize
        size = size if size is not None else self.size
        size = get_size_dict(max_size=size, default_to_square=False) if not isinstance(size, dict) else size
        mask_size = mask_size if mask_size is not None else self.mask_size
        mask_size = (
            get_size_dict(max_size=mask_size, default_to_square=False)
            if not isinstance(mask_size, dict)
            else mask_size
        )
        resample = resample if resample is not None else self.resample
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        image_mean = image_mean if image_mean is not None else self.image_mean
        image_std = image_std if image_std is not None else self.image_std
        do_pad = do_pad if do_pad is not None else self.do_pad
        pad_size = pad_size if pad_size is not None else self.pad_size
        pad_size = get_size_dict(pad_size, default_to_square=True)
        mask_pad_size = mask_pad_size if mask_pad_size is not None else self.mask_pad_size
        mask_pad_size = get_size_dict(mask_pad_size, default_to_square=True)
        do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb

        images = make_list_of_images(images)

        if not valid_images(images):
            raise ValueError(
                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or "
                "jax.ndarray."
            )

        if segmentation_maps is not None:
            segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2)

            if not valid_images(segmentation_maps):
                raise ValueError(
                    "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, "
                    "tf.Tensor or jax.ndarray."
                )

        validate_preprocess_arguments(
            do_rescale=do_rescale,
            rescale_factor=rescale_factor,
            do_normalize=do_normalize,
            image_mean=image_mean,
            image_std=image_std,
            do_pad=do_pad,
            size_divisibility=pad_size,  # `pad_image` needs both do_pad and pad_size
            do_resize=do_resize,
            size=size,
            resample=resample,
        )

        images, original_sizes, reshaped_input_sizes = zip(
            *(
                self._preprocess_image(
                    image=img,
                    do_resize=do_resize,
                    size=size,
                    resample=resample,
                    do_rescale=do_rescale,
                    rescale_factor=rescale_factor,
                    do_normalize=do_normalize,
                    image_mean=image_mean,
                    image_std=image_std,
                    do_pad=do_pad,
                    pad_size=pad_size,
                    do_convert_rgb=do_convert_rgb,
                    data_format=data_format,
                    input_data_format=input_data_format,
                )
                for img in images
            )
        )

        data = {
            "pixel_values": images,
            "original_sizes": original_sizes,
            "reshaped_input_sizes": reshaped_input_sizes,
        }

        if segmentation_maps is not None:
            segmentation_maps, original_mask_sizes = zip(
                *(
                    self._preprocess_mask(
                        segmentation_map=mask,
                        do_resize=do_resize,
                        mask_size=mask_size,
                        do_pad=do_pad,
                        mask_pad_size=mask_pad_size,
                        input_data_format=input_data_format,
                    )
                    for mask in segmentation_maps
                )
            )

            # masks should start out the same size as the input images
            assert all(
                original_im_size == original_mask_size
                for original_im_size, original_mask_size in zip(original_sizes, original_mask_sizes)
            ), "Segmentation maps should be the same size as input images."

            data["labels"] = segmentation_maps

        return BatchFeature(data=data, tensor_type=return_tensors)
    def post_process_masks(
        self,
        masks,
        original_sizes,
        reshaped_input_sizes,
        mask_threshold=0.0,
        binarize=True,
        pad_size=None,
        return_tensors="pt",
    ):
        """
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`Union[List[torch.Tensor], List[np.ndarray], List[tf.Tensor]]`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`Union[torch.Tensor, tf.Tensor, List[Tuple[int,int]]]`):
                The original sizes of each image before it was resized to the model's expected input shape, in (height,
                width) format.
            reshaped_input_sizes (`Union[torch.Tensor, tf.Tensor, List[Tuple[int,int]]]`):
                The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                The threshold to use for binarizing the masks.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            pad_size (`int`, *optional*, defaults to `self.pad_size`):
                The target size the images were padded to before being passed to the model. If None, the target size is
                assumed to be the processor's `pad_size`.
            return_tensors (`str`, *optional*, defaults to `"pt"`):
                If `"pt"`, return PyTorch tensors. If `"tf"`, return TensorFlow tensors.
        Returns:
            (`Union[torch.Tensor, tf.Tensor]`): Batched masks in (batch_size, num_channels, height, width) format,
            where (height, width) is given by original_size.
        """
        if return_tensors == "pt":
            return self._post_process_masks_pt(
                masks=masks,
                original_sizes=original_sizes,
                reshaped_input_sizes=reshaped_input_sizes,
                mask_threshold=mask_threshold,
                binarize=binarize,
                pad_size=pad_size,
            )
        elif return_tensors == "tf":
            return self._post_process_masks_tf(
                masks=masks,
                original_sizes=original_sizes,
                reshaped_input_sizes=reshaped_input_sizes,
                mask_threshold=mask_threshold,
                binarize=binarize,
                pad_size=pad_size,
            )
        else:
            raise ValueError("return_tensors must be either 'pt' or 'tf'")
    def _post_process_masks_pt(
        self, masks, original_sizes, reshaped_input_sizes, mask_threshold=0.0, binarize=True, pad_size=None
    ):
        """
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`Union[List[torch.Tensor], List[np.ndarray]]`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
                The original sizes of each image before it was resized to the model's expected input shape, in (height,
                width) format.
            reshaped_input_sizes (`Union[torch.Tensor, List[Tuple[int,int]]]`):
                The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                The threshold to use for binarizing the masks.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            pad_size (`int`, *optional*, defaults to `self.pad_size`):
                The target size the images were padded to before being passed to the model. If None, the target size is
                assumed to be the processor's `pad_size`.
        Returns:
            (`torch.Tensor`): Batched masks in (batch_size, num_channels, height, width) format, where (height, width)
            is given by original_size.
        """
        requires_backends(self, ["torch"])
        pad_size = self.pad_size if pad_size is None else pad_size
        target_image_size = (pad_size["height"], pad_size["width"])
        if isinstance(original_sizes, (torch.Tensor, np.ndarray)):
            original_sizes = original_sizes.tolist()
        if isinstance(reshaped_input_sizes, (torch.Tensor, np.ndarray)):
            reshaped_input_sizes = reshaped_input_sizes.tolist()
        output_masks = []
        for i, original_size in enumerate(original_sizes):
            if isinstance(masks[i], np.ndarray):
                masks[i] = torch.from_numpy(masks[i])
            elif not isinstance(masks[i], torch.Tensor):
                raise ValueError("Input masks should be a list of `torch.tensors` or a list of `np.ndarray`")
            interpolated_mask = F.interpolate(masks[i], target_image_size, mode="bilinear", align_corners=False)
            interpolated_mask = interpolated_mask[..., : reshaped_input_sizes[i][0], : reshaped_input_sizes[i][1]]
            interpolated_mask = F.interpolate(interpolated_mask, original_size, mode="bilinear", align_corners=False)
            if binarize:
                interpolated_mask = interpolated_mask > mask_threshold
            output_masks.append(interpolated_mask)

        return output_masks
    def _post_process_masks_tf(
        self, masks, original_sizes, reshaped_input_sizes, mask_threshold=0.0, binarize=True, pad_size=None
    ):
        """
        Remove padding and upscale masks to the original image size.

        Args:
            masks (`tf.Tensor`):
                Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
            original_sizes (`tf.Tensor`):
                The original size of the images before resizing for input to the model, in (height, width) format.
            reshaped_input_sizes (`tf.Tensor`):
                The size of the image input to the model, in (height, width) format. Used to remove padding.
            mask_threshold (`float`, *optional*, defaults to 0.0):
                The threshold to use for binarizing the masks.
            binarize (`bool`, *optional*, defaults to `True`):
                Whether to binarize the masks.
            pad_size (`int`, *optional*, defaults to `self.pad_size`):
                The target size the images were padded to before being passed to the model. If None, the target size is
                assumed to be the processor's `pad_size`.
        Returns:
            (`tf.Tensor`): Batched masks in (batch_size, num_channels, height, width) format, where (height, width) is
            given by original_size.
        """
        requires_backends(self, ["tf"])
        pad_size = self.pad_size if pad_size is None else pad_size
        target_image_size = (pad_size["height"], pad_size["width"])

        output_masks = []
        for i, original_size in enumerate(original_sizes):
            # tf.image expects NHWC, so transpose the NCHW inputs for it
            mask = tf.transpose(masks[i], perm=[0, 2, 3, 1])
            interpolated_mask = tf.image.resize(mask, target_image_size, method="bilinear")
            interpolated_mask = interpolated_mask[:, : reshaped_input_sizes[i][0], : reshaped_input_sizes[i][1], :]
            interpolated_mask = tf.image.resize(interpolated_mask, original_size, method="bilinear")
            if binarize:
                interpolated_mask = interpolated_mask > mask_threshold
            # transpose back to NCHW at the end
            output_masks.append(tf.transpose(interpolated_mask, perm=[0, 3, 1, 2]))

        return output_masks
    def post_process_for_mask_generation(
        self, all_masks, all_scores, all_boxes, crops_nms_thresh, return_tensors="pt"
    ):
        """
        Post processes masks that are generated by calling the Non Maximum Suppression algorithm on the predicted
        masks.

        Args:
            all_masks (`Union[List[torch.Tensor], List[tf.Tensor]]`):
                List of all predicted segmentation masks
            all_scores (`Union[List[torch.Tensor], List[tf.Tensor]]`):
                List of all predicted iou scores
            all_boxes (`Union[List[torch.Tensor], List[tf.Tensor]]`):
                List of all bounding boxes of the predicted masks
            crops_nms_thresh (`float`):
                Threshold for NMS (Non Maximum Suppression) algorithm.
            return_tensors (`str`, *optional*, defaults to `pt`):
                If `pt`, returns `torch.Tensor`. If `tf`, returns `tf.Tensor`.
        """
        if return_tensors == "pt":
            return _postprocess_for_mg(all_masks, all_scores, all_boxes, crops_nms_thresh)
        elif return_tensors == "tf":
            return _postprocess_for_mg_tf(all_masks, all_scores, all_boxes, crops_nms_thresh)
    def generate_crop_boxes(
        self,
        image,
        target_size,
        crop_n_layers: int = 0,
        overlap_ratio: float = 512 / 1500,
        points_per_crop: Optional[int] = 32,
        crop_n_points_downscale_factor: Optional[List[int]] = 1,
        device: Optional["torch.device"] = None,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
        return_tensors: str = "pt",
    ):
        """
        Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.

        Args:
            image (`np.array`):
                Input original image
            target_size (`int`):
                Target size of the resized image
            crop_n_layers (`int`, *optional*, defaults to 0):
                If >0, mask prediction will be run again on crops of the image. Sets the number of layers to run, where
                each layer has 2**i_layer number of image crops.
            overlap_ratio (`float`, *optional*, defaults to 512/1500):
                Sets the degree to which crops overlap. In the first crop layer, crops will overlap by this fraction of
                the image length. Later layers with more crops scale down this overlap.
            points_per_crop (`int`, *optional*, defaults to 32):
                Number of points to sample from each crop.
            crop_n_points_downscale_factor (`List[int]`, *optional*, defaults to 1):
                The number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n.
            device (`torch.device`, *optional*, defaults to None):
                Device to use for the computation. If None, cpu will be used.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
            return_tensors (`str`, *optional*, defaults to `pt`):
                If `pt`, returns `torch.Tensor`. If `tf`, returns `tf.Tensor`.
        """
        crop_boxes, points_per_crop, cropped_images, input_labels = _generate_crop_boxes(
            image,
            target_size,
            crop_n_layers,
            overlap_ratio,
            points_per_crop,
            crop_n_points_downscale_factor,
            input_data_format,
        )
        if return_tensors == "pt":
            if device is None:
                device = torch.device("cpu")
            crop_boxes = torch.tensor(crop_boxes, device=device)
            points_per_crop = torch.tensor(points_per_crop, device=device)
            # cropped_images stays as np
            input_labels = torch.tensor(input_labels, device=device)

        elif return_tensors == "tf":
            if device is not None:
                raise ValueError("device is not a supported argument when return_tensors is tf!")
            crop_boxes = tf.convert_to_tensor(crop_boxes)
            points_per_crop = tf.convert_to_tensor(points_per_crop)
            # cropped_images stays as np
            input_labels = tf.convert_to_tensor(input_labels)
        else:
            raise ValueError("return_tensors must be either 'pt' or 'tf'.")

        return crop_boxes, points_per_crop, cropped_images, input_labels
    def filter_masks(
        self,
        masks,
        iou_scores,
        original_size,
        cropped_box_image,
        pred_iou_thresh=0.88,
        stability_score_thresh=0.95,
        mask_threshold=0,
        stability_score_offset=1,
        return_tensors="pt",
    ):
        """
        Filters the predicted masks by selecting only the ones that meet several criteria. The first criterion is
        that the iou scores need to be greater than `pred_iou_thresh`. The second criterion is that the stability
        score needs to be greater than `stability_score_thresh`. The method also converts the predicted masks to
        bounding boxes and pads the predicted masks if necessary.

        Args:
            masks (`Union[torch.Tensor, tf.Tensor]`):
                Input masks.
            iou_scores (`Union[torch.Tensor, tf.Tensor]`):
                List of IoU scores.
            original_size (`Tuple[int,int]`):
                Size of the original image.
            cropped_box_image (`np.array`):
                The cropped image.
            pred_iou_thresh (`float`, *optional*, defaults to 0.88):
                The threshold for the iou scores.
            stability_score_thresh (`float`, *optional*, defaults to 0.95):
                The threshold for the stability score.
            mask_threshold (`float`, *optional*, defaults to 0):
                The threshold for the predicted masks.
            stability_score_offset (`float`, *optional*, defaults to 1):
                The offset for the stability score used in the `_compute_stability_score` method.
            return_tensors (`str`, *optional*, defaults to `pt`):
                If `pt`, returns `torch.Tensor`. If `tf`, returns `tf.Tensor`.
        """
        if return_tensors == "pt":
            return self._filter_masks_pt(
                masks=masks,
                iou_scores=iou_scores,
                original_size=original_size,
                cropped_box_image=cropped_box_image,
                pred_iou_thresh=pred_iou_thresh,
                stability_score_thresh=stability_score_thresh,
                mask_threshold=mask_threshold,
                stability_score_offset=stability_score_offset,
            )
        elif return_tensors == "tf":
            return self._filter_masks_tf(
                masks=masks,
                iou_scores=iou_scores,
                original_size=original_size,
                cropped_box_image=cropped_box_image,
                pred_iou_thresh=pred_iou_thresh,
                stability_score_thresh=stability_score_thresh,
                mask_threshold=mask_threshold,
                stability_score_offset=stability_score_offset,
            )
    def _filter_masks_pt(
        self,
        masks,
        iou_scores,
        original_size,
        cropped_box_image,
        pred_iou_thresh=0.88,
        stability_score_thresh=0.95,
        mask_threshold=0,
        stability_score_offset=1,
    ):
        """
        Filters the predicted masks by selecting only the ones that meet several criteria. The first criterion is
        that the iou scores need to be greater than `pred_iou_thresh`. The second criterion is that the stability
        score needs to be greater than `stability_score_thresh`. The method also converts the predicted masks to
        bounding boxes and pads the predicted masks if necessary.

        Args:
            masks (`torch.Tensor`):
                Input masks.
            iou_scores (`torch.Tensor`):
                List of IoU scores.
            original_size (`Tuple[int,int]`):
                Size of the original image.
            cropped_box_image (`np.array`):
                The cropped image.
            pred_iou_thresh (`float`, *optional*, defaults to 0.88):
                The threshold for the iou scores.
            stability_score_thresh (`float`, *optional*, defaults to 0.95):
                The threshold for the stability score.
            mask_threshold (`float`, *optional*, defaults to 0):
                The threshold for the predicted masks.
            stability_score_offset (`float`, *optional*, defaults to 1):
                The offset for the stability score used in the `_compute_stability_score` method.
        """
        requires_backends(self, ["torch"])
        original_height, original_width = original_size
        iou_scores = iou_scores.flatten(0, 1)
        masks = masks.flatten(0, 1)

        if masks.shape[0] != iou_scores.shape[0]:
            raise ValueError("masks and iou_scores must have the same batch size.")

        if masks.device != iou_scores.device:
            iou_scores = iou_scores.to(masks.device)

        batch_size = masks.shape[0]

        keep_mask = torch.ones(batch_size, dtype=torch.bool, device=masks.device)

        if pred_iou_thresh > 0.0:
            keep_mask = keep_mask & (iou_scores > pred_iou_thresh)

        # compute stability score
        if stability_score_thresh > 0.0:
            stability_scores = _compute_stability_score_pt(masks, mask_threshold, stability_score_offset)
            keep_mask = keep_mask & (stability_scores > stability_score_thresh)

        scores = iou_scores[keep_mask]
        masks = masks[keep_mask]

        # binarize masks
        masks = masks > mask_threshold
        converted_boxes = _batched_mask_to_box(masks)

        keep_mask = ~_is_box_near_crop_edge(
            converted_boxes, cropped_box_image, [0, 0, original_width, original_height]
        )

        scores = scores[keep_mask]
        masks = masks[keep_mask]
        converted_boxes = converted_boxes[keep_mask]

        masks = _pad_masks(masks, cropped_box_image, original_height, original_width)
        # conversion to rle is necessary to run non-maximum suppression
        masks = _mask_to_rle_pytorch(masks)

        return masks, scores, converted_boxes
    def _filter_masks_tf(
        self,
        masks,
        iou_scores,
        original_size,
        cropped_box_image,
        pred_iou_thresh=0.88,
        stability_score_thresh=0.95,
        mask_threshold=0,
        stability_score_offset=1,
    ):
        """
        Filters the predicted masks by selecting only the ones that meet several criteria. The first criterion is
        that the iou scores need to be greater than `pred_iou_thresh`. The second criterion is that the stability
        score needs to be greater than `stability_score_thresh`. The method also converts the predicted masks to
        bounding boxes and pads the predicted masks if necessary.

        Args:
            masks (`tf.Tensor`):
                Input masks.
            iou_scores (`tf.Tensor`):
                List of IoU scores.
            original_size (`Tuple[int,int]`):
                Size of the original image.
            cropped_box_image (`np.array`):
                The cropped image.
            pred_iou_thresh (`float`, *optional*, defaults to 0.88):
                The threshold for the iou scores.
            stability_score_thresh (`float`, *optional*, defaults to 0.95):
                The threshold for the stability score.
            mask_threshold (`float`, *optional*, defaults to 0):
                The threshold for the predicted masks.
            stability_score_offset (`float`, *optional*, defaults to 1):
                The offset for the stability score used in the `_compute_stability_score` method.
        """
        requires_backends(self, ["tf"])
        original_height, original_width = original_size
        iou_scores = tf.reshape(iou_scores, [iou_scores.shape[0] * iou_scores.shape[1], *iou_scores.shape[2:]])
        masks = tf.reshape(masks, [masks.shape[0] * masks.shape[1], *masks.shape[2:]])

        if masks.shape[0] != iou_scores.shape[0]:
            raise ValueError("masks and iou_scores must have the same batch size.")

        batch_size = masks.shape[0]

        keep_mask = tf.ones(batch_size, dtype=tf.bool)

        if pred_iou_thresh > 0.0:
            keep_mask = keep_mask & (iou_scores > pred_iou_thresh)

        # compute stability score
        if stability_score_thresh > 0.0:
            stability_scores = _compute_stability_score_tf(masks, mask_threshold, stability_score_offset)
            keep_mask = keep_mask & (stability_scores > stability_score_thresh)

        scores = iou_scores[keep_mask]
        masks = masks[keep_mask]

        # binarize masks
        masks = masks > mask_threshold
        converted_boxes = _batched_mask_to_box_tf(masks)

        keep_mask = ~_is_box_near_crop_edge_tf(
            converted_boxes, cropped_box_image, [0, 0, original_width, original_height]
        )

        scores = scores[keep_mask]
        masks = masks[keep_mask]
        converted_boxes = converted_boxes[keep_mask]

        masks = _pad_masks_tf(masks, cropped_box_image, original_height, original_width)
        # conversion to rle is necessary to run non-maximum suppression
        masks = _mask_to_rle_tf(masks)

        return masks, scores, converted_boxes


def _compute_stability_score_pt(masks: "torch.Tensor", mask_threshold: float, stability_score_offset: int):
    # One mask is always contained inside the other.
    # Save memory by preventing unnecessary cast to torch.int64
    intersections = (
        (masks > (mask_threshold + stability_score_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
    )
    unions = (masks > (mask_threshold - stability_score_offset)).sum(-1, dtype=torch.int16).sum(-1, dtype=torch.int32)
    stability_scores = intersections / unions
    return stability_scores


def _compute_stability_score_tf(masks: "tf.Tensor", mask_threshold: float, stability_score_offset: int):
    # Torch does Py3-style division but TF does floor division with ints. We count in float32 to make sure
    # we get the right division results.
    intersections = tf.math.count_nonzero(
        masks > (mask_threshold + stability_score_offset), axis=[-1, -2], dtype=tf.float32
    )
    unions = tf.math.count_nonzero(masks > (mask_threshold - stability_score_offset), axis=[-1, -2], dtype=tf.float32)
    stability_scores = intersections / unions
    return stability_scores


def _build_point_grid(n_per_side: int) -> np.ndarray:
    """Generates a 2D grid of points evenly spaced in [0,1]x[0,1]."""
    offset = 1 / (2 * n_per_side)
    points_one_side = np.linspace(offset, 1 - offset, n_per_side)
    points_x = np.tile(points_one_side[None, :], (n_per_side, 1))
    points_y = np.tile(points_one_side[:, None], (1, n_per_side))
    points = np.stack([points_x, points_y], axis=-1).reshape(-1, 2)
    return points
def _normalize_coordinates(
    target_size: int, coords: np.ndarray, original_size: Tuple[int, int], is_bounding_box=False
) -> np.ndarray:
    """
    Expects a numpy array of length 2 in the final dimension. Requires the original image size in (height, width)
    format.
    """
    old_height, old_width = original_size

    scale = target_size * 1.0 / max(old_height, old_width)
    new_height, new_width = old_height * scale, old_width * scale
    new_width = int(new_width + 0.5)
    new_height = int(new_height + 0.5)

    coords = deepcopy(coords).astype(float)

    if is_bounding_box:
        coords = coords.reshape(-1, 2, 2)

    coords[..., 0] = coords[..., 0] * (new_width / old_width)
    coords[..., 1] = coords[..., 1] * (new_height / old_height)

    if is_bounding_box:
        coords = coords.reshape(-1, 4)

    return coords
def _generate_crop_boxes(
    image,
    target_size: int,
    crop_n_layers: int = 0,
    overlap_ratio: float = 512 / 1500,
    points_per_crop: Optional[int] = 32,
    crop_n_points_downscale_factor: Optional[List[int]] = 1,
    input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> Tuple[List[List[int]], List[int]]:
    """
    Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.

    Args:
        image (Union[`numpy.ndarray`, `PIL.Image`, `torch.Tensor`]):
            Image to generate crops for.
        target_size (`int`):
            Size of the smallest crop.
        crop_n_layers (`int`, *optional*):
            If `crop_n_layers>0`, mask prediction will be run again on crops of the image. Sets the number of layers
            to run, where each layer has 2**i_layer number of image crops.
        overlap_ratio (`int`, *optional*):
            Sets the degree to which crops overlap. In the first crop layer, crops will overlap by this fraction of the
            image length. Later layers with more crops scale down this overlap.
        points_per_crop (`int`, *optional*):
            Number of points to sample per crop.
        crop_n_points_downscale_factor (`int`, *optional*):
            The number of points-per-side sampled in layer n is scaled down by crop_n_points_downscale_factor**n.
        input_data_format (`str` or `ChannelDimension`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred.
    """
    if isinstance(image, list):
        raise ValueError("Only one image is allowed for crop generation.")
    image = to_numpy_array(image)
    original_size = get_image_size(image, channel_dim=input_data_format)

    points_grid = []
    for i in range(crop_n_layers + 1):
        n_points = int(points_per_crop / (crop_n_points_downscale_factor**i))
        points_grid.append(_build_point_grid(n_points))

    crop_boxes, layer_idxs = _generate_per_layer_crops(crop_n_layers, overlap_ratio, original_size)

    cropped_images, point_grid_per_crop = _generate_crop_images(
        crop_boxes, image, points_grid, layer_idxs, target_size, original_size, input_data_format
    )
    crop_boxes = np.array(crop_boxes)
    crop_boxes = crop_boxes.astype(np.float32)
    points_per_crop = np.array([point_grid_per_crop])
    points_per_crop = np.transpose(points_per_crop, axes=(0, 2, 1, 3))

    input_labels = np.ones_like(points_per_crop[:, :, :, 0], dtype=np.int64)

    return crop_boxes, points_per_crop, cropped_images, input_labels
def _generate_per_layer_crops(crop_n_layers, overlap_ratio, original_size):
    """
    Generates 2 ** (layers idx + 1) crops for each crop_n_layers. Crops are in the XYWH format: the XYWH format
    consists of the following required indices:
        - X: X coordinate of the top left of the bounding box
        - Y: Y coordinate of the top left of the bounding box
        - W: width of the bounding box
        - H: height of the bounding box
    """
    crop_boxes, layer_idxs = [], []
    im_height, im_width = original_size
    short_side = min(im_height, im_width)

    # Original image
    crop_boxes.append([0, 0, im_width, im_height])
    layer_idxs.append(0)
    for i_layer in range(crop_n_layers):
        n_crops_per_side = 2 ** (i_layer + 1)
        overlap = int(overlap_ratio * short_side * (2 / n_crops_per_side))

        crop_width = int(math.ceil((overlap * (n_crops_per_side - 1) + im_width) / n_crops_per_side))
        crop_height = int(math.ceil((overlap * (n_crops_per_side - 1) + im_height) / n_crops_per_side))

        crop_box_x0 = [int((crop_width - overlap) * i) for i in range(n_crops_per_side)]
        crop_box_y0 = [int((crop_height - overlap) * i) for i in range(n_crops_per_side)]

        for left, top in product(crop_box_x0, crop_box_y0):
            box = [left, top, min(left + crop_width, im_width), min(top + crop_height, im_height)]
            crop_boxes.append(box)
            layer_idxs.append(i_layer + 1)

    return crop_boxes, layer_idxs
def _generate_crop_images(
    crop_boxes, image, points_grid, layer_idxs, target_size, original_size, input_data_format=None
):
    """
    Takes as an input bounding boxes that are used to crop the image. Based on the crops, the corresponding points are
    also passed.
    """
    cropped_images = []
    total_points_per_crop = []
    for i, crop_box in enumerate(crop_boxes):
        left, top, right, bottom = crop_box

        channel_dim = infer_channel_dimension_format(image, input_data_format)
        if channel_dim == ChannelDimension.LAST:
            cropped_im = image[top:bottom, left:right, :]
        else:
            cropped_im = image[:, top:bottom, left:right]

        cropped_images.append(cropped_im)

        cropped_im_size = get_image_size(cropped_im, channel_dim)
        points_scale = np.array(cropped_im_size)[None, ::-1]

        points = points_grid[layer_idxs[i]] * points_scale
        normalized_points = _normalize_coordinates(target_size, points, original_size)
        total_points_per_crop.append(normalized_points)

    return cropped_images, total_points_per_crop


def _pad_masks(masks, crop_box: List[int], orig_height: int, orig_width: int):
    left, top, right, bottom = crop_box
    if left == 0 and top == 0 and right == orig_width and bottom == orig_height:
        return masks
    # Coordinate transform masks
    pad_x, pad_y = orig_width - (right - left), orig_height - (bottom - top)
    pad = (left, pad_x - left, top, pad_y - top)
    return torch.nn.functional.pad(masks, pad, value=0)


def _pad_masks_tf(masks, crop_box: List[int], orig_height: int, orig_width: int):
    left, top, right, bottom = crop_box
    if left == 0 and top == 0 and right == orig_width and bottom == orig_height:
        return masks
    # Coordinate transform masks
    pad_x, pad_y = orig_width - (right - left), orig_height - (bottom - top)
    pad = (left, pad_x - left, top, pad_y - top)
    return tf.pad(masks, pad, constant_values=0)


def _is_box_near_crop_edge(boxes, crop_box, orig_box, atol=20.0):
    """Filter masks at the edge of a crop, but not at the edge of the original image."""
    crop_box_torch = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device)
    orig_box_torch = torch.as_tensor(orig_box, dtype=torch.float, device=boxes.device)

    left, top, _, _ = crop_box
    offset = torch.tensor([[left, top, left, top]], device=boxes.device)
    if len(boxes.shape) == 3:
        offset = offset.unsqueeze(1)
    boxes = (boxes + offset).float()

    near_crop_edge = torch.isclose(boxes, crop_box_torch[None, :], atol=atol, rtol=0)
    near_image_edge = torch.isclose(boxes, orig_box_torch[None, :], atol=atol, rtol=0)
    near_crop_edge = torch.logical_and(near_crop_edge, ~near_image_edge)
    return torch.any(near_crop_edge, dim=1)


def _is_box_near_crop_edge_tf(boxes, crop_box, orig_box, atol=20.0):
    """Filter masks at the edge of a crop, but not at the edge of the original image."""
    crop_box_tf = tf.convert_to_tensor(crop_box, dtype=tf.float32)
    orig_box_tf = tf.convert_to_tensor(orig_box, dtype=tf.float32)

    left, top, _, _ = crop_box
    offset = tf.convert_to_tensor([[left, top, left, top]])
    if len(boxes.shape) == 3:
        offset = tf.expand_dims(offset, 1)
    boxes = tf.cast(boxes + offset, tf.float32)

    near_crop_edge = tnp.isclose(boxes, crop_box_tf[None, :], atol=atol, rtol=0)
    near_image_edge = tnp.isclose(boxes, orig_box_tf[None, :], atol=atol, rtol=0)
    near_crop_edge = tf.math.logical_and(near_crop_edge, ~near_image_edge)
    return tf.reduce_any(near_crop_edge, axis=1)
def _batched_mask_to_box(masks: "torch.Tensor"):
    """
    Computes the bounding boxes around the given input masks. The bounding boxes are in the XYXY format which
    corresponds to the following required indices:
        - LEFT: left hand side of the bounding box
        - TOP: top of the bounding box
        - RIGHT: right of the bounding box
        - BOTTOM: bottom of the bounding box

    Return [0,0,0,0] for an empty mask. For input shape channel_1 x channel_2 x ... x height x width, the output shape
    is channel_1 x channel_2 x ... x 4.

    Args:
        - masks (`torch.Tensor` of shape `(batch, nb_mask, height, width)`)
    """
    # torch.max below raises an error on empty inputs, just skip in this case
    if torch.numel(masks) == 0:
        return torch.zeros(*masks.shape[:-2], 4, device=masks.device)

    # Normalize shape to Cxheightxwidth
    shape = masks.shape
    height, width = shape[-2:]

    # Get top and bottom edges
    in_height, _ = torch.max(masks, dim=-1)
    in_height_coords = in_height * torch.arange(height, device=in_height.device)[None, :]
    bottom_edges, _ = torch.max(in_height_coords, dim=-1)
    in_height_coords = in_height_coords + height * (~in_height)
    top_edges, _ = torch.min(in_height_coords, dim=-1)

    # Get left and right edges
    in_width, _ = torch.max(masks, dim=-2)
    in_width_coords = in_width * torch.arange(width, device=in_width.device)[None, :]
    right_edges, _ = torch.max(in_width_coords, dim=-1)
    in_width_coords = in_width_coords + width * (~in_width)
    left_edges, _ = torch.min(in_width_coords, dim=-1)

    # If the mask is empty the right edge will be to the left of the left edge.
    # Replace these boxes with [0, 0, 0, 0]
    empty_filter = (right_edges < left_edges) | (bottom_edges < top_edges)
    out = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
    out = out * (~empty_filter).unsqueeze(-1)

    # Return to original shape
    out = out.reshape(*shape[:-2], 4)
    return out
def _batched_mask_to_box_tf(masks: "tf.Tensor"):
    """
    Computes the bounding boxes around the given input masks. The bounding boxes are in the XYXY format which
    corresponds to the following required indices:
        - LEFT: left hand side of the bounding box
        - TOP: top of the bounding box
        - RIGHT: right of the bounding box
        - BOTTOM: bottom of the bounding box

    Return [0,0,0,0] for an empty mask. For input shape channel_1 x channel_2 x ... x height x width, the output shape
    is channel_1 x channel_2 x ... x 4.

    Args:
        - masks (`tf.Tensor` of shape `(batch, nb_mask, height, width)`)
    """
    if tf.size(masks) == 0:
        return tf.zeros([*masks.shape[:-2], 4])

    # Normalize shape to Cxheightxwidth
    shape = shape_list(masks)
    height, width = shape[-2:]

    # Get top and bottom edges
    in_height = tf.reduce_max(masks, axis=-1)
    in_height_coords = in_height * tf.range(height)[None, :]
    bottom_edges = tf.reduce_max(in_height_coords, axis=-1)
    in_height_coords = in_height_coords + height * (~in_height)
    top_edges = tf.reduce_min(in_height_coords, axis=-1)

    # Get left and right edges
    in_width = tf.reduce_max(masks, axis=-2)
    in_width_coords = in_width * tf.range(width)[None, :]
    right_edges = tf.reduce_max(in_width_coords, axis=-1)
    in_width_coords = in_width_coords + width * (~in_width)
    left_edges = tf.reduce_min(in_width_coords, axis=-1)

    # If the mask is empty the right edge will be to the left of the left edge.
    # Replace these boxes with [0, 0, 0, 0]
    empty_filter = (right_edges < left_edges) | (bottom_edges < top_edges)
    out = tf.stack([left_edges, top_edges, right_edges, bottom_edges], axis=-1)
    out = out * tf.expand_dims(~empty_filter, -1)

    # Return to original shape
    out = tf.reshape(out, [*shape[:-2], 4])
    return out
def _mask_to_rle_pytorch(input_mask: "torch.Tensor"):
    """
    Encodes masks to a run-length encoding (RLE), in the format expected by pycoco tools.
    """
    # Put in fortran order and flatten height and width
    batch_size, height, width = input_mask.shape
    input_mask = input_mask.permute(0, 2, 1).flatten(1)

    # Compute change indices
    diff = input_mask[:, 1:] ^ input_mask[:, :-1]
    change_indices = diff.nonzero()

    # Encode run length
    out = []
    for i in range(batch_size):
        cur_idxs = change_indices[change_indices[:, 0] == i, 1] + 1
        btw_idxs = cur_idxs[1:] - cur_idxs[:-1]
        counts = [] if input_mask[i, 0] == 0 else [0]
        counts += [cur_idxs[0].item()] + btw_idxs.tolist() + [height * width - cur_idxs[-1]]
        out.append({"size": [height, width], "counts": counts})
    return out


def _mask_to_rle_tf(input_mask: "tf.Tensor"):
    """
    Encodes masks to a run-length encoding (RLE), in the format expected by pycoco tools.
    """
    # Put in fortran order and flatten height and width
    batch_size, height, width = input_mask.shape
    input_mask = flatten(tf.transpose(input_mask, perm=(0, 2, 1)), 1)

    # Compute change indices
    diff = input_mask[:, 1:] ^ input_mask[:, :-1]
    change_indices = tf.where(diff)

    # Encode run length
    out = []
    for i in range(batch_size):
        cur_idxs = change_indices[change_indices[:, 0] == i, 1] + 1
        btw_idxs = cur_idxs[1:] - cur_idxs[:-1]
        counts = [] if input_mask[i, 0] == 0 else [0]
        counts += [cur_idxs[0].item()] + btw_idxs.tolist() + [height * width - cur_idxs[-1]]
        out.append({"size": [height, width], "counts": counts})
    return out


def _rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """Compute a binary mask from an uncompressed RLE."""
    height, width = rle["size"]
    mask = np.empty(height * width, dtype=bool)
    idx = 0
    parity = False
    for count in rle["counts"]:
        mask[idx : idx + count] = parity
        idx += count
        parity = not parity
    mask = mask.reshape(width, height)
    return mask.transpose()  # Reshape to original shape


def _postprocess_for_mg(rle_masks, iou_scores, mask_boxes, amg_crops_nms_thresh=0.7):
    """
    Perform NMS (Non Maximum Suppression) on the outputs.

    Args:
            rle_masks (`torch.Tensor`):
                binary masks in the RLE format
            iou_scores (`torch.Tensor` of shape (nb_masks, 1)):
                iou_scores predicted by the model
            mask_boxes (`torch.Tensor`):
                The bounding boxes corresponding to segmentation masks
            amg_crops_nms_thresh (`float`, *optional*, defaults to 0.7):
                NMS threshold.
    """
    keep_by_nms = batched_nms(
        boxes=mask_boxes.float(),
        scores=iou_scores,
        idxs=torch.zeros(mask_boxes.shape[0]),
        iou_threshold=amg_crops_nms_thresh,
    )

    iou_scores = iou_scores[keep_by_nms]
    rle_masks = [rle_masks[i] for i in keep_by_nms]
    mask_boxes = mask_boxes[keep_by_nms]
    masks = [_rle_to_mask(rle) for rle in rle_masks]

    return masks, iou_scores, rle_masks, mask_boxes


def _postprocess_for_mg_tf(rle_masks, iou_scores, mask_boxes, amg_crops_nms_thresh=0.7):
    """
    Perform NMS (Non Maximum Suppression) on the outputs.

    Args:
            rle_masks (`tf.Tensor`):
                binary masks in the RLE format
            iou_scores (`tf.Tensor` of shape (nb_masks, 1)):
                iou_scores predicted by the model
            mask_boxes (`tf.Tensor`):
                The bounding boxes corresponding to segmentation masks
            amg_crops_nms_thresh (`float`, *optional*, defaults to 0.7):
                NMS threshold.
    """
    keep_by_nms = tf.image.combined_non_max_suppression(
        boxes=mask_boxes.float(),
        scores=iou_scores,
        idxs=tf.zeros(mask_boxes.shape[0]),
        iou_threshold=amg_crops_nms_thresh,
    )

    iou_scores = iou_scores[keep_by_nms]
    rle_masks = [rle_masks[i] for i in keep_by_nms]
    mask_boxes = mask_boxes[keep_by_nms]
    masks = [_rle_to_mask(rle) for rle in rle_masks]

    return masks, iou_scores, rle_masks, mask_boxes
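# Illustrative sketch (a hypothetical helper added for demonstration, not part of the
# original module): a 2x2 mask with one "on" pixel at row 0, col 1 flattens in Fortran
# (column-major) order to [0, 0, 1, 0], i.e. runs of 2 off, 1 on, 1 off, so the
# uncompressed RLE is {"size": [2, 2], "counts": [2, 1, 1]}.
def _example_rle_to_mask() -> np.ndarray:
    mask = _rle_to_mask({"size": [2, 2], "counts": [2, 1, 1]})
    # mask == [[False, True],
    #          [False, False]]
    return mask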