
    sgf                        d Z ddlmZmZmZmZmZmZmZ ddl	Z
ddlmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&  e%       rddl'Z' e&jP                  e)      Z*d	ee   d
ee   fdZ+	 dde
jX                  dee-e-f   deee.ef      d
e
jX                  fdZ/	 ddee
jX                     deee.ef      d
ee-   fdZ0	 	 	 	 dde
jX                  de-de-de-deee.ef      d
ee-e-f   fdZ1 G d de      Z2y)z&Image processor class for BridgeTower.    )AnyDictIterableListOptionalTupleUnionN   )BaseImageProcessorBatchFeatureget_size_dict)PaddingModecenter_croppadresizeto_channel_dimension_format)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_format
is_batchedis_scaled_imageto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_vision_availableloggingvaluesreturnc                 J    t        |  D cg c]  }t        |       c}S c c}w )zO
    Return the maximum value across all indices of an iterable of values.
    )zipmax)r#   values_is     o/var/www/html/venv/lib/python3.12/site-packages/transformers/models/bridgetower/image_processing_bridgetower.pymax_across_indicesr*   /   s      +.v,7hCM777s    imageoutput_sizeinput_data_formatc                     t        | |      \  }}t        j                  |t        j                        }d|d|d|f<   |S )a  
    Make a pixel mask for the image, where 1 indicates a valid pixel and 0 indicates padding.

    Args:
        image (`np.ndarray`):
            Image to make the pixel mask for.
        output_size (`Tuple[int, int]`):
            Output size of the mask.
    channel_dim)dtype   N)r   npzerosint64)r+   r,   r-   input_heightinput_widthmasks         r)   make_pixel_maskr9   7   sF     !/uBS TL+88Krxx0D()D,	$%K    imagesc                 J   |t        | d         }|t        j                  k(  r+t        | D cg c]  }|j                   c}      \  }}}||fS |t        j
                  k(  r+t        | D cg c]  }|j                   c}      \  }}}||fS t        d|       c c}w c c}w )zH
    Get the maximum height and width across all images in a batch.
    r   z"Invalid channel dimension format: )r   r   FIRSTr*   shapeLAST
ValueError)r;   r-   img_
max_height	max_widths         r)   get_max_height_widthrE   J   s      :6!9E,222#5F6SSsyy6S#T :y
 	""	 
.33	3#5F6SSsyy6S#T 
Iq 	"" =>O=PQRR	 7T6Ss   B+B input_imageshorterlongersize_divisorc                    t        | |      \  }}||}}|t        ||      z  }	||k  r|}
|	|z  }n|	|z  }
|}t        |
|      |kD  r|t        |
|      z  }	|	|
z  }
|	|z  }t        |
dz         t        |dz         }}
|
|z  |z  }
||z  |z  }|
|fS )Ng      ?)r   minr'   int)rF   rG   rH   rI   r-   r6   r7   min_sizemax_sizescale
new_height	new_widths               r)   get_resize_output_image_sizerR   ]   s     !/{<M NL+ &hHs<55Ek!
K'	\)
	
:y!H,3z955Z'
I%	
S 013y33G	J|+l:J\)L8Iy  r:   c            %       *    e Zd ZdZdgZdddej                  ddddddddfdedee	e
f   d	e
d
ededee
ef   dedeeeee   f      deeeee   f      dedee	e
f   deddf fdZdej                  ddfdej"                  dee	e
f   d	e
d
edeee	ef      deee	ef      dej"                  fdZ	 	 d"dej"                  dee	e
f   deee	ef      deee	ef      dej"                  f
dZ	 	 	 d#dej"                  dee
e
f   deeee   f   dee   deee	ef      dej"                  fdZ	 	 	 	 	 d$deej"                     deeee   f   dedeee	ef      dee   deee	ef      defd Z e       dddddddddddddej8                  dfdedee   deee	e
f      d	ee
   d
edee   dee   dee   deeeee   f      deeeee   f      dee   dee   dee	e
f   deee	ef      dedeee	ef      dej>                  j>                  f"d!       Z  xZ!S )%BridgeTowerImageProcessora3  
    Constructs a BridgeTower image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in the `preprocess` method.
        size (`Dict[str, int]` *optional*, defaults to `{'shortest_edge': 288}`):
            Resize the shorter side of the input to `size["shortest_edge"]`. The longer side will be limited to under
            `int((1333 / 800) * size["shortest_edge"])` while preserving the aspect ratio. Only has an effect if
            `do_resize` is set to `True`. Can be overridden by the `size` parameter in the `preprocess` method.
        size_divisor (`int`, *optional*, defaults to 32):
            The size by which to make sure both the height and width can be divided. Only has an effect if `do_resize`
            is set to `True`. Can be overridden by the `size_divisor` parameter in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
            overridden by the `resample` parameter in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the `do_rescale`
            parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
            overridden by the `rescale_factor` parameter in the `preprocess` method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method. Can be overridden by the `do_normalize` parameter in the `preprocess` method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be
            overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
            Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the image. Can be overridden by the `do_center_crop` parameter in the `preprocess`
            method.
        crop_size (`Dict[str, int]`, *optional*):
            Desired output size when applying center-cropping. Only has an effect if `do_center_crop` is set to `True`.
            Can be overridden by the `crop_size` parameter in the `preprocess` method. If unset defaults to `size`,
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to the `(max_height, max_width)` of the images in the batch. Can be overridden by
            the `do_pad` parameter in the `preprocess` method.
    pixel_valuesTN    gp?	do_resizesizerI   resample
do_rescalerescale_factordo_normalize
image_mean	image_stddo_center_crop	crop_sizedo_padr$   c                 @   d|v r|j                  d      }t        |   di | ||nddi}t        |d      }|| _        || _        || _        || _        || _        || _	        || _
        ||nt        | _        |	|	nt        | _        || _        |
| _        || _        y )Npad_and_return_pixel_maskshortest_edgei   Fdefault_to_square )popsuper__init__r   rW   rX   rI   rY   rZ   r[   r\   r   r]   r   r^   ra   r_   r`   )selfrW   rX   rI   rY   rZ   r[   r\   r]   r^   r_   r`   ra   kwargs	__class__s                 r)   rj   z"BridgeTowerImageProcessor.__init__   s      '&0ZZ ;<F"6"'tos-CTU;"	( $,((2(>*DT&/&;,"r:   r+   data_formatr-   c                     t        |d      }d|vrt        d|j                                |d   }t        d|z        }	t	        |||	||      }
t        |f|
|||d|S )a  
        Resize an image.

        Resizes the shorter side of the image to `size["shortest_edge"]` while preserving the aspect ratio. If the
        longer side is larger than the max size `(int(`size["shortest_edge"]` * 1333 / 800))`, the longer side is then
        resized to the max size while preserving the aspect ratio.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Controls the size of the output image. Should be of the form `{"shortest_edge": int}`.
            size_divisor (`int`, *optional*, defaults to 32):
                The image is resized to a size that is a multiple of this value.
            resample (`PILImageResampling` filter, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resiizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        Fre   rd   z@The `size` dictionary must contain the key `shortest_edge`. Got g)\?)rG   rH   rI   r-   )rX   rY   rn   r-   )r   r@   keysrL   rR   r   )rk   r+   rX   rI   rY   rn   r-   rl   rG   rH   r,   s              r)   r   z BridgeTowerImageProcessor.resize   s    > TU;$&_`d`i`i`k_lmnn'Z')*276`q
 
#/
 
 	
r:   c                 0    |d   }t        |f||f||d|S )a"  
        Center crop an image to `(size["height"], size["width"])`. If the input size is smaller than `crop_size` along
        any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`np.ndarray`):
                Image to center crop.
            size (`Dict[str, int]`):
                Size of the output image in the form `{"height": h, "width": w}`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred from the input
                image.
        rd   )rX   rn   r-   )r   )rk   r+   rX   rn   r-   rl   r,   s          r)   r   z%BridgeTowerImageProcessor.center_crop  s;    . ?+
{+#/	

 
 	
r:   r,   constant_valuesc                     t        ||      \  }}|\  }}	||z
  }
|	|z
  }d|
fd|ff}t        ||t        j                  |||      }|S )z<
        Pad an image with zeros to the given size.
        r/   r   )moderr   rn   r-   )r   r   r   CONSTANT)rk   r+   r,   rr   rn   r-   r6   r7   output_heightoutput_width
pad_bottom	pad_rightpaddingpadded_images                 r)   
_pad_imagez$BridgeTowerImageProcessor._pad_image"  sp     %35FW$X!k&1#|"\1
 ;.	z?Q	N3%%+#/
 r:   r;   return_pixel_maskreturn_tensorsc           
          t        ||      }|D cg c]  }| j                  |||||       }	}d|	i}
|r |D cg c]  }t        |||       }}||
d<   t        |
|      S c c}w c c}w )a  
        Pads a batch of images to the bottom and right of the image with zeros to the size of largest height and width
        in the batch and optionally returns their corresponding pixel mask.

        Args:
            image (`np.ndarray`):
                Image to pad.
            constant_values (`float` or `Iterable[float]`, *optional*):
                The value to use for the padding if `mode` is `"constant"`.
            return_pixel_mask (`bool`, *optional*, defaults to `True`):
                Whether to return a pixel mask.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        )r-   )rr   rn   r-   rU   )r+   r,   r-   
pixel_maskdatatensor_type)rE   r|   r9   r   )rk   r;   rr   r}   r~   rn   r-   pad_sizer+   padded_imagesr   maskss               r)   r   zBridgeTowerImageProcessor.pad>  s    @ (BST  	
  OO /'"3  	
 	
 . $  eUfgE  "'D>BB'	
s
   A$A)c                    ||n| j                   }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j
                  }|	|	n| j                  }	|
|
n| j                  }
||n| j                  }||n| j                   ||n#| j                  | j                  n| j                  }||n| j                  }t        |d      }t        |      s|g}t        |      st        d      t!        ||||	|
|||||||       |D cg c]  }t#        |       }}t%        |d         r|rt&        j)                  d       |r#|D cg c]  }| j+                  |||||       }}|r!|D cg c]  }| j-                  |||       }}|r!|D cg c]  }| j/                  |||	       }}|r"|D cg c]  }| j1                  ||	|
|
       }}|D cg c]  }t3        |||       }}|r| j5                  |d||      }|S t7        d|i|      }|S c c}w c c}w c c}w c c}w c c}w c c}w )ah  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Controls the size of the image after `resize`. The shortest edge of the image is resized to
                `size["shortest_edge"]` whilst preserving the aspect ratio. If the longest edge of this resized image
                is > `int(size["shortest_edge"] * (1333 / 800))`, then the image is resized again to make the longest
                edge equal to `int(size["shortest_edge"] * (1333 / 800))`.
            size_divisor (`int`, *optional*, defaults to `self.size_divisor`):
                The image is resized to a size that is a multiple of this value.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to normalize the image by if `do_normalize` is set to `True`.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
            do_pad (`bool`, *optional*, defaults to `self.do_pad`):
                Whether to pad the image to the (max_height, max_width) in the batch. If `True`, a pixel mask is also
                created and returned.
            do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
                Whether to center crop the image. If the input size is smaller than `crop_size` along any edge, the
                image is padded with 0's and then center cropped.
            crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`):
                Size of the image after center crop. If one edge the image is smaller than `crop_size`, it will be
                padded with zeros and then cropped
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        Fre   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)rZ   r[   r\   r]   r^   ra   size_divisibilityr_   r`   rW   rX   rY   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r+   rX   rI   rY   r-   )r+   rX   r-   )r+   rO   r-   )r+   meanstdr-   )input_channel_dimT)r}   r~   r-   rU   r   )rW   rI   rY   rZ   r[   r\   r]   r^   ra   r_   r`   rX   r   r   r   r@   r   r   r   loggerwarning_oncer   r   rescale	normalizer   r   r   )rk   r;   rW   rX   rI   rY   rZ   r[   r\   r]   r^   ra   r_   r`   r~   rn   r-   r+   encoded_outputss                      r)   
preprocessz$BridgeTowerImageProcessor.preprocessu  s   X "+!6IDNN	'3'?|TEVEV'38#-#9Zt
+9+E4K^K^'3'?|TEVEV#-#9Zt
!*!6IDNN	!-4;;(4$:M:M #.It~~GaT^^gkgpgp 	 'tTYYTU;&!XFF#: 
 	&!)%!*)	
 6<<E.'<<6!9%*s
  $	  !-%&7  	F 	 pvgl  u9Pa bF   $ 5RcdF 
  $ U^opF  ou
ej'{N_`
 
 "hh$~al ' O  +0HVdeOa =	


s$   %H2$H7	H<,II1I)NN)r   NN)r   TNNN)"__name__
__module____qualname____doc__model_input_namesr   BICUBICboolr   strrL   r	   floatr   r   rj   r3   ndarrayr   r   r   r   r   r|   r   r   r   r    r=   r   PILImager   __classcell__)rm   s   @r)   rT   rT   |   s   +Z (( #'9'A'A,3!:>9=#$("#"# 38n"# 	"#
 %"# "# c5j)"# "# U5$u+#567"# E%e"456"# "# S>"# "# 
"#R '9'A'A>BDH.
zz.
 38n.
 	.

 %.
 eC)9$9:;.
 $E#/?*?$@A.
 
.
h ?CDH
zz
 38n
 eC)9$9:;	

 $E#/?*?$@A
 

J :;26DHzz 38_ uhuo56	
 ./ $E#/?*?$@A 
> :;"&;?26DH5CRZZ 5C uhuo565C  	5C
 !sJ!785C ./5C $E#/?*?$@A5C 
5Cn %& %))-&*'+%)*.'+:>9=!%)-$(;?(8(>(>DH#ff D>f tCH~&	f
 smf %f TNf !f tnf U5$u+#567f E%e"456f f !f S>f !sJ!78f  &!f" $E#/?*?$@A#f$ 
%f 'fr:   rT   )N)i   i5  rV   N)3r   typingr   r   r   r   r   r   r	   numpyr3   image_processing_utilsr   r   r   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   r   utilsr   r    r!   r"   r   
get_loggerr   r   r*   r   rL   r   r9   rE   rR   rT   rg   r:   r)   <module>r      s|   - D D D  U U b b    _ ^ 			H	%8x} 8c 8 rv::$)#s(OHPQVWZ\lWlQmHnZZ( [_##19%EU@U:V1W#	#Y#* @D!!! ! 	!
  c+;&; <=! 38_!>` 2 `r:   