
    sgH;                     ^   d Z ddlmZmZmZmZmZmZ ddlZ	ddl
mZmZ ddlmZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZmZmZ  e       rddl Z erd
dl!m"Z"  e       rddl#Z# ejH                  e%      Z&	 ddedeee'ef      fdZ(	 ddedeee'ef      defdZ) G d de      Z*y)z%Image processor class for SuperPoint.    )TYPE_CHECKINGDictListOptionalTupleUnionN   )is_torch_availableis_vision_available)BaseImageProcessorBatchFeatureget_size_dict)resizeto_channel_dimension_format)ChannelDimension
ImageInputinfer_channel_dimension_formatis_scaled_imagemake_list_of_imagesto_numpy_arrayvalid_images)
TensorTypeloggingrequires_backends   )#SuperPointKeypointDescriptionOutputimageinput_data_formatc                 H   |t         j                  k(  r>t        j                  | d   | d   k(        xr t        j                  | d   | d   k(        S |t         j                  k(  r>t        j                  | d   | d   k(        xr t        j                  | d   | d   k(        S y )Nr   .r   .   ..r   .r   .r#   )r   FIRSTnpallLAST)r   r   s     m/var/www/html/venv/lib/python3.12/site-packages/transformers/models/superpoint/image_processing_superpoint.pyis_grayscaler,   /   s     ,222vveFmuV}45`"&&vRWX^R_A_:``	.33	3vveFmuV}45`"&&vRWX^R_A_:`` 
4    returnc                    t        t        dg       t        | t        j                        r|t
        j                  k(  r7| d   dz  | d   dz  z   | d   dz  z   }t        j                  |gdz  d	
      }|S |t
        j                  k(  r5| d   dz  | d   dz  z   | d   dz  z   }t        j                  |gdz  d
      }S t        | t        j                  j                        s| S | j                  d      } | S )ao  
    Converts an image to grayscale format using the NTSC formula. Only support numpy and PIL Image. TODO support torch
    and tensorflow grayscale conversion

    This function is supposed to return a 1-channel image, but it returns a 3-channel image with the same value in each
    channel, because of an issue that is discussed in :
    https://github.com/huggingface/transformers/pull/25786#issuecomment-1730176446

    Args:
        image (Image):
            The image to convert.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image.
    visionr    gŏ1w-!?r!   gbX9?r"   gv/?r	   r   )axisr$   r%   r&   L)r   convert_to_grayscale
isinstancer(   ndarrayr   r'   stackr*   PILImageconvert)r   r   
gray_images      r+   r4   r4   9   s    $ *XJ7%$ 0 6 66v/%-&2HH5QW=[aKaaJ:,"2;J  "2"7"77v/%-&2HH5QW=[aKaaJ:,"2<JeSYY__-MM#ELr-   c                   X    e Zd ZdZdgZ	 	 	 	 ddedeeef   dede	ddf
 fd	Z
	 	 dd
ej                  deeef   deeeef      deeeef      fdZdddddej"                  dfdedeeef   dede	deeeef      dedeeeef      defdZdddeeee   f   deeedf      fdZ xZS )SuperPointImageProcessora  
    Constructs a SuperPoint image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Controls whether to resize the image's (height, width) dimensions to the specified `size`. Can be overriden
            by `do_resize` in the `preprocess` method.
        size (`Dict[str, int]` *optional*, defaults to `{"height": 480, "width": 640}`):
            Resolution of the output image after `resize` is applied. Only has an effect if `do_resize` is set to
            `True`. Can be overriden by `size` in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overriden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overriden by `rescale_factor` in the `preprocess`
            method.
    pixel_valuesN	do_resizesize
do_rescalerescale_factorr.   c                     t        |   di | ||nddd}t        |d      }|| _        || _        || _        || _        y )Ni  i  )heightwidthFdefault_to_square )super__init__r   r?   r@   rA   rB   )selfr?   r@   rA   rB   kwargs	__class__s         r+   rJ   z!SuperPointImageProcessor.__init__r   sO     	"6"'tc-JTU;"	$,r-   r   data_formatr   c                 L    t        |d      }t        |f|d   |d   f||d|S )aL  
        Resize an image.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Dictionary of the form `{"height": int, "width": int}`, specifying the size of the output image.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the output image. If not provided, it will be inferred from the input
                image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        FrF   rD   rE   )r@   rN   r   )r   r   )rK   r   r@   rN   r   rL   s         r+   r   zSuperPointImageProcessor.resize   sE    : TU;
x.$w-0#/	

 
 	
r-   return_tensorsc	                 X   ||n| j                   }||n| j                  }||n| j                  }||n| j                  }t	        |d      }t        |      }t        |      st        d      |r|t        d      |r|t        d      |D 
cg c]  }
t        |
       }}
t        |d         r|rt        j                  d       |t        |d         }|r!|D 
cg c]  }
| j                  |
||       }}
|r!|D 
cg c]  }
| j                  |
||	       }}
|t        |d         }t        t!        |            D ]%  }t#        ||   |      rt%        ||   |
      ||<   ' |D 
cg c]  }
t'        |
||       }}
d|i}t)        ||      S c c}
w c c}
w c c}
w c c}
w )aR  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the output image after `resize` has been applied. If `size["shortest_edge"]` >= 384, the image
                is resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the
                image will be matched to `int(size["shortest_edge"]/ crop_pct)`, after which the image is cropped to
                `(size["shortest_edge"], size["shortest_edge"])`. Only has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        FrF   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.z,Size must be specified if do_resize is True.z7Rescale factor must be specified if do_rescale is True.r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r   r@   r   )r   scaler   )r   )input_channel_dimr>   )datatensor_type)r?   rA   rB   r@   r   r   r   
ValueErrorr   r   loggerwarning_oncer   r   rescalerangelenr,   r4   r   r   )rK   imagesr?   r@   rA   rB   rP   rN   r   rL   r   irT   s                r+   
preprocessz#SuperPointImageProcessor.preprocess   s   d "+!6IDNN	#-#9Zt
+9+E4K^K^'tTYYTU;$V,F#: 
 KLL.0VWW 6<<E.'<<6!9%*s
 $ >vay IlrschdkkDL]k^sFs $ 5RcdF 
 $ >vay I s6{# 	aAq	+<=0N_`q		a
 ou
ej'{N_`
 
 '>BBI = t
s   F FF"0F'outputsr   target_sizesztorch.Tensorc                    t        |j                        t        |      k7  rt        d      t        |t              rt        j                  |      }n|j                  d   dk7  rt        d      |}t        j                  |dg      }|j                  |dddf   z  }|j                  t
        j                        }g }t        |j                  ||j                  |j                        D ]O  \  }}}}	t        j                  |      j!                  d      }
||
   }||
   }|	|
   }	|j#                  |||	d       Q |S )a  
        Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors
        with coordinates absolute to the original image sizes.

        Args:
            outputs ([`SuperPointKeypointDescriptionOutput`]):
                Raw outputs of the model containing keypoints in a relative (x, y) format, with scores and descriptors.
            target_sizes (`torch.Tensor` or `List[Tuple[int, int]]`):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                `(height, width)` of each image in the batch. This must be the original
                image size (before any processing).
        Returns:
            `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints in absolute format according
            to target_sizes, scores and descriptors for an image in the batch as predicted by the model.
        zRMake sure that you pass in as many target sizes as the batch dimension of the maskr   r#   zTEach element of target_sizes must contain the size (h, w) of each image of the batchN)	keypointsscoresdescriptors)r[   maskrV   r5   r   torchtensorshapefliprb   toint32ziprc   rd   nonzerosqueezeappend)rK   r_   r`   image_sizesmasked_keypointsresults
image_maskrb   rc   rd   indicess              r+   post_process_keypoint_detectionz8SuperPointImageProcessor.post_process_keypoint_detection  s<   $ w||L 11qrrlD),,|4K!!!$) j  'K jjqc2",,{1d7/CC ,..u{{;:=LL*GNNG<O<O;
 	c6J	6; mmJ/77:G!'*IG_F%g.KNNfU`ab	c r-   )TNTgp?)NN)__name__
__module____qualname____doc__model_input_namesboolr   strintfloatrJ   r(   r6   r   r   r   r   r'   r   r   r^   r   r   ru   __classcell__)rM   s   @r+   r=   r=   ]   s   $ (( # '-- 38n- 	-
 - 
-* ?CDH%
zz%
 38n%
 eC)9$9:;	%

 $E#/?*?$@A%
T # $;?(8(>(>DHlC lC 38n	lC
 lC lC !sJ!78lC &lC $E#/?*?$@AlC 
lC\/</LQR\^bch^iRiLj/	d3&'	(/r-   r=   )N)+ry   typingr   r   r   r   r   r   numpyr(    r
   r   image_processing_utilsr   r   r   image_transformsr   r   image_utilsr   r   r   r   r   r   r   utilsr   r   r   rf   modeling_superpointr   r8   
get_loggerrv   rW   r|   r,   r4   r=   rH   r-   r+   <module>r      s    , D D  7 U U C   < ; H			H	%
 AEaac+;&; <=a AE!!c+;&; <=! !Hj1 jr-   