
    sg3	                    H   d Z ddlZddlmZmZmZmZmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- dd	l.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8  e5       r
ddl9Z9dd
l9m:Z:  e7       rddl;Z; e2       rddl<Z=ddl>Z= e8j~                  e@      ZAe j                  e j                  fZD	 dFdeej                     deeeFe"f      deeG   fdZH	 dGde
eGeGf   deGdeeG   deGde
eGeGf   f
dZI	 dFdej                  deGdeGdeeeFe"f      de
eGeGf   f
dZJ	 	 dHdej                  deeGe
eGeGf   eeG   f   deeG   deeeFe"f      de
eGeGf   f
dZKdefdZLdFdej                  deeG   dej                  fdZMdede
eGeGf   defdZNdee   dee   fd ZO	 dFd!ej                  d"e
eGeGf   deeeFe"f      dej                  fd#ZPd$eGd%eGdej                  fd&ZQ	 	 dId'eRdeee"eFf      fd(ZSd)ej                  dej                  fd*ZT	 	 dJd!ej                  d+ed,eeFej                  f   d-eRdee"eFf   defd.ZV	 dKd)ej                  d/e
d0e
fd1ZWd2ej                  d0e
eGeGf   d3eGdej                  fd4ZXd5ej                  de
ej                  ej                  f   fd6ZYd7e$j                  fdeeFef   d8e
eGeGf   d0e
eGeGf   d9e[d:e$f
d;Z\d< Z]d= Z^d> Z_dLd?Z`	 	 	 	 dMd@e[dAe[dBee	eG      d0e
eGeGf   fdCZa G dD dEe      Zby)Nz Image processor class for YOLOS.    N)	AnyCallableDictIterableListOptionalSetTupleUnion   )BatchFeature)BaseImageProcessorget_size_dict)	PaddingModecenter_to_corners_formatcorners_to_center_format	id_to_rgbpadrescaleresize	rgb_to_idto_channel_dimension_format)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDAnnotationFormatAnnotationTypeChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_list_of_imagesto_numpy_arrayvalid_imagesvalidate_annotationsvalidate_kwargsvalidate_preprocess_arguments)

TensorTypeis_flax_availableis_jax_tensoris_scipy_availableis_tf_availableis_tf_tensoris_torch_availableis_torch_tensoris_vision_availablelogging)nnimagesinput_data_formatreturnc                 J   |t        | d         }|t        j                  k(  r+t        | D cg c]  }|j                   c}      \  }}}||fS |t        j
                  k(  r+t        | D cg c]  }|j                   c}      \  }}}||fS t        d|       c c}w c c}w )zH
    Get the maximum height and width across all images in a batch.
    r   z"Invalid channel dimension format: )r!   r   FIRSTmax_across_indicesshapeLAST
ValueError)r4   r5   img_
max_height	max_widths         c/var/www/html/venv/lib/python3.12/site-packages/transformers/models/yolos/image_processing_yolos.pyget_max_height_widthrB   V   s      :6!9E,222#5F6SSsyy6S#T :y
 	""	 
.33	3#5F6SSsyy6S#T 
Iq 	"" =>O=PQRR	 7T6Ss   B+B 
image_sizesizemax_sizemod_sizec                    | \  }}d}|St        t        ||f            }t        t        ||f            }||z  |z  |kD  r||z  |z  }t        t	        |            }||k  r*|}	||t        ||z  |z        }
nTt        ||z  |z        }
nB||k  r||k(  s
||k  r
||k(  r||}	}
n)|}
||t        ||z  |z        }	nt        ||z  |z        }	|6t        j                  |	|      }t        j                  |
|      }|	|z
  }	|
|z
  }
|
|	fS )a  
    Computes the output image size given the input image size and the desired output size with multiple of divisible_size.

    Args:
        image_size (`Tuple[int, int]`):
            The input image size.
        size (`int`):
            The desired output size.
        max_size (`int`, *optional*):
            The maximum allowed output size.
        mod_size (`int`, *optional*):
            The size to make multiple of mod_size.
    N)floatminmaxintroundnpmod)rC   rD   rE   rF   heightwidthraw_sizemin_original_sizemax_original_sizeowohow_modoh_mods                rA   get_size_with_aspect_ratiorX   h   sJ     MFEH!#vuo"67!#vuo"670047(B"336GGHuX'Dv~H$8X&./BTF]U*+B
E/fn%6/etmBH$8X%./BTE\F*+BH%H%&[&[8O    input_imager?   r@   c                     t        | |      }|\  }}||z  }||z  }t        ||      }	t        ||	z        }
t        ||	z        }|
|fS )a  
    Computes the output image size given the input image and the maximum allowed height and width. Keep aspect ratio.
    Important, even if image_height < max_height and image_width < max_width, the image will be resized
    to at least one of the edges be equal to max_height or max_width.

    For example:
        - input_size: (100, 200), max_height: 50, max_width: 50 -> output_size: (25, 50)
        - input_size: (100, 200), max_height: 200, max_width: 500 -> output_size: (200, 400)

    Args:
        input_image (`np.ndarray`):
            The image to resize.
        max_height (`int`):
            The maximum allowed height.
        max_width (`int`):
            The maximum allowed width.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.
    )r    rI   rK   )rZ   r?   r@   r5   rC   rO   rP   height_scalewidth_scale	min_scale
new_height	new_widths               rA   #get_image_size_for_max_height_widthra      sc    2  ->?JMFE&Le#KL+.IVi'(JEI%&Iy  rY   c                 d    t        | |      }t        |t        t        f      r|S t	        |||      S )a   
    Computes the output image size given the input image size and the desired output size. If the desired output size
    is a tuple or list, the output image size is returned as is. If the desired output size is an integer, the output
    image size is computed by keeping the aspect ratio of the input image size.

    Args:
        input_image (`np.ndarray`):
            The image to resize.
        size (`int` or `Tuple[int, int]` or `List[int]`):
            The desired output size.
        max_size (`int`, *optional*):
            The maximum allowed output size.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format of the input image. If not provided, it will be inferred from the input image.
    )r    
isinstancelisttuplerX   )rZ   rD   rE   r5   rC   s        rA   get_resize_output_image_sizerf      s3    *  ->?J$u&%j$AArY   c                 f   t        | t        j                        rt        j                  S t	               rt        |       rddl}|j                  S t               rt        |       rddl
}|j                  S t               rt        |       rddlm} |j                  S t!        dt#        |              )z
    Returns a function that converts a numpy array to the framework of the input array.

    Args:
        arr (`np.ndarray`): The array to convert.
    r   NzCannot convert arrays of type )rc   rM   ndarrayarrayr-   r.   
tensorflowconvert_to_tensorr/   r0   torchtensorr*   r+   	jax.numpynumpyr<   type)arrtfrl   jnps       rA   get_numpy_to_framework_fnrt      s     #rzz"xx\#.### 4||}S1yy
5d3i[A
BBrY   rq   axisc                 n    || j                         S 	 | j                  |      S # t        $ r | cY S w xY w)zF
    Squeezes an array, but only if the axis specified has dim 1.
    ru   )squeezer<   )rq   ru   s     rA   safe_squeezery      s?     |{{}{{{%% 
s   & 44
annotationc                     |\  }}i }| j                         D ]N  \  }}|dk(  r?|}t        |      }|t        j                  ||||gt        j                        z  }|||<   J|||<   P |S )Nboxesdtype)itemsr   rM   asarrayfloat32)rz   rC   image_heightimage_widthnorm_annotationkeyvaluer|   s           rA   normalize_annotationr     s     *L+O &&( )
U'>E,U3ERZZlK V^`^h^hiiE#(OC #(OC ) rY   valuesc                 J    t        |  D cg c]  }t        |       c}S c c}w )zO
    Return the maximum value across all indices of an iterable of values.
    )ziprJ   )r   values_is     rA   r9   r9     s      +.v,7hCM777s    imageoutput_sizec                     t        | |      \  }}t        j                  |t        j                        }d|d|d|f<   |S )a  
    Make a pixel mask for the image, where 1 indicates a valid pixel and 0 indicates padding.

    Args:
        image (`np.ndarray`):
            Image to make the pixel mask for.
        output_size (`Tuple[int, int]`):
            Output size of the mask.
    channel_dimr}      N)r    rM   zerosint64)r   r   r5   input_heightinput_widthmasks         rA   make_pixel_maskr     sF     !/uBS TL+88Krxx0D()D,	$%KrY   rO   rP   c                    	 ddl m} g }| D ]  }|j                  |||      }|j	                  |      }t        |j                        dk  r|d   }t        j                  |t        j                        }t        j                  |d      }|j                  |        |rt        j                  |d      }|S t        j                  d||ft        j                        }|S # t        $ r t        d      w xY w)	a1  
    Convert a COCO polygon annotation to a mask.

    Args:
        segmentations (`List[List[float]]`):
            List of polygons, each polygon represented by a list of x-y coordinates.
        height (`int`):
            Height of the mask.
        width (`int`):
            Width of the mask.
    r   r   z1Pycocotools is not installed in your environment.r   ).Nr}      rw   )pycocotoolsr   ImportErrorfrPyObjectsdecodelenr:   rM   r   uint8anyappendstackr   )segmentationsrO   rP   	coco_maskmaskspolygonsrlesr   s           rA   convert_coco_poly_to_maskr   ,  s    O1 E! $$Xvu=%tzz?Q	?Dzz$bhh/vvd#T Q' L !VU+288<L#  OMNNOs   C$ $C9return_segmentation_masksc                    t        | |      \  }}|d   }t        j                  |gt        j                        }|d   }|D cg c]  }d|vs	|d   dk(  s| }}|D cg c]  }|d   	 }	}t        j                  |	t        j                        }	t        j                  |D cg c]  }|d   	 c}t        j                        }
t        j                  |D cg c]  }d|v r|d   nd c}t        j                        }|D cg c]  }|d	   	 }}t        j                  |t        j                        j                  d
d      }|ddddfxx   |ddddf   z  cc<   |dddddf   j                  d|      |dddddf<   |dddddf   j                  d|      |dddddf<   |dddf   |dddf   kD  |dddf   |dddf   kD  z  }i }||d<   |	|   |d<   ||   |d<   |
|   |d<   ||   |d<   t        j                  t        |      t        |      gt        j                        |d<   |rld|d   v re|D cg c]  }|d   	 }}t        j                  |t        j                        }||   }|j                  d   }|r|j                  d      n|}||d<   |r'|D cg c]  }|d   	 }}t        |||      }||   |d<   |S c c}w c c}w c c}w c c}w c c}w c c}w c c}w )zM
    Convert the target in COCO format into the format expected by DETR.
    r   image_idr}   annotationsiscrowdr   category_idareabbox   Nr   )rI   rJ   r   r   class_labelsr|   	orig_size	keypoints)r   r   segmentationr   )
r    rM   r   r   r   reshapecliprK   r:   r   )r   targetr   r5   r   r   r   r   objclassesr   r   r|   keep
new_targetr   num_keypointssegmentation_masksr   s                      rA   !prepare_coco_detection_annotationr   O  s    !/uBS TL+j!Hzz8*BHH5H 'K"-]3#1EY[\I\3]K]-89cs=!9G9jj1G ::k:ss6{:"**MDjj{[I,<#i.!C[ceckcklG$/0SS[0E0JJuBJJ/77A>E	!QR%LE!RaR%L L1add7^((QK(@E!QTT'N1add7^((QL(AE!QTT'N!Q$K%1+%%1+ad*CDDJ%Jz!(J~+JwdJv#DMJy jj#l*;S=M)NVXV^V^_J{{k!n41<=#S%=	=JJy

;	dO	!*2?I%%g.Y	"+
; =HIcc.1II)*<lKX#Dk
7S ^9 ;[0$ > Js0   KK!K'K$K)
K.K35K8r   c                 |   | j                   dk(  rt        j                  d      S | j                  dd \  }}t        j                  d|t        j
                        }t        j                  d|t        j
                        }t        j                  ||d      \  }}| t        j                  |d      z  }|j                  |j                  d   d	      j                  d	      }t        j                  j                  |t        j                  | t               
      }|j                  d      }|j                  |j                  d   d	      j                  d	      }| t        j                  |d      z  }|j                  |j                  d   d	      j                  d	      }	t        j                  j                  |t        j                  | t               
      }|j                  d      }
|
j                  |
j                  d   d	      j                  d	      }
t        j                  ||
||	gd      S )a  
    Compute the bounding boxes around the provided panoptic segmentation masks.

    Args:
        masks: masks in format `[number_masks, height, width]` where N is the number of masks

    Returns:
        boxes: bounding boxes in format `[number_masks, 4]` in xyxy format
    r   )r   r   Nr}   ij)indexingrw   r   r   g    חA)
fill_valuer   )rD   rM   r   r:   aranger   meshgridexpand_dimsr   rJ   mari   boolfilledrI   r   )r   hwyxx_maskx_maxx_miny_masky_maxy_mins              rA   masks_to_boxesr     s    zzQxx;;rsDAq
		!Qbjj)A
		!Qbjj)A;;q!d+DAqR^^AA..FNN6<<?B/33B7E
F288E#>!?@AHHH$EMM%++a."-11"5ER^^AA..FNN6<<?B/33B7E
F288E#>!?@AHHH$EMM%++a."-11"5E88UE5%0!44rY   r   
masks_pathreturn_masksc                 z   t        | |      \  }}t        j                  |      |d   z  }i }t        j                  d|v r|d   n|d   gt        j
                        |d<   t        j                  ||gt        j
                        |d<   t        j                  ||gt        j
                        |d<   d|v rkt        j                  t        j                  j                  |      t        j                        }	t        |	      }	t        j                  |d   D 
cg c]  }
|
d   	 c}
      }|	|d	d	d	d	f   k(  }	|	j                  t        j                        }	|r|	|d
<   t        |	      |d<   t        j                  |d   D 
cg c]  }
|
d   	 c}
t        j
                        |d<   t        j                  |d   D 
cg c]  }
|
d   	 c}
t        j
                        |d<   t        j                  |d   D 
cg c]  }
|
d   	 c}
t        j                        |d<   |S c c}
w c c}
w c c}
w c c}
w )z7
    Prepare a coco panoptic annotation for YOLOS.
    r   	file_namer   idr}   rD   r   segments_infoNr   r|   r   r   r   r   )r    pathlibPathrM   r   r   PILImageopenuint32r   ri   astyper   r   r   )r   r   r   r   r5   r   r   annotation_pathr   r   segment_infoidss               rA    prepare_coco_panoptic_annotationr     s    !/uBS TL+ll:.1DDOJZZzV?S
);Y_`dYe(fnpnvnvwJz\;$?rxxPJv jj,)DBHHUJ{& 

399>>/:"))L% hhvo?VW|T*WXQd]++RXX&"'Jw,U3
7%'XX=CO=TU\\-(U]_]e]e&

>" !#

9?9PQ\)$QY[YaYa!

9  ZZ6<_6MNl\&!NVXV`V`

6 ! X V R Os   H)	H.H3H8
input_sizetarget_sizec                    |\  }}|\  }}t         j                  j                  | j                  dd      d      }	|	j                  d   dk(  r(t        j                  ||ft
        j                        }	n!|	j                  d      j                  ||      }	|r'|j                         D ]  }
|
D ]  }|
d   |	|	|k(  <     t        |	      }t        |||ft        j                        }|S )Nr   r   r   r}   resample)scipyspecialsoftmax	transposer:   rM   r   r   argmaxr   r   r   r   r   NEAREST)r   r   r   stuff_equiv_classesdeduplicater   r   final_hfinal_wm_idequiveq_idseg_imgs                rA   get_segmentation_imager     s     DAq"GW==  A!6;Dzz"~xxAbhh/{{2&&q!,(//1 	/E /&+AhTU]#/	/ oGWw0;M;U;UVGNrY   r   	n_classesc                     |\  }}| j                  t        j                        }|j                  ||d      }t	        |      }t        |      D cg c]  }||k(  j                          }}|S c c}w )Nr   )r   rM   r   r   r   rangesum)	r   r   r   r   r   
np_seg_imgr   ir   s	            rA   get_mask_arear    si    "GW)J##GWa8JZ D',Y'78!TQYOO8D8K 9s   A.logitsc                     t         j                  j                  | d      }|j                  dd      }t	        j
                  ||d      }|j                  d      |j                  d      }}||fS )Nr   rw   T)keepdims)r   r   r   r   rM   take_along_axisrx   )r  probslabelsscoress       rA   %score_labels_from_class_probabilitiesr    sf    MM!!&r!2E\\"t\,FvB7F^^B');FF6>rY         ?r   	thresholdr   c                 (   t        d t        ||      D              }|\  }}i }||d<   | j                         D ]  \  }	}
|	dk(  r4|
}|t        j                  ||||gt        j
                        z  }||d<   ?|	dk(  r|
}|||z  z  }||d<   T|	dk(  ri|
dddf   }t        j                  |D cg c]  }t        |||       c}      }|j                  t        j
                        }|ddd	f   |kD  }||d<   |	dk(  r||d<   |
||	<    |S c c}w )
ax  
    Resizes an annotation to a target size.

    Args:
        annotation (`Dict[str, Any]`):
            The annotation dictionary.
        orig_size (`Tuple[int, int]`):
            The original size of the input image.
        target_size (`Tuple[int, int]`):
            The target size of the image, as returned by the preprocessing `resize` step.
        threshold (`float`, *optional*, defaults to 0.5):
            The threshold used to binarize the segmentation masks.
        resample (`PILImageResampling`, defaults to `PILImageResampling.NEAREST`):
            The resampling filter to use when resizing the masks.
    c              3   P   K   | ]  \  }}t        |      t        |      z     y wN)rH   ).0ss_origs      rA   	<genexpr>z$resize_annotation.<locals>.<genexpr>  s!     Y	658eFm+Ys   $&rD   r|   r}   r   r   Nr   r   )	re   r   r   rM   r   r   ri   r   r   )rz   r   r   r  r   ratiosratio_heightratio_widthnew_annotationr   r   r|   scaled_boxesr   scaled_arear   r   s                    rA   resize_annotationr    s?   , YSi=XYYF &L+N(N6 &&( (
U'>E 2::{L+Wc.dlnlvlv#wwL&2N7#F]D+"<=K%0N6"G^!T'NEHHW\]tfT;J]^ELL,E!Q$K)+E&+N7#F]%0N6""'N3%((  ^s   .D
c                    t        |       r| j                         } | j                         }t        j                  dg|dgg      }t        j
                  |dd |dd k7        d   dz   }|dddxx   |ddd   z  cc<   t        |      S )a  
    Converts given binary mask of shape `(height, width)` to the run-length encoding (RLE) format.

    Args:
        mask (`torch.Tensor` or `numpy.array`):
            A binary mask tensor of shape `(height, width)` where 0 denotes background and 1 denotes the target
            segment_id or class_id.
    Returns:
        `List`: Run-length encoded list of the binary mask. Refer to COCO API for more information about the RLE
        format.
    r   r   Nr   r   )r0   ro   flattenrM   concatenatewhererd   )r   pixelsrunss      rA   binary_mask_to_rler"  :  s     tzz|\\^F^^aS&1#./F88F12J&"+-.q1A5DAJ$ss)J:rY   c                     t        j                  |       }g }|D ]8  }t        j                  | |k(  dd      }t        |      }|j	                  |       : |S )a  
    Converts given segmentation map of shape `(height, width)` to the run-length encoding (RLE) format.

    Args:
        segmentation (`torch.Tensor` or `numpy.array`):
            A segmentation map of shape `(height, width)` where each value denotes a segment or class id.
    Returns:
        `List[List]`: A list of lists, where each list is the run-length encoding of a segment / class id.
    r   r   )rl   uniquer  r"  r   )r   segment_idsrun_length_encodingsidxr   rles         rA   convert_segmentation_to_rler)  Q  s^     ,,|,K ){{<3.15 &##C()
  rY   c                     | j                   d   |j                   d   cxk(  r|j                   d   k(  st        d       t        d      |j                  |      ||kD  z  }| |   ||   ||   fS )a	  
    Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
    `labels`.

    Args:
        masks (`torch.Tensor`):
            A tensor of shape `(num_queries, height, width)`.
        scores (`torch.Tensor`):
            A tensor of shape `(num_queries)`.
        labels (`torch.Tensor`):
            A tensor of shape `(num_queries)`.
        object_mask_threshold (`float`):
            A number between 0 and 1 used to binarize the masks.
    Raises:
        `ValueError`: Raised when the first dimension doesn't match in all input tensors.
    Returns:
        `Tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
        < `object_mask_threshold`.
    r   z1mask, scores and labels must have the same shape!)r:   r<   ne)r   r
  r	  object_mask_threshold
num_labelsto_keeps         rA   remove_low_and_no_objectsr/  g  sy    ( KKNfll1o@a@LMM ALMMii
#v0E'EFG>6'?F7O;;rY   c                     | |k(  }|j                         }||   |k\  j                         }|dkD  xr |dkD  }|r||z  }	|	j                         |kD  sd}||fS )Nr   F)r   item)
mask_labels
mask_probskmask_thresholdoverlap_mask_area_thresholdmask_kmask_k_areaoriginal_areamask_exists
area_ratios
             rA   check_segment_validityr<    sr    AF**,K  ]n499;M/7ma&7K  =0
 #>>KrY   r5  r6  label_ids_to_fusec                    || j                   d   n|d   }|| j                   d   n|d   }t        j                  ||ft        j                  | j                        }	g }
|5t
        j                  j                  | j                  d      |dd      d   } d}| |j                  ddd      z  } | j                  d      }i }t        |j                   d         D ]}  }||   j                         }||v }t        || |||      \  }}|s/||v r||   }n|dz  }||	|<   t        ||   j                         d	      }|
j                  ||||d
       |sy|||<    |	|
fS )Nr   r   r   )r~   devicebilinearF)rD   modealign_cornersr      )r   label_id	was_fusedscore)r:   rl   r   int32r?  r3   
functionalinterpolate	unsqueezeviewr   r   r1  r<  rL   r   )r3  pred_scorespred_labelsr5  r6  r=  r   rO   rP   r   segmentscurrent_segment_idr2  stuff_memory_listr4  
pred_classshould_fuser:  r7  segment_scores                       rA   compute_segmentsrT    s    %0$7Za [^F#.#6JQKNE;;ekk*J[J[\LH]]..  #+JV[ / 


  +""2q!,,J##A&K )+;$$Q'( C ^((*
 $55 5Q8S
V ..%6z%B""a'" $6L !+a."5"5"7;MOO, *!,*	 0B!*-7C: !!rY   c            (           e Zd ZdZddgZej                  ddej                  ddddddddfde	e
ef   ded	ee
ef   d
edede	eef   dede	eee   f   de	eee   f   dee   dedeee
ef      ddf fdZedee
ef   f fd       Z	 	 	 	 d5dej,                  dedee   dedee	e
ej0                  f      dee	e
ef      defdZej                  ddfdej,                  d	ee
ef   d
edee   dee	e
ef      dej,                  fdZej8                  fd
edefdZ	 	 d6dej,                  dedee	e
ef      dee	e
ef      dej,                  f
d Zd!ed"eeef   defd#Z d!ed$eeef   d%eeef   defd&Z!	 	 	 	 	 d7dej,                  d'eeef   d!eee
ef      d(e	ee"e   f   dee   dee	e
ef      d)edej,                  fd*Z#	 	 	 	 	 	 	 	 d8d+eej,                     d,eeee
ef         d(e	ee"e   f   d-ed.ee	e
e$f      dee   dee	e
ef      d)edeee
ef      de%fd/Z&dddddddddddddddejN                  ddfd+e(d,ee	e)ee)   f      dedee	e
ej0                  f      dee   d	eee
ef      dee   dee	eef      dee   dee	eee   f      dee	eee   f      dee   dee   dee	e
ef      d.ee	e$e
f      de	e
ef   dee	e
ef      deee
ef      de%f&d0Z*d1 Z+	 d9d2ed3e	e$ee   f   fd4Z, xZ-S ):YolosImageProcessoraG  
    Constructs a Detr image processor.

    Args:
        format (`str`, *optional*, defaults to `"coco_detection"`):
            Data format of the annotations. One of "coco_detection" or "coco_panoptic".
        do_resize (`bool`, *optional*, defaults to `True`):
            Controls whether to resize the image's (height, width) dimensions to the specified `size`. Can be
            overridden by the `do_resize` parameter in the `preprocess` method.
        size (`Dict[str, int]` *optional*, defaults to `{"shortest_edge": 800, "longest_edge": 1333}`):
            Size of the image's `(height, width)` dimensions after resizing. Can be overridden by the `size` parameter
            in the `preprocess` method. Available options are:
                - `{"height": int, "width": int}`: The image will be resized to the exact size `(height, width)`.
                    Do NOT keep the aspect ratio.
                - `{"shortest_edge": int, "longest_edge": int}`: The image will be resized to a maximum size respecting
                    the aspect ratio and keeping the shortest edge less or equal to `shortest_edge` and the longest edge
                    less or equal to `longest_edge`.
                - `{"max_height": int, "max_width": int}`: The image will be resized to the maximum size respecting the
                    aspect ratio and keeping the height less or equal to `max_height` and the width less or equal to
                    `max_width`.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
            Resampling filter to use if resizing the image.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Controls whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
            `do_rescale` parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by the `rescale_factor` parameter in the
            `preprocess` method.
        do_normalize:
            Controls whether to normalize the image. Can be overridden by the `do_normalize` parameter in the
            `preprocess` method.
        image_mean (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_MEAN`):
            Mean values to use when normalizing the image. Can be a single value or a list of values, one for each
            channel. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `List[float]`, *optional*, defaults to `IMAGENET_DEFAULT_STD`):
            Standard deviation values to use when normalizing the image. Can be a single value or a list of values, one
            for each channel. Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_pad (`bool`, *optional*, defaults to `True`):
            Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
            method. If `True`, padding will be applied to the bottom and right of the image with zeros.
            If `pad_size` is provided, the image will be padded to the specified dimensions.
            Otherwise, the image will be padded to the maximum height and width of the batch.
        pad_size (`Dict[str, int]`, *optional*):
            The size `{"height": int, "width" int}` to pad the images to. Must be larger than any image size
            provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
            height and width in the batch.
    pixel_values
pixel_maskTNgp?format	do_resizerD   r   
do_rescalerescale_factordo_normalize
image_mean	image_stddo_convert_annotationsdo_padpad_sizer6   c                    d|v r|j                  d      }d|v r't        j                  d       |j                  d      }n|d nd}||nddd}t        ||d      }|
|}
t	        |   d
i | || _        || _        || _        || _	        || _
        || _        || _        |
| _        ||nt        | _        |	|	nt         | _        || _        || _        g d	| _        y )Npad_and_return_pixel_maskrE   vThe `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.i5  i   )shortest_edgelongest_edgeFrE   default_to_square)r4   r   r   r   rZ  rD   r   r[  r\  r]  r^  r_  r`  ra  rb  rY  return_tensorsdata_formatr5    )poploggerwarning_oncer   super__init__rY  rZ  rD   r   r[  r\  r]  r`  r   r^  r   r_  ra  rb  _valid_processor_keys)selfrY  rZ  rD   r   r[  r\  r]  r^  r_  r`  ra  rb  kwargsrE   	__class__s                  rA   rq  zYolosImageProcessor.__init__  s     '&0ZZ ;<FD zz*-H#|tH'tsTX-YTHN ")%1""6""	 $,(&<#(2(>*DY&/&;AU &
"rY   image_processor_dictc                     |j                         }d|v r|j                  d      |d<   d|v r|j                  d      |d<   t        |   |fi |S )a  
        Overrides the `from_dict` method from the base class to make sure parameters are updated if image processor is
        created using from_dict and kwargs e.g. `YolosImageProcessor.from_pretrained(checkpoint, size=600,
        max_size=800)`
        rE   rd  )copyrm  rp  	from_dict)clsrv  rt  ru  s      rA   ry  zYolosImageProcessor.from_dictL  sd      488:/5zz*/E ,&&0@F

Kf@g !<=w !5@@@rY   r   r   r   r   r5   c                     ||n| j                   }|t        j                  k(  r|dn|}t        ||||      }|S |t        j                  k(  r|dn|}t        |||||      }|S t        d| d      )zD
        Prepare an annotation for feeding into DETR model.
        Fr5   T)r   r   r5   zFormat z is not supported.)rY  r   COCO_DETECTIONr   COCO_PANOPTICr   r<   )rs  r   r   rY  r   r   r5   s          rA   prepare_annotationz&YolosImageProcessor.prepare_annotation\  s     "-4;;%4441J1RXq%6v8L]F  '5550I0QWp%5%6"3F  wvh.@ABBrY   rk  c                 v   d|v r't         j                  d       |j                  d      }nd}t        ||d      }d|v rd|v rt	        ||d   |d   |      }nNd	|v rd
|v rt        ||d	   |d
   |      }n0d|v rd|v r|d   |d   f}nt        d|j                          d      t        |f||||d|}|S )a  
        Resize the image to the given size. Size can be `min_size` (scalar) or `(height, width)` tuple. If size is an
        int, smaller edge of the image will be matched to this number.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`Dict[str, int]`):
                Size of the image's `(height, width)` dimensions after resizing. Available options are:
                    - `{"height": int, "width": int}`: The image will be resized to the exact size `(height, width)`.
                        Do NOT keep the aspect ratio.
                    - `{"shortest_edge": int, "longest_edge": int}`: The image will be resized to a maximum size respecting
                        the aspect ratio and keeping the shortest edge less or equal to `shortest_edge` and the longest edge
                        less or equal to `longest_edge`.
                    - `{"max_height": int, "max_width": int}`: The image will be resized to the maximum size respecting the
                        aspect ratio and keeping the height less or equal to `max_height` and the width less or equal to
                        `max_width`.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BILINEAR`):
                Resampling filter to use if resizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        rE   re  NFrh  rf  rg  r|  r?   r@   rO   rP   z\Size must contain 'height' and 'width' keys or 'shortest_edge' and 'longest_edge' keys. Got .)rD   r   rk  r5   )	rn  ro  rm  r   rf   ra   r<   keysr   )	rs  r   rD   r   rk  r5   rt  rE   new_sizes	            rA   r   zYolosImageProcessor.resize}  s   D D zz*-HHTHNd"~'=3tO,d>.BVgH T!kT&9:tL)4+<PaH 'T/XW6HIIK=#  
#/
 
 rY   c                      t        ||||      S )z
        Resize the annotation to match the resized image. If size is an int, smaller edge of the mask will be matched
        to this number.
        )r   r   r   )r  )rs  rz   r   rD   r   s        rA   r  z%YolosImageProcessor.resize_annotation  s     !yd]effrY   c                      t        ||||      S )a  
        Rescale the image by the given factor. image = image * rescale_factor.

        Args:
            image (`np.ndarray`):
                Image to rescale.
            rescale_factor (`float`):
                The value to use for rescaling.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. If unset, is inferred from the input image. Can be
                one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
        )rk  r5   )r   )rs  r   r\  rk  r5   s        rA   r   zYolosImageProcessor.rescale  s    4 un+YjkkrY   rz   rC   c                     t        ||      S )z
        Normalize the boxes in the annotation from `[top_left_x, top_left_y, bottom_right_x, bottom_right_y]` to
        `[center_x, center_y, width, height]` format and from absolute to relative pixel values.
        )rC   )r   )rs  rz   rC   s      rA   r   z(YolosImageProcessor.normalize_annotation  s    
 $J:FFrY   input_image_sizeoutput_image_sizec           
         i }||d<   |j                         D ]  \  }}|dk(  r@|}	t        |	|t        j                  dt        j
                        }	t        |	d      }	|	|d<   K|dk(  rJ|rH|}
|
t        j                  |d   |d   z  |d   |d   z  |d   |d   z  |d   |d   z  g      z  }
|
|d<   |dk(  r||d<   |||<    |S )z;
        Update the annotation for a padded image.
        rD   r   r   )rA  constant_valuesr5   r   r|   )	r   r   r   CONSTANTr   r8   ry   rM   r   )rs  rz   r  r  paddingupdate_bboxesr  r   r   r   r|   s              rA   #_update_annotation_for_padded_imagez7YolosImageProcessor._update_annotation_for_padded_image  s    !2v$**, 	,JCg~$--$%&6&<&< %UA.*/w'M(+.?.BB(+.?.BB(+.?.BB(+.?.BB	  +0w'):v&&+s#5	,6 rY   r   r  r  c                     t        ||      \  }}	|\  }
}|
|z
  }||	z
  }d|fd|ff}t        ||t        j                  |||      }|| j	                  |||	f|
|f||      }||fS )z<
        Pad an image with zeros to the given size.
        r   r   )rA  r  rk  r5   )r    r   r   r  r  )rs  r   r   rz   r  rk  r5   r  r   r   output_heightoutput_width
pad_bottom	pad_rightr  padded_images                   rA   
_pad_imagezYolosImageProcessor._pad_image!  s     %35FW$X!k&1#|"\1
 ;.	z?Q	N3%%+#/
 !AA\;7-9VX_anJ Z''rY   r4   r   return_pixel_maskrj  c
           
         |	|	n| j                   }	|	|	d   |	d   f}
nt        ||      }
||ndgt        |      z  }g }g }t        ||      D ]B  \  }}| j	                  ||
|||||      \  }}|j                  |       |j                  |       D d|i}|r |D cg c]  }t        ||
|       }}||d<   t        ||	      }||D cg c]  }t        ||
       c}|d<   |S c c}w c c}w )a;	  
        Pads a batch of images to the bottom and right of the image with zeros to the size of largest height and width
        in the batch and optionally returns their corresponding pixel mask.

        Args:
            image (`np.ndarray`):
                Image to pad.
            annotations (`List[Dict[str, any]]`, *optional*):
                Annotations to pad along with the images. If provided, the bounding boxes will be updated to match the
                padded images.
            constant_values (`float` or `Iterable[float]`, *optional*):
                The value to use for the padding if `mode` is `"constant"`.
            return_pixel_mask (`bool`, *optional*, defaults to `True`):
                Whether to return a pixel mask.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
            update_bboxes (`bool`, *optional*, defaults to `True`):
                Whether to update the bounding boxes in the annotations to match the padded images. If the
                bounding boxes have not been converted to relative coordinates and `(centre_x, centre_y, width, height)`
                format, the bounding boxes will not be updated.
            pad_size (`Dict[str, int]`, *optional*):
                The size `{"height": int, "width" int}` to pad the images to. Must be larger than any image size
                provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
                height and width in the batch.
        NrO   rP   r|  )r  rk  r5   r  rW  )r   r   r5   rX  datatensor_typer  r	  )rb  rB   r   r   r  r   r   r   )rs  r4   r   r  r  rj  rk  r5   r  rb  padded_sizeannotation_listpadded_imagespadded_annotationsr   rz   r  padded_annotationr  r   encoded_inputss                        rA   r   zYolosImageProcessor.padB  sP   \  (38#H-x/@AK.vIZ[K)4)@+tfsSY{FZ!$V_!= 	9E:.2oo /'"3+ /> /+L+   .%%&78	9 . $  eXijE  "'D%4^L"Wi(ISZ^D(N8$ (s   C(C-c                 .	   d|v r&t         j                  d       |j                  d      }d}d|v r&t         j                  d       |j                  d      }|| j                  n|}|| j                  n|}t        ||d      }|| j                  n|}|| j                  n|}|	| j                  n|	}	|
| j                  n|
}
|| j                  n|}|| j                  n|}|| j                  n|}|| j                  n|}|| j                  n|}|| j                  n|}t!        |j#                         | j$                         t'        |      }t)        |      st+        d	      t-        ||	|
|||||
       |t/        |t0              r|g}|;t3        |      t3        |      k7  r$t+        dt3        |       dt3        |       d      t5        |      }|t7        |t8        |       |K|t4        j:                  k(  r8t/        |t<        j>                  t@        f      st+        dtC        |       d      |D cg c]  }tE        |       }}tG        |d         r|rt         j                  d       |tI        |d         }|Wg }g }tK        ||      D ]>  \  }}| jM                  ||||||      }|jO                  |       |jO                  |       @ |}|}~~|r|g g }}tK        ||      D ]f  \  }}tQ        ||      }| jS                  |||||      }| jU                  ||tQ        ||            }|jO                  |       |jO                  |       h |}|}~~n"|D cg c]  }| jS                  ||||       }}|r!|D cg c]  }| jW                  ||	|       }}|
r"|D cg c]  }| jY                  ||||       }}|r:|8tK        ||      D cg c]!  \  }}| j[                  |tQ        ||            # }}}|r| j]                  ||d|||||      } | S |D cg c]  }t_        |||       }}ta        d|i|      } ||D cg c]  }ta        ||       c}| d<   | S c c}w c c}w c c}w c c}w c c}}w c c}w c c}w )a`  
        Preprocess an image or a batch of images so that it can be used by the model.

        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
                from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
                List of annotations associated with the image or batch of images. If annotation is for object
                detection, the annotations should be a dictionary with the following keys:
                - "image_id" (`int`): The image id.
                - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
                  dictionary. An image can have no annotations, in which case the list should be empty.
                If annotation is for segmentation, the annotations should be a dictionary with the following keys:
                - "image_id" (`int`): The image id.
                - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
                  An image can have no segments, in which case the list should be empty.
                - "file_name" (`str`): The file name of the image.
            return_segmentation_masks (`bool`, *optional*, defaults to self.return_segmentation_masks):
                Whether to return segmentation masks.
            masks_path (`str` or `pathlib.Path`, *optional*):
                Path to the directory containing the segmentation masks.
            do_resize (`bool`, *optional*, defaults to self.do_resize):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to self.size):
                Size of the image's `(height, width)` dimensions after resizing. Available options are:
                    - `{"height": int, "width": int}`: The image will be resized to the exact size `(height, width)`.
                        Do NOT keep the aspect ratio.
                    - `{"shortest_edge": int, "longest_edge": int}`: The image will be resized to a maximum size respecting
                        the aspect ratio and keeping the shortest edge less or equal to `shortest_edge` and the longest edge
                        less or equal to `longest_edge`.
                    - `{"max_height": int, "max_width": int}`: The image will be resized to the maximum size respecting the
                        aspect ratio and keeping the height less or equal to `max_height` and the width less or equal to
                        `max_width`.
            resample (`PILImageResampling`, *optional*, defaults to self.resample):
                Resampling filter to use when resizing the image.
            do_rescale (`bool`, *optional*, defaults to self.do_rescale):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to self.rescale_factor):
                Rescale factor to use when rescaling the image.
            do_normalize (`bool`, *optional*, defaults to self.do_normalize):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to self.image_mean):
                Mean to use when normalizing the image.
            image_std (`float` or `List[float]`, *optional*, defaults to self.image_std):
                Standard deviation to use when normalizing the image.
            do_convert_annotations (`bool`, *optional*, defaults to self.do_convert_annotations):
                Whether to convert the annotations to the format expected by the model. Converts the bounding
                boxes from the format `(top_left_x, top_left_y, width, height)` to `(center_x, center_y, width, height)`
                and in relative coordinates.
            do_pad (`bool`, *optional*, defaults to self.do_pad):
                Whether to pad the image. If `True`, padding will be applied to the bottom and right of
                the image with zeros. If `pad_size` is provided, the image will be padded to the specified
                dimensions. Otherwise, the image will be padded to the maximum height and width of the batch.
            format (`str` or `AnnotationFormat`, *optional*, defaults to self.format):
                Format of the annotations.
            return_tensors (`str` or `TensorType`, *optional*, defaults to self.return_tensors):
                Type of tensors to return. If `None`, will return the list of images.
            data_format (`str` or `ChannelDimension`, *optional*, defaults to self.data_format):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            pad_size (`Dict[str, int]`, *optional*):
                The size `{"height": int, "width" int}` to pad the images to. Must be larger than any image size
                provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
                height and width in the batch.
        rd  zjThe `pad_and_return_pixel_mask` argument is deprecated and will be removed in v4.33, use `do_pad` instead.NrE   zgThe `max_size` argument is deprecated and will be removed in v4.33, use `size['longest_edge']` instead.F)rD   rE   ri  )captured_kwargsvalid_processor_keyszkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r[  r\  r]  r^  r_  rZ  rD   r   zThe number of images (z) and annotations (z) do not match.zxThe path to the directory containing the mask PNG files should be provided as a `pathlib.Path` or string object, but is z	 instead.r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r   r   r5   )rD   rE   r   r5   )rD   r   r5   r|  )r   r  rk  r5   r  rj  rb  )input_channel_dimrW  r  r  r	  )1rn  ro  rm  rZ  rD   r   r   r[  r\  r]  r^  r_  r`  ra  rb  rY  r'   r  rr  r#   r%   r<   r(   rc   dictr   r   r&   SUPPORTED_ANNOTATION_FORMATSr~  r   r   strrp   r$   r"   r!   r   r  r   r    r   r  r   	normalizer   r   r   r   )!rs  r4   r   r   r   rZ  rD   r   r[  r\  r]  r^  r_  r`  ra  rY  rj  rk  r5   rb  rt  rE   r   prepared_imagesprepared_annotationsr   resized_imagesresized_annotationsr   resized_imageresized_annotationrz   r  s!                                    rA   
preprocesszYolosImageProcessor.preprocess  s~   | '&0( ZZ ;<F3 ::j)D&/&7DNNY	 Ltyyd$US$,$44==((2(:T__

0>0F,,N,8,@t((l(2(:T__

&/&7DNNY	+A+ID''Oe 	 !'F$,$44==( &FDLfLfg$V,F#: 
 	&!)%!		
 "z+t'D&-K"s6{c+6F'F(V5H[IYHZZij  "&)" )E{S "*888zGLL#+>?<<@<L;MYX  6<<E.'<<6!9%*s
 $ >vay I " O#% !$V[!9 
4v00.G)&7 1   &&u-$++F3
4 %F.K!5 &68" 3%(%= 	CME6 .u6G HI$(KKD8hbs %0 %M *.)?)?	>-IZ+[*& #))-8'../AB	C (1"$7 "( KKD8WhKi 
 lrschdll5.L]l^sFsoufkuj)O`aF  "k&= *-[&)A%J ))*nUL]6^_K 
 !XX'"''"34-! & 	N*  $ ,E;RcdF  */GUcdN&[f,MWLH,x( w =b t
$,s*   ?Q3)Q8Q=0R!&R+RRc           	      r   t         j                  d       |j                  |j                  }}t	        |      t	        |      k7  rt        d      |j                  d   dk7  rt        d      t        j                  j                  |d      }|dddf   j                  d      \  }}t        |      }|j                  d      \  }	}
t        j                  |
|	|
|	gd	      j                  |j                         }||dddddf   z  }t#        |||      D cg c]  \  }}}|||d
 }}}}|S c c}}}w )a[  
        Converts the raw output of [`YolosForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
        bottom_right_x, bottom_right_y) format. Only supports PyTorch.

        Args:
            outputs ([`YolosObjectDetectionOutput`]):
                Raw outputs of the model.
            target_sizes (`torch.Tensor` of shape `(batch_size, 2)`):
                Tensor containing the size (height, width) of each image of the batch. For evaluation, this must be the
                original image size (before any data augmentation). For visualization, this should be the image size
                after data augment, but before padding.
        Returns:
            `List[Dict]`: A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
            in the batch as predicted by the model.
        z`post_process` is deprecated and will be removed in v5 of Transformers, please use `post_process_object_detection` instead, with `threshold=0.` for equivalent results.TMake sure that you pass in as many target sizes as the batch dimension of the logitsr   r   zTEach element of target_sizes must contain the size (h, w) of each image of the batchr   .Ndimr
  r	  r|   )rn  ro  r  
pred_boxesr   r<   r:   r3   rH  r   rJ   r   unbindrl   r   tor?  r   )rs  outputstarget_sizes
out_logitsout_bboxprobr
  r	  r|   img_himg_w	scale_fctr  lbresultss                   rA   post_processz YolosImageProcessor.post_process  s5     	d	

  '~~w/A/AH
z?c,//stta A%stt}}$$Z4c3B3h++B/ )2#**1-uKKue <!DGGU		!T1*--ILVU[]bIcddgaAa1q9dd es   D2r  r  c                    |j                   |j                  }}|"t        |      t        |      k7  rt        d      t        j
                  j                  |d      }|dddf   j                  d      \  }}t        |      }	|t        |t              rMt        j                  |D 
cg c]  }
|
d   	 c}
      }t        j                  |D 
cg c]  }
|
d   	 c}
      }n|j                  d      \  }}t        j                  ||||gd      j                  |	j                         }|	|dddddf   z  }	g }t#        |||	      D ]3  \  }}}|||kD     }|||kD     }|||kD     }|j%                  |||d       5 |S c c}
w c c}
w )	a  
        Converts the raw output of [`YolosForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
        bottom_right_x, bottom_right_y) format. Only supports PyTorch.

        Args:
            outputs ([`YolosObjectDetectionOutput`]):
                Raw outputs of the model.
            threshold (`float`, *optional*):
                Score threshold to keep object detection predictions.
            target_sizes (`torch.Tensor` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                `(height, width)` of each image in the batch. If unset, predictions will not be resized.
        Returns:
            `List[Dict]`: A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
            in the batch as predicted by the model.
        Nr  r   .r   r   r  r  )r  r  r   r<   r3   rH  r   rJ   r   rc   r   rl   Tensorr  r   r  r?  r   r   )rs  r  r  r  r  r  r  r
  r	  r|   r  r  r  r  r  r  r  r  rF  labelboxs                        rA   post_process_object_detectionz1YolosImageProcessor.post_process_object_detection  s   &  '~~w/A/AH
#:#l"33 j  }}$$Z4c3B3h++B/ )2 #,-L%Aqad%ABL%Aqad%AB+2215uUE5%$@aHKKELLYIIaqj11E6651 	MGAq!a)m$Ea)m$EA	M"CNNeusKL		M  &B%As   (F F)NNNNNN)Nr   NNT)Nr   FNNNTN)r  N).__name__
__module____qualname____doc__model_input_namesr   r}  r   BILINEARr   r  r   r   rK   rH   r   r   rq  classmethodr   ry  rM   rh   r   r   r   r  r   r   r  r   r
   r   r  r   r  r)   r   r   r8   r   r   r  r  r  __classcell__)ru  s   @rA   rV  rV    s   .` (6 0@/N/N#'9'B'B,3!04/315-1D
c++,D
 D
 38n	D

 %D
 D
 c5j)D
 D
 %e,-D
 U+,D
 !)D
 D
 4S>*D
 
D
L AT#s(^ A A$ .2*.9=DHzz  )*	
 $( U3#456 $E#/?*?$@A 
J (:'B'B26DHBzzB 38nB %	B
 ./B $E#/?*?$@AB 
BT (:'A'Ag
 %g 
g$ ?CDHlzzl l eC)9$9:;	l
 $E#/?*?$@Al 
l:Gt GsCx GUY G))  S/) !c?	) 
)` 049:26DH"(zz( 38_( T#s(^,	(
 uhuo56( ./( $E#/?*?$@A( ( 
(H 7;9:"';?26DH"-1TRZZ T d4S>23T uhuo56	T
  T !sJ!78T ./T $E#/?*?$@AT T 4S>*T 
Tr NR*.9=$()-%)6:'+:>9=15!%9=;?4D4J4JDH-1)CC eND4H$HIJC $(	C
 U3#456C D>C tCH~&C TNC !sEz!23C tnC U5$u+#567C E%e"456C !)C C  s$4456!C" !z3!78#C$ 3 001%C& $E#/?*?$@A'C( 4S>*)C, 
-CN'V _c3"'3=B:tTY{CZ=[3rY   rV  r  )N   r  )FN)TN)F)r  皙?)r  r  NN)cr  r   typingr   r   r   r   r   r   r	   r
   r   ro   rM   feature_extraction_utilsr   image_processing_utilsr   r   image_transformsr   r   r   r   r   r   r   r   r   image_utilsr   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   utilsr)   r*   r+   r,   r-   r.   r/   r0   r1   r2   rl   r3   r   scipy.specialr   scipy.stats
get_loggerr  rn  r}  r~  r  rh   r  rK   rB   rX   ra   rf   rt   ry   r   r9   r   r   r   r   r   r   r   r   r  r  r   rH   r  r"  r)  r/  r<  rT  rV  rl  rY   rA   <module>r     s   '  S S S  4 G
 
 
    $     			H	% 0 ? ?AQA_A_` 
 [_##19%EU@U:V1W#	#Y#& ]_.c3h.'*.6>sm.VY.
38_.l AE	 ! ! !  !  c+;&; <=	 !
 38_ !N #@D	BB
U38_d3i/
0B smB  c+;&; <=	B
 38_B:Ch C2
bjj 
 
 
T uS#X 4 8x} 8c 8 rv::$)#s(OHPQVWZ\lWlQmHnZZ&S   L ',@D	9  $9  &6&; <=	9z5"** 5 5N 6:&::&& c7<<'(& 	&
 -s23& 
&V `e::#(7<42:: E#s(O PS XZXbXb "** rzzSUS]S]G]A^  #5#=#=0S#X0S#X0 sCx0 	0
 !0h. ,<:.  ),,0#'9" 	9"
 "'9"  C)9" sCx9"xi, irY   