
    sgT                         d dl Z d dlmZ d dlmc mZ ddlmZmZm	Z	 ddl
mZmZmZmZmZmZ  e       rd dlmZ  e       rd dlmZ  G d d	ej*                        Z G d
 dej*                        Z	 	 	 	 	 ddZy)    N   )is_scipy_availableis_vision_availablerequires_backends   )_set_aux_lossbox_iou	dice_lossgeneralized_box_iounested_tensor_from_tensor_listsigmoid_focal_loss)linear_sum_assignment)center_to_corners_formatc                   P     e Zd ZdZ fdZ ej                         d        Z xZS )RTDetrHungarianMatchera  This class computes an assignment between the targets and the predictions of the network

    For efficiency reasons, the targets don't include the no_object. Because of this, in general, there are more
    predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).

    Args:
        config: RTDetrConfig
    c                    t         |           t        | dg       |j                  | _        |j
                  | _        |j                  | _        |j                  | _	        |j                  | _        |j                  | _        | j                  | j                  cxk(  r| j                  cxk(  rdk(  rt        d       y y )Nscipyr   z#All costs of the Matcher can't be 0)super__init__r   matcher_class_cost
class_costmatcher_bbox_cost	bbox_costmatcher_giou_cost	giou_costuse_focal_lossmatcher_alphaalphamatcher_gammagamma
ValueError)selfconfig	__class__s     Q/var/www/html/venv/lib/python3.12/site-packages/transformers/loss/loss_rt_detr.pyr   zRTDetrHungarianMatcher.__init__1   s    $	* 331111$33))
))
??dnnCC!CBCC D    c           	      L   |d   j                   dd \  }}|d   j                  dd      }t        j                  |D cg c]  }|d   	 c}      }t        j                  |D cg c]  }|d   	 c}      }| j                  rt        j                  |d   j                  dd            }	|	dd|f   }	d| j                  z
  |	| j                  z  z  d|	z
  d	z   j                          z  }
| j                  d|	z
  | j                  z  z  |	d	z   j                          z  }||
z
  }n.|d   j                  dd      j                  d
      }	|	dd|f    }t        j                  ||d      }t        t        |      t        |             }| j                  |z  | j                  |z  z   | j                   |z  z   }|j#                  ||d
      j%                         }|D cg c]  }t'        |d          }}t)        |j+                  |d
            D cg c]  \  }}t-        ||          }}}|D cg c]O  \  }}t        j.                  |t        j0                        t        j.                  |t        j0                        fQ c}}S c c}w c c}w c c}w c c}}w c c}}w )a  Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        logitsNr   
pred_boxesr   r   class_labelsboxesg:0yE>)pdtype)shapeflattentorchcatr   Fsigmoidr   r    logsoftmaxcdistr   r   r   r   r   viewcpulen	enumeratesplitr   	as_tensorint64)r"   outputstargets
batch_sizenum_queriesout_bboxv
target_idstarget_bboxout_probneg_cost_classpos_cost_classr   r   r   cost_matrixsizesicindicesjs                        r%   forwardzRTDetrHungarianMatcher.forward@   so   * #*("3"9"9"1"=
K <(00A6YY7Ca. 1CD
iiW =7 => yy!2!:!:1a!@AH:.H$**n4::1EFAPXL[_L_KdKdKfJfgN!ZZALTZZ+GHhY]oMbMbMdLdeN'.8Jx(00A6>>rBH"1j=11J KK+;	()A()KMefqMrss	nny04??Z3OORVR`R`clRll!&&z;CGGI*12QQwZ22;D[EVEVW\^`Ea;bc41a(1.cckrscgcdfg%++6QVQ\Q\8]^ss5  D =, 3css   J$JJJ3AJ )	__name__
__module____qualname____doc__r   r2   no_gradrQ   __classcell__r$   s   @r%   r   r   &   s,    D U]]_3t 3tr&   r   c                        e Zd ZdZ fdZddZddZ ej                         d        Z	d Z
d ZddZd	 Zd
 ZddZd Zed        Zd Z xZS )
RTDetrLossah  
    This class computes the losses for RTDetr. The process happens in two steps: 1) we compute hungarian assignment
    between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth /
    prediction (supervise class and box).

    Args:
        matcher (`DetrHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        weight_dict (`Dict`):
            Dictionary relating each loss with its weights. These losses are configured in RTDetrConf as
            `weight_loss_vfl`, `weight_loss_bbox`, `weight_loss_giou`
        losses (`List[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
        alpha (`float`):
            Parameter alpha used to compute the focal loss.
        gamma (`float`):
            Parameter gamma used to compute the focal loss.
        eos_coef (`float`):
            Relative classification weight applied to the no-object category.
        num_classes (`int`):
            Number of object categories, omitting the special no-object category.
    c                    t         |           t        |      | _        |j                  | _        |j                  |j                  |j                  d| _	        ddg| _
        |j                  | _        t        j                  |j                  dz         }| j                  |d<   | j                  d|       |j                   | _        |j$                  | _        y )N)loss_vfl	loss_bbox	loss_giouvflr+   r   r,   empty_weight)r   r   r   matcher
num_labelsnum_classesweight_loss_vflweight_loss_bboxweight_loss_giouweight_dictlosseseos_coefficienteos_coefr2   onesregister_bufferfocal_loss_alphar   focal_loss_gammar    )r"   r#   r`   r$   s      r%   r   zRTDetrLoss.__init__   s    -f5!,,..0000

 g&..zz&"3"3a"78==R^\:,,
,,
r&   c           	         d|vrt        d      d|vrt        d      | j                  |      }|d   |   }t        j                  t	        ||      D 	
cg c]  \  }\  }	}
|d   |
    c}
}	}d      }t        t        |      t        |            \  }}	t        j                  |      j                         }|d   }t        j                  t	        ||      D 	
cg c]  \  }\  }	}
|d   |
    c}
}	}      }t        j                  |j                  d d	 | j                  t        j                  |j                  
      }|||<   t        j                  || j                  dz         dd df   }t        j                   ||j"                        }|j%                  |j"                        ||<   |j'                  d      |z  }t        j(                  |      j                         }| j*                  |j-                  | j.                        z  d|z
  z  |z   }t        j0                  |||d      }|j3                  d      j5                         |j                  d   z  |z  }d|iS c c}
}	}w c c}
}	}w )Nr)   #No predicted boxes found in outputsr(   z$No predicted logits found in outputsr+   r   dimr*   r   r/   devicer   rc   .r,   r.   none)weight	reductionr\   )KeyError_get_source_permutation_idxr2   r3   zipr	   r   diagdetachfullr0   rc   r?   rt   r4   one_hot
zeros_liker/   to	unsqueezer5   r   powr     binary_cross_entropy_with_logitsmeansum)r"   r@   rA   rO   	num_boxesr6   idx	src_boxes_target_rM   target_boxesious
src_logitstarget_classes_originaltarget_classestargettarget_score_originaltarget_score
pred_scorerw   losss                         r%   loss_labels_vflzRTDetrLoss.loss_labels_vfl   sK   w&@AA7"ABB..w7L)#.	yySQXZaMb!c!c/'6Aq''"21"5!cijk29=?WXd?efazz$&&(X&
"'))_bcjls_t,u,uOGU[VWYZW^-DQ-G,u"vRa $"2"2%++jN_N_
 6s>t7G7G!7KLSRUSURUXV % 0 0zGWGW X%)WW-B-H-H%Ic",66r:VCYYz*113
jnnTZZ88AJG,V11*lSYeklyy|!J$4$4Q$77)CD!!+ "d
 -vs   I+I2c           	         d|vrt        d      |d   }| j                  |      }t        j                  t	        ||      D 	
cg c]  \  }\  }	}
|d   |
    c}
}	}      }t        j
                  |j                  dd | j                  t        j                  |j                        }|||<   t        j                  |j                  dd      || j                        }d|i}|S c c}
}	}w )	zClassification loss (NLL)
        targets dicts must contain the key "class_labels" containing a tensor of dim [nb_target_boxes]
        r(   z#No logits were found in the outputsr*   Nr   rs   r   loss_ce)ry   rz   r2   r3   r{   r~   r0   rc   r?   rt   r4   cross_entropy	transposeclass_weight)r"   r@   rA   rO   r   r6   r   r   r   r   rM   r   r   r   rh   s                  r%   loss_labelszRTDetrLoss.loss_labels   s     7"@AAX&
..w7"'))_bcjls_t,u,uOGU[VWYZW^-DQ-G,u"vRa $"2"2%++jN_N_
 6s//*"6"6q!"<ndN_N_`W% -vs   C'c           	      z   |d   }|j                   }t        j                  |D cg c]  }t        |d          c}|      }|j	                  d      |j
                  d   dz
  k7  j                  d      }	t        j                  j                  |	j                         |j                               }
d|
i}|S c c}w )z
        Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. This is not
        really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
        r(   r*   )rt   r,   r   cardinality_error)rt   r2   r>   r;   argmaxr0   r   nn
functionall1_lossfloat)r"   r@   rA   rO   r   r(   rt   rE   target_lengths	card_predcard_errrh   s               r%   loss_cardinalityzRTDetrLoss.loss_cardinality   s     "')RQ#a.?*@)R[ab]]2&&,,r*:Q*>>CCAF	==(():N<P<P<RS%x0 *Ss   B8c           	         d|vrt        d      | j                  |      }|d   |   }t        j                  t	        ||      D 	cg c]  \  }\  }}	|d   |	    c}	}}d      }
i }t        j                  ||
d      }|j                         |z  |d<   d	t        j                  t        t        |      t        |
                  z
  }|j                         |z  |d
<   |S c c}	}}w )a;  
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss. Targets dicts must
        contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes are expected in
        format (center_x, center_y, w, h), normalized by the image size.
        r)   rp   r+   r   rq   rv   rx   r]   r   r^   )ry   rz   r2   r3   r{   r4   r   r   r|   r   r   )r"   r@   rA   rO   r   r   r   tr   rM   r   rh   r]   r^   s                 r%   
loss_boxeszRTDetrLoss.loss_boxes   s     w&@AA..w7L)#.	yyWgAV!W!WIAv1!G*Q-!W]^_IIiH	'mmo	9{

 8 CE]^jEkl
 
	 (mmo	9{ "Xs   C$c                 :   d|vrt        d      | j                  |      }| j                  |      }|d   }||   }|D cg c]  }|d   	 }	}t        |	      j	                         \  }
}|
j                  |      }
|
|   }
t        j                  j                  |dddf   |
j                  dd dd      }|ddd	f   j                  d
      }|
j                  d
      }
|
j                  |j                        }
t        ||
|      t        ||
|      d}|S c c}w )z
        Compute the losses related to the masks: the focal loss and the dice loss. Targets dicts must contain the key
        "masks" containing a tensor of dim [nb_target_boxes, h, w].
        
pred_masksz#No predicted masks found in outputsmasksNbilinearF)sizemodealign_cornersr   r   )	loss_mask	loss_dice)ry   rz   _get_target_permutation_idxr   	decomposer   r   r   interpolater0   r1   r9   r   r
   )r"   r@   rA   rO   r   
source_idx
target_idxsource_masksr   r   target_masksvalidrh   s                r%   
loss_maskszRTDetrLoss.loss_masks   s:   
 w&@AA55g>
55g>
|,#J/%,-7--<UCMMOe#|4#J/ }}00D!(:(:23(?j`e 1 
 $AqD)11!4#++A.#((););<+L,	R"<yI
 # .s    Dc           	      H   |d   }| j                  |      }t        j                  t        ||      D 	
cg c]  \  }\  }	}
|d   |
    c}
}	}      }t        j                  |j
                  d d | j                  t        j                  |j                        }|||<   t        j                  || j                  dz         dd df   }t        j                  ||d	z  d
      }|j                  d      j                         |j
                  d   z  |z  }d|iS c c}
}	}w )Nr(   r*   r   rs   r   ru   .r,   g      ?rv   r   loss_bce)rz   r2   r3   r{   r~   r0   rc   r?   rt   r4   r   r   r   r   r"   r@   rA   rO   r   r6   r   r   r   r   rM   r   r   r   r   s                  r%   loss_labels_bcezRTDetrLoss.loss_labels_bce  s   X&
..w7"'))_bcjls_t,u,uOGU[VWYZW^-DQ-G,u"vRa $"2"2%++jN_N_
 6s>t7G7G!7KLSRUSURUXV11*fslV\]yy|!J$4$4Q$77)CD!! -vs   Dc                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w Nr2   r3   r<   	full_like)r"   rO   rM   sourcer   	batch_idxr   s          r%   rz   z&RTDetrLoss._get_source_permutation_idx'  si    IIPYZaPbccna&!uvq9cd	YY'B;FABC
*$$ dB   #A7A>
c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r   r   )r"   rO   rM   r   r   r   r   s          r%   r   z&RTDetrLoss._get_target_permutation_idx-  si    IIPYZaPbccna!Vuvq9cd	YY'B;AvBC
*$$ dBr   c           	      t   d|vrt        d      |d   }| j                  |      }t        j                  t	        ||      D 	
cg c]  \  }\  }	}
|d   |
    c}
}	}      }t        j
                  |j                  d d | j                  t        j                  |j                        }|||<   t        j                  || j                  dz         dd d	f   }t        ||| j                  | j                        }|j                  d      j!                         |j                  d   z  |z  }d
|iS c c}
}	}w )Nr(   zNo logits found in outputsr*   r   rs   r   ru   .r,   
loss_focal)ry   rz   r2   r3   r{   r~   r0   rc   r?   rt   r4   r   r   r   r    r   r   r   s                  r%   loss_labels_focalzRTDetrLoss.loss_labels_focal3  s+   7"788X&
..w7"'))_bcjls_t,u,uOGU[VWYZW^-DQ-G,u"vRa $"2"2%++jN_N_
 6s>t7G7G!7KLSRUSURUXV!*fdjj$**Myy|!J$4$4Q$77)Cd## -vs   D3c                     | j                   | j                  | j                  | j                  | j                  | j
                  | j                  d}||vrt        d| d       ||   ||||      S )N)labelscardinalityr+   r   bcefocalr_   zLoss z not supported)r   r   r   r   r   r   r   r!   )r"   r   r@   rA   rO   r   loss_maps          r%   get_losszRTDetrLoss.get_lossE  sy    &&00____''++''
 xuTF.9::x~gwCCr&   c           	      <   | d   | d   }}|D cg c]  }t        |d          }}|d   d   j                  }g }t        |      D ]  \  }}	|	dkD  rjt        j                  |	t        j
                  |      }
|
j                  |      }
t        ||         t        |
      k(  sJ |j                  ||   |
f       u|j                  t        j                  dt        j
                  |      t        j                  dt        j
                  |      f        |S c c}w )Ndn_positive_idxdn_num_groupr*   r   rs   )	r;   rt   r<   r2   aranger?   tileappendzeros)dn_metarA   r   r   r   num_gtsrt   dn_match_indicesrM   num_gtgt_idxs              r%   get_cdn_matched_indicesz"RTDetrLoss.get_cdn_matched_indicesS  s   (/0A(BGND[3:;a3q();;N+22"7+ 	IAvzfEKKO\2?1-.#f+=== '');V(DE ''AU[[HAU[[H	  % <s   Dc           
         |j                         D ci c]  \  }}d|vs|| }}}| j                  ||      }t        d |D              }t        j                  |gt        j
                  t        t        |j                                     j                        }t        j                  |d      j                         }i }| j                  D ]Z  }	| j                  |	||||      }
|
D ci c]'  }|| j                  v s||
|   | j                  |   z  ) }
}|j                  |
       \ d|v rt!        |d         D ]  \  }}| j                  ||      }| j                  D ]  }	|	dk(  r	| j                  |	||||      }
|
D ci c]'  }|| j                  v s||
|   | j                  |   z  ) }
}|
j                         D ci c]  \  }}|d| z   | }
}}|j                  |
         d|v rd	|vrt#        d
      | j%                  |d	   |      }||d	   d   z  }t!        |d         D ]  \  }}| j                  D ]  }	|	dk(  r	i } | j                  |	||||fi |}
|
D ci c]'  }|| j                  v s||
|   | j                  |   z  ) }
}|
j                         D ci c]  \  }}|d| z   | }
}}|j                  |
         |S c c}}w c c}w c c}w c c}}w c c}w c c}}w )a  
        This performs the loss computation.

        Args:
             outputs (`dict`, *optional*):
                Dictionary of tensors, see the output specification of the model for the format.
             targets (`List[dict]`, *optional*):
                List of dicts, such that `len(targets) == batch_size`. The expected keys in each dict depends on the
                losses applied, see each loss' doc.
        auxiliary_outputsc              3   8   K   | ]  }t        |d            yw)r*   N)r;   ).0r   s     r%   	<genexpr>z%RTDetrLoss.forward.<locals>.<genexpr>{  s     @1An-.@s   rs   r   )minr   _aux_dn_auxiliary_outputsdenoising_meta_valuesz}The output must have the 'denoising_meta_values` key. Please, ensure that 'outputs' includes a 'denoising_meta_values' entry.r   _dn_)itemsra   r   r2   r>   r   nextitervaluesrt   clampitemrh   r   rg   updater<   r!   r   )r"   r@   rA   krE   outputs_without_auxrO   r   rh   r   l_dictrM   r   kwargss                 r%   rQ   zRTDetrLoss.forwardj  sI    18`1CV^_C_q!t`` ,,2G< @@@	OOYKu{{4PTU\UcUcUePfKgKnKno	KK	q1668	 KK 	"D]]4'7INFBHbQAQUQaQaLaaT%5%5a%888bFbMM&!	" ')(1':M2N(O 	*$$,,'8'B KK *Dw !]]41BGWV_`FJPjQTUY]YiYiTiaT-=-=a-@!@@jFj=C\\^LTQaE!+oq0LFLMM&)*	* "W,&g5  T  227;R3SU\]G!G,C$D^$TTI(1':P2Q(R 
*$$ KK *Dw F*T]]41BGWV_jcijFJPjQTUY]YiYiTiaT-=-=a-@!@@jFj<BLLNKDAqaD*na/KFKMM&)*
* g a c kL( kKs:   KK-KKK$K$K)
+K/?K/,K4
)T)rR   rS   rT   rU   r   r   r   r2   rV   r   r   r   r   rz   r   r   r   staticmethodr   rQ   rW   rX   s   @r%   rZ   rZ   w   sp    .-$">( U]]_ .>"%%$$D    ,>r&   rZ   c
                 f   t        |      }|j                  |       i }| |d<   ||d<   |j                  r|	<t        j                  ||	d   d      \  }}t        j                  ||	d   d      \  }}t        |d d d df   j                  dd      |d d d df   j                  dd            }||d	<   |d	   j                  t        |g|g             |	4t        j                  dd      j                  dd            |d
<   |	|d<    |||      }t        |j                               }||fS )Nr(   r)   dn_num_splitr   rq   r,   r   r   r   r   r   )
rZ   r   auxiliary_lossr2   r=   r   r   extendr   r   )r(   r   rt   r)   r#   outputs_classoutputs_coordenc_topk_logitsenc_topk_bboxesr   r   	criterionoutputs_lossdn_out_coorddn_out_classr   	loss_dictr   s                     r%   RTDetrForObjectDetectionLossr    sf    6"ILLL#L!+L ,*/++mEZ[iEjpq*r'L-*/++mEZ[iEjpq*r'L-)-3B3*?*I*I!Q*OQ^_`becebe_eQfQpQpqrtuQvw,=()()00?PSbRc1de ,3@&&q!,l.D.DQ.J4L/0 5JL01,/Iy!"D---r&   )NNNNN)r2   torch.nnr   torch.nn.functionalr   r4   utilsr   r   r   loss_for_object_detectionr   r	   r
   r   r   r   scipy.optimizer   transformers.image_transformsr   Moduler   rZ   r   r&   r%   <module>r     sw        N N  4 FNtRYY Ntbq qt	 $.r&   