
    sgɢ             !       Fs   U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dl mZmZ d dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d d	lm'Z'm(Z(m)Z)m*Z*m+Z+ d d
l,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z3 d dl4m5Z5 ejl                  jn                  Z7g Z8ee9   e:d<   ejv                  jx                  jz                  Z= G d de      Z>	 ddedej~                  de@fdZA eeAej~                  j                  d      ZC eeAej~                  j                        ZD eeAej~                  j                        ZFde"deGde"fdZH e$e=j                         e0d      eDde"de"fd                     ZI e$e=j                         e0d      eDde"de"fd                      ZJ e$e=j                         e0d      eDde"de"d!eLd"eLfd#                     ZK e$e=j                         e0d      eDd$e"d%eLd&eLd'eLd(e@d)e"fd*                     ZM e$e=j                  j                  g      d+        ZP e$e=j                  jD                  g      d,e"fd-       ZQ e$e=j                         e0       eDd.e"de"fd/                     ZR e$e=j                         e0d      eDd$e"d.e"fd0                     ZS e$e=j                         e0d      d$e"d.e"d1eLd2eLfd3              ZT e$e=j                         e0       eDd.e"de"fd4                     ZU e$e=j                         e0       eDd$e"d.e"de"fd5                     ZV e$e=j                         e0d      d$e"d.e"d"eLfd6              ZW e$e=j                         e0d      eDd$e"d.e"d7eLd8e@fd9                     ZX e$e=j                         e0d      eDdd:e"d.e"d;e9fd<                     ZY e$e=j                        eDd$e"d=e"fd>              ZZ e$e=j                         e0       eDd.e"de"fd?                     Z[ e$e=j                         e0d      eDd$e"d.e"de"fd@                     Z\ e$e=j                        d.e"dAe"de"fdB       Z] e$e=j                        d$e"d.e"dAe"dee"e"f   fdC       Z^ e$e=j                        e=j                  j                  j                  e7j                         e0       eD	 	 	 	 dd.e"dDe"dEeLdFeLdGe@dHeej                     de"fdI                            Z_ e$e=j                        e=j                  j                  j                  e7j                        eD	 	 	 	 dd.e"dDe"dEeLdFeLdGe@dHeej                     de"fdJ                     Zd e$e=j                         e0       eDd$e"d.e"dDe"dEeLdFeLdGe@d8e@de"fdK                     Ze e$e=j                         e0d      eDd$e"d.e"dLe"de"fdM                     ZfdNe"dOeGfdPZgdQej                  fdRZi e$e=j                         e0       eDe>j                  j                  fd.e"dSe"dOeGde"fdT                     Zj e$e=j                         e0d      eDd$e"d=e"dSe"dOeGfdU                     Zm e$e=j                        ddV       Zo e$e=j                         e0       eDe>j                  j                  dWfd.e"dSe"dOeGd!eLfdX                     Zp e$e=j                  j                        eDd$e"d.e"dSe"dOeGd!eLf
dY              Zq e$e=j                  j                        eDd$e"d.e"dSe"dOeGd!eLde"fdZ              Zs e$e=j                  j                        eDd$e"d.e"dSe"dOeGd[eLf
d\              Zt e$e=j                  j                        eDd$e"d.e"dSe"dOeGd[eLde"fd]              Zvd$e"d.e"dSe"dAee"   dOeGd^eGd_e"de"fd`Zw e$e=j                         e0d      eDd$e"d.e"deGde"fda                     Zx e$e=j                         e0d      d$e"d.e"dSe"dAee"   dOeGd^eGd_e"de"fdb              Zy e$e=j                         e0d      d$e"d.e"dSe"dAee"   dOeGd^eGd_e"de"fdc              Zz e$e=j                         e0       eDde>j                  j                  fd.e"dSe"dAee"   dOeGde"f
dd                     Z{ e$e=j                         e0d      eDde>j                  j                  fd$e"d.e"dSe"dAee"   dOeGde"fde                     Z| e$e=j                         e0       eDe>j                  j                  fd=e"dSe"dOeGde"fdf                     Z} e$e=j                         e0d      eDe>j                  j                  fd$e"d.e"dSe"dOeGde"f
dg                     Z~ e$e=j                         e0       dd=e"dhe"dieLfdj              Z e$e=j                          e0       dke"dle"de"fdm              Z e$e=j                         e0       d$e"dneeG   deGdoeGdpeGdqeGfdr              Z e$e=j                  jD                        	 	 	 	 dd.e"deGdoeeG   dpeeG   dqeGf
dt       Zde"deGdoeeG   dpeeG   deeGeGf   f
duZ e$e=j
                         e0       	 	 	 	 dd=e"dve"deGdoeeG   dpeeG   dqeGfdw              Z e$e=j                         e0       d$e"dneeG   deGdxeGfdy              Z e$e=j                         e0       d$e"dneeG   dzeGd{eGd|eGf
d}              Zd$e"de"d~ej                  fdZ e$e=j                         e0d      eCd$e"de"deGd~ej                  fd                     Z e$e=j                         e0       eCd$e"de"deGd~ej                  fd                     Zd Z e$e=j                         e0       d=e"deeG   deeG   deeG   deeG   de"fd              Z e$e=j                         e0       eDd=e"deeG   deeG   deeG   deeG   deeG   de"fd                     Z e$e=j                         e0       d$e"de"d&eLfd              Z e$e=j                         e0       d:e"deeG   deGdeGdqeGde"fd              Z e$e=j                   j                        eD	 dd$e"d.e"deeL   de"fd              Z e$e=j"                        e=j"                  j                  j                  e7j$                        e=j"                  j                  j                  e7j&                        d=e"dieLdee@   fd                     Z e$e=j(                         e0dd      d=e"dieLdee@   fd              Z e$e=j*                         e0       de"deGde@fd              Z e$e=j,                         e0       de"deGde@fd              Z e$e=j.                         e0       	 	 	 ddAe"de"deGde@de@de"fd              Z e$e=j0                         e0       d$e"de"deGdeGde@f
d              ZdeeG   fdZdee"   deGdeGdee"   fdZdee"   fdZdee"   deGfdZdee"   deGdeGfdZ e$e=j<                  j                  e=j<                  j                  g      	 ddee"   deGdeGdee"   de"f
d       Z e$e=j>                        	 dd.e"deeG   deGdee"   fd       Z e$e=j@                  j                  e=j@                  j                  g      	 	 dd.e"deeG   deGdeee"      deee"      f
d       Z e$e=jB                  jD                        dd=e"deGdeGdee"df   fd       Z e$e=jD                  j                        	 dd=e"deeG   deGdee"df   fd       Z e$e=jF                  jD                        dd.e"deGdeGdee"df   fd       Ze=jH                  jJ                  j                  e7j$                        	 dd.e"de"deGdee"df   fd       Z e$e=jN                         e0       eDdd.e"de"de"d!eGd%eGf
d                     Z e$e=jP                         e0       eD	 	 	 dd.e"de"de"d!eGd%eGde@fd                     Z e$e=jR                         e0       eDdd.e"de"de"d!eGd%eGf
d                     Z e$e=jT                  j                        eDd$e"d=e"de"de"dee"   deGdeGdeGdeGdee@   deee"   ee"   ee"   f   fdĄ              Z e$e=jT                  j                        d$e"d=e"de"de"dee"   deGdeGdeGdeGdee@   dejD                  dejD                  dejD                  deee"   ee"   ee"   f   fdƄ       Zdee"   dee"   fdǄZ e$e=jZ                  j                        de"d=e"deeG   de"de"dAee"   dee"   dee@   deee"   ee"   ee"   f   fd˄       Z e$e=jZ                  j                        de"d=e"deeG   de"de"dAee"   dee"   dee@   dejD                  dejD                  dejD                  deee"   ee"   ee"   f   fd̄       Zd=e"dAee"   dee"   dee"   dee"   dGe@deLdeLde@dee"e"e"ee"   ee"   f   fdфZ e$e=j`                         e0dddӫ      d=e"dAee"   dee"   dee"   dee"   dGe@deLdeLdee"e"e"f   fdԄ              Ze=j`                  j                  j                  e7j&                        e=j`                  j                  j                  e7j$                        d=e"dAee"   dee"   dee"   dee"   dGe@deLdeLdee"e"e"f   fdՄ              Ze=jd                  j                  j                  e7j$                        ddee"   fdք       Z e$e=jh                  j                        d=e"dAee"   dee"   de"de"deLdeLdee"e"e"f   fdׄ       Z e$e=jj                  j                        d=e"dAee"   dee"   de"de"dGe@deLdeLdee"e"e"f   fd؄       Z e$e=jj                  jl                        d=e"dAee"   dee"   dGe@deLdeLdee"e"e"f   fdل       Z e$e=jp                  j                        d=e"dAee"   dee"   de"de"dGe@deLdeLdee"e"e"e"e"f   fdڄ       Zd=e"dAee"   dee"   de"de"deLdGe@de"fdۄZ e$e=jt                  j                        d=e"dAee"   dee"   de"de"deLdeLdee"e"e"e"f   fd܄       Z e$e=jv                  j                        d=e"dAee"   dee"   de"de"deLdeLdee"e"e"e"e"e"f   fd݄       Z e$e=jx                  j                        d=e"dAee"   dee"   de"de"deLdeLdee"e"e"e"f   fdބ       Z e$e=jz                         e0dd      eDdd߄                     Z e$e=j~                         e0       ddddddddee"e(f   dQeej                     deej                     de@de@deej                     fd              Z e$e=j                  e=j                  e=j                  g       e0       d               Ze=j                  j                  j                  e7j&                         e$e=j                         e0dddd      d=e"dAe"dee"   dee"   dee"   dGe@deLdeLfd                     Zd Z e$e=j                  j                        de"d=e"dAee"   dee"   dee"   dee"   dee"   de@deLdee@   de"dee"ee"   ee"   f   fd       Z e$e=j                  j                        de"d=e"dAee"   dee"   dee"   dee"   dee"   de@deLdee@   dee"ee"   ee"   f   fd       Z e$e=j                  j                        de"d=e"dAee"   dee"   dee"   dee"   dee"   de@deLdee@   dejD                  dejD                  dejD                  dee"ee"   ee"   f   fd       Z e$e=j                         e0dddū      d=e"d$e"dAe"dee"   dee"   dee"   dee"   deLfd              Z e$e=j                         e0dddū      d=e"d$e"dAe"dee"   dee"   dee"   dee"   deLde"fd              Z e$e=j                         e0       eDd=e"deeGeGf   fd                     Z e$e=j                        dsdde*deGdxe*de*d%e(f
d       Z e$e=j                         e0       dsdde*deGdxe*de*d%e(f
d              Zdsdde*deGdxe*de*de@d%e(fdZ e$e=j                  j                        e=j                  j                  j                  e7j$                        dd              Z e$e=j                        de*deGdxe*de*fd       Z e$e=j                         e0       de*deGdxe*de*fd              Zde*deGdxe*de*de@f
dZ e$e=j                         e0ddL      eDd.e"dee"e"f   fd                     Z e$e=j                         e0       	 	 	 dde"d ee@eGeLf   dee@eGeLf   dHeej                     fd              Z e$e=j                        dd       Zؐd Zِd Z e$e=j                  j                         e$e=j                  j                         e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d=e"deeeG      deeeL      de"fd                                                               Z e$e=j                  j                         e$e=j                  j                         e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d=e"deeeG      deeeL      de"fd                                                               Zdd	Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 dd=e"deeG   deeL   de"fd                            Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 dd=e"deeG   deeL   de"fd                            Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 	 dd=e"deeG   deeL   deeL   de"f
d                            Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 	 dd=e"deeG   deeL   deeL   de"f
d                            Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 	 	 dd=e"deeG   deeL   deeL   deeL   de"fd                            Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                         e0dd
      	 	 	 dd=e"deeG   deeL   deeL   deeL   de"fd                            ZeD	 dd=e"deeG   deeeL      de@de"f
d       Zd Zd Zd Zd Z	 ddZd Zd ZddZddZd  Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d!                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d"                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d#                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d$                      Zd% Zdd&Zdd'Zd( Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d)                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d*                      Zd+ Z d, Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d-                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d.                      Z e$e=j
                  j                        e=j
                  j                  j                  e7j$                        e=j
                  j                  j                  e7j&                        d/                      Z e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d0                      Z e$e=j                  j                         e$e=j                  j                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        e=j                  j                  j                  e7j$                        e=j                  j                  j                  e7j&                        d1                                                         Z e$e=j                  j                  e=j                  j                  g       e0       	 dd=e"deeG   d2e@deeL   de"f
d3              Z e$e=j                  j                  e=j                  j                  g      e=j                  j                  j                  e7j&                         e0       	 	 dd=e"deeG   d2e@deeL   deeL   de"fd4                     Z	 e$e=j                  j                  e=j                  j                  g       e0       	 	 	 dd=e"deeG   d2e@deeL   deeL   deeL   de"fd5              Z
dd6Zd7 Zdvee"   d8ee"   d9e"de"fd:Zd8e+de"fd;ZeDd=e"deeG   d2e@deeeL      de"f
d<       Z e$e=j$                  j                        d=e"d>e"de@fd?       Z e$e=j&                  e=j(                  g       e0       d@               Z e$e=j*                  g      dA        Z e$e=j,                  g      ddB       Z e$e=j.                  g      dC        Z e$e=j0                  g      dD        Zd.e"dSe"dAee"   dOeGd^eGdee"e"f   fdEZ e$e=j4                         e0dd_      d.e"dSe"dAee"   dOeGd^eGdee"e"f   fdF              Z e$e=j6                         e0dd_      d.e"dSe"dAee"   dOeGd^eGdee"e"f   fdG              Zde"dHeLde"fdIZde"dHeLde"fdJZdKe"de+fdLZdMe+dNe"de"fdOZdNee"   de"fdPZ dQeGd2e@dQej                  dej                  fdRZ!dSe"dTeGdUeGd2e@fdVZ"dSe"dWeGdTeGdUeGd2e@f
dXZ#dSe"deeG   d2e@fdYZ$dSe"deeG   d2e@fdZZ% e$e=jL                         e0       eDdSe"deeG   d2e@fd[                     Z&	 	 	 	 dd=e"d\e"d]eGd^eGd2e@d_e@de"fd`Z' e$e=jP                         e0       eD	 	 	 dd=e"d\e"d]eGd^eGd2e@de"fda                     Z( e$e=jR                         e0       eDdb                      Z) e$e=jT                         e0       dde>j                  j                  fdc              Z*ddejD                  deejD                  dfe@de@fdgZ+e=jX                  j                  j                  e7j$                        e=jX                  j                  j                  e7j$                         e0dh      ddidj                     Z, e$e=jZ                  j                  e=jZ                  j                  g      e=jZ                  j                  j                  e7j&                         e0       eD	 	 dd=e"deeGeGf   d2e@dkeeL   dleeL   de"fdm                            Z. e$e=jZ                  j                        e=jZ                  j                  j                  e7j$                        e=jZ                  j                  j                  e7j&                         e0       eD	 dd=e"deeeGeGf      d2e@deeeLeLf      de"f
dn                                   Z/ e$e=j`                         e$e=jb                         e$e=jd                        eD e0       d=e"deeGdf   de"fdo                                   Z3 e$e=jh                         e$e=jj                         e$e=jl                        eD e0       d=e"deeGdf   de"fdp                                   Z7d=e"deeGdf   dqeeGeGeGge"f   de"fdrZ8 e$e=jr                         e$e=jt                         e$e=jv                         e0d      ds                             Z< e$e=jz                         e0dtdu      dddvdw              Z= e$e=j|                         e0       dddxdy              Z> e$e=j~                  j                  e=j~                  j                  g       e0       dej                  dddzdpe(dQeej                     d{ej                  deej                     de@f
d|              ZB e$e=j~                  j                  g      dej                  dddzdoe(dpe(dQeej                     d{ej                  deej                     de@fd}       ZD e$e&      d~        ZE e$e=j                        e=j                  j                  j                  e7j&                         e0       dsdsde>j                  j                  fd=e"dSe"die(de(dAee"   dOeGde"fd                     ZF e$e=j                        e=j                  j                  j                  e7j&                         e0dd      d=e"dSe"dOeGdee"e"f   fd                     ZG e$e=j                  j                        	 	 ddddde"de"d,e"deLde@dee"   d&eeL   dee"e"f   fd       ZId ZJ e$e=j                  g       e0       eDdd                     ZK e$e=j                         e0       d               ZL e$e=j                        d        ZM e$e=j                  j                  e=j                  j                  g      dddd.e"dQeej                     dee"   de"fd       ZO e$e=j                  j                  e=j                  j                  g      dd.e"deeG   fd       ZR e$ejx                  jz                  j                        dd       ZS e$e=j                         e0       dddd              ZTdddZUddddZV e$e=j                         e0       d               ZW e$e=j                        dd       ZX eJe=j                  e=j                          eJe=j                  e=jN                          eJe=j                  e=jR                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                          eJe=j                  e=j                           eJe=j                  e=j                          eJe=j                  e=j                         y(      N)Enum)partialreduce)chainproduct)AnyCallablecastIterableListOptionalTupleUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                       e Zd ZdZdZdZy)	Reductionr         N)__name__
__module____qualname__NONEMEANSUM     O/var/www/html/venv/lib/python3.12/site-packages/torch/_decomp/decompositions.pyr#   r#   /   s    DD
Cr-   r#   Fftype_promotioncompute_dtype_onlyc                 J     t        j                          fd       }|S )Nc                     t        j                  | i |D cg c]  }t        |t              s| }}t	        j
                  |di\  fd}fd} 
t        ||       i t        ||      }	r|S t        ||      S c c}w )Ntype_promotion_kindc                 J    t        | t              r| j                        S | S N
isinstancer   to)xcomputation_dtypes    r.   increase_precz0type_casts.<locals>.inner.<locals>.increase_precG   s"    !V$tt-..r-   c                 J    t        | t              r| j                        S | S r6   r7   )r:   result_dtypes    r.   decrease_precz0type_casts.<locals>.inner.<locals>.decrease_precM   s!    !V$ttL))r-   )pytreearg_tree_leavesr8   r   utilselementwise_dtypesr    )argskwargsr:   	flat_argsr<   r?   rr;   r>   r1   r/   r0   s          @@r.   innerztype_casts.<locals>.inner=   s     --t>v>
*QPVBWA
	 
 +0*B*B+
,:+
'<
		 xt,P0OPHM1--1
s
   BB)	functoolswraps)r/   r0   r1   rH   s   ``` r.   
type_castsrK   8   s'    
 __Q. .6 Lr-   T)r0   r1   )r0   r:   dimreturnc                 j    t        || j                         z
        D ]  }| j                  d      }  | S N)rangerL   	unsqueeze)r:   rL   _s      r.   _unsqueeze_to_dimrT   j   s2    3=! KKOHr-   
grad_inputout_gradyc                 4    | d||z  z
  j                         z  S Nr$   conj_physicalrV   rW   s     r.   tanh_backwardr]   p   s      q1q5y//111r-   c                 4    | |d|z
  z  j                         z  S rY   rZ   r\   s     r.   sigmoid_backwardr_   w   s      qAE{11333r-   beta	thresholdc                 t    ||z  j                         }t        j                  ||z  |kD  | | |z  |dz   z        S N      ?)exptorchwhere)rV   r:   r`   ra   zs        r.   softplus_backwardri   ~   s=     
TA;;DI-xAS9QRRr-   grad_outputalphascaleinput_scale	is_resultself_or_resultc                     ||z  }|}|}|r&t        j                  |dk  | |z  ||z   z  | |z        S t        j                  |dk  | |z  |z  t        j                  ||z        z  | |z        S Nr   )rf   rg   re   )	rj   rk   rl   rm   rn   ro   negcoefposcoef
negiptcoefs	            r.   elu_backwardru      s     emGGJ{{a*$(@A'!
 	
 {{a*$w.>J;V1WW'!
 	
r-   c                 .    t        j                  | |      S r6   )rf   	full_likeselfvalues     r.   fill_scalarr{      s    ??4''r-   rz   c                     t        j                  j                         dk(  fd       t        j	                  |       S )Nr   c                  ,    d j                          dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrL   )rz   s   r.   <lambda>zfill_tensor.<locals>.<lambda>   s    RSXS\S\S^R__jk r-   )rf   _checkrL   atencopyrx   s    `r.   fill_tensorr      s3    	LL		qk 99T5!!r-   ry   c                 f    t        j                  t        j                  | dz   d      d      dz  S N   r   min   maxrf   clampry   s    r.   hardsigmoidr      s)     ;;u{{4!83;a??r-   c                 H    t        j                  |dkD  |dk  z  | dz  d      S )Ng      g      @gUUUUUU?        rf   rg   rj   ry   s     r.   hardsigmoid_backwardr      s0     ;;	$y! r-   min_valmax_valc                 B    t        j                  ||k  ||k\  z  d|       S )Nr   r   )rj   ry   r   r   s       r.   hardtanh_backwardr      s$    
 ;;DGO<c;OOr-   c                 l    | t        j                  t        j                  | dz   d      d      z  dz  S r   r   r   s    r.   	hardswishr      s.     %++ekk$(:BBQFFr-   c           
      x    t        j                  |dk  dt        j                  |dk  | |dz  dz   z  |             S )Nr   r         ?r   r   s     r.   hardswish_backwardr      sA     ;;r	DAI{tax3.>?M r-   c                 6    t        j                  ||k  d|       S rq   r   )rj   ry   ra   s      r.   threshold_backwardr      s     ;;ty(![99r-   negative_slopeself_is_resultc                 <    t        j                  |dkD  | | |z        S rq   r   )rj   ry   r   r   s       r.   leaky_relu_backwardr      s      ;;taxkN.JKKr-   gradapproximatec                    d}d}d}|dk(  ri||z  dz  }d}||z  }||z  }	||||	z  z   z  }
t        j                  |
      }d|z  }d|z   }d|z  }d||z  z
  }|dd|z  |z  z   z  }||z  |z  }| ||z   z  S |}||z  dz  }ddt        j                  ||z        z   z  }|t        j                  ||z  d	z        z  }| |||z  z   z  S )
Ng;f?g;f?gmBP?tanhr   gHm?r$   r   g      )rf   r   erfre   )r   ry   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberH   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfs                        r.   gelu_backwardr      s'    %G&I'Jf*$s*d{/0ZZ&
TzJ+j:55 AF
T(9$9:/14DD)99::Y&,Q4&=112eiitd 233sTCZ'((r-   inputc                     t        j                  t        j                  |            }t        j                  |      }||z  d||z  z
  z  }| ||z   z  S rY   )rf   r   Fsoftplussigmoid)rj   r   input_tanh_softplusinput_sigmoidouts        r.   mish_backwardr     sV      **QZZ%67MM%(M
-
1':=P'P#P
QC-344r-   c                 2    | t        j                  |       z  S r6   )rf   r   r   s    r.   silur     s     %--%%%r-   c                 \    ddt        j                  |       z   z  }| |z  d|d|z
  z  z   z  S rY   )rf   re   )rj   ry   r   s      r.   silu_backwardr     s<     1uyy$''(G AG(<$<==r-   weightc                 <    t        j                  | dkD  | || z        S rq   r   )ry   r   s     r.   _prelu_kernelr   $  s    ;;taxv}55r-   c                 ~    t        j                  |dkD  | || z        }t        j                  |dkD  d|| z        }||fS )Nr   r   r   )rj   ry   r   
input_gradweight_grads        r.   _prelu_kernel_backwardr   )  sE     TAX{F[4HIJ++dQhTK-?@K$$r-   noiseloweruppertraining	generatorc                    |J |r^| dk  }t         j                  | ||      }t        j                  || |z  |       }|j	                  t        j                  ||d             |S ||z   dz  }	t         j                  | |	      S )Nr   r$   r%   )r   uniformrf   rg   copy_
leaky_relu)
ry   r   r   r   r   r   not_positiverG   outputr   s
             r.   rrelu_with_noiser   4  s     qyLLue,\4!8T:EKKa34%-1,t^44r-   c           
      @    | j                  t        | |||||            S r6   )r   r   )ry   r   r   r   r   r   s         r.   rrelu_with_noise_r   L  s#     ::&tUE5(IVWWr-   c                 x    |r||z
  dkD  r| j                  |      S ||z   dz  }t        j                  | |||      S )Ngư>r%   )mulr   r   )rj   ry   r   r   r   r   r   r   s           r.   rrelu_with_noise_backwardr   Z  sK     EEMD(u%%%-1,''~~
 	
r-   bufferc                     |dk  }t        j                  |dd      }t        j                  |dd      }t        j                  t        j                  |             }| |||d|z   z  z  z
  z  S )Nr   r$   rP   )rf   rg   re   abs)rj   ry   r   in_negative	max_derivsignrh   s          r.   log_sigmoid_backwardr   o  sg     (KKA.I;;{Ar*D		599T?"#A)da1q5k&::;;r-   loss	reductionc                     |t         j                  j                  k(  rt        j                  |       S |t         j
                  j                  k(  rt        j                  |       S | S r6   )r#   r*   rz   rf   meanr+   sum)r   r   s     r.   apply_loss_reductionr   |  sH    INN(((zz$	imm))	)yyr-   dtypec                     | t         j                  k(  rt         j                  S | t         j                  k(  rt         j                  S | t         j
                  k(  rt         j                  S y r6   )rf   	complex32float16	complex64float32
complex128float64r   s    r.   to_real_dtyper     sK    }}	%//	!}}	%""	"}} 
#r-   targetc                 *    | |z
  dz  }t        ||      S )Nr%   )r   )ry   r   r   r   s       r.   mse_lossr     s     6MaDi00r-   c                 |    |t         j                  j                  k(  rd|j                         z  nd}|||z
  z  | z  S )N       @)r#   r*   rz   numel)rj   r   r   r   norms        r.   mse_loss_backwardr     s;     #,y~~/C/C"C3D56>"[00r-   c                     t        j                  | ||      }| j                  t        d            }t        j                  ||d      }t        j
                  |      }t        j                  |||      S )N)rL   r   z-infTrL   keepdim)rf   softmaxeqfloatall
zeros_likerg   )ry   rL   r   r   maskedmasked_rowszeross          r.   safe_softmaxr    s[    
--#U
3CWWU6]#F))FT:KS!E;;{E3//r-   rd   c                     | |z
  j                         }t        j                  ||k  d|dz  z  |z  |d|z  z
        }t        ||      S )Nr   r%   )r   rf   rg   r   )ry   r   r   r`   r   s        r.   smooth_l1_lossr    sO     6M D;;td{C$'MD$8$t:KLDi00r-   c                    |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  |      }|| z  }t	        j                  ||k  ||z  |z  |t	        j                  |      z        S rc   )r#   r*   rz   r   rf   r   rg   r   )	rj   ry   r   r   r`   r   r:   abs_x	norm_grads	            r.   smooth_l1_loss_backwardr    s{    
 "+inn.B.B!B3DvAIIaLE{"I;;AEJJqM! r-   c                 h    t        | ||||      }t        ||j                         t        ||d      S NT	copy_fromcopy_toexact_dtype)r  r   shaper   )rj   ry   r   r   r`   rU   results          r.   smooth_l1_loss_backward_outr    s3     %[$	4PFj&,,/FJDQQr-   deltac           
          |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  || k  | | z  |z  t	        j
                  ||kD  || z  |z  ||z  | z              S rc   )r#   r*   rz   r   rf   rg   )rj   ry   r   r   r  r   r:   s          r.   huber_loss_backwardr    s    
 "+inn.B.B!B3DvA;;	UF
	e#AItk1E94!8k;QR r-   c                 h    t        | ||||      }t        ||j                         t        ||d      S r  )r  r   r  r   )rj   ry   r   r   r  rU   r  s          r.   huber_loss_backward_outr    s3     !dFIuMFj&,,/FJDQQr-   ignore_indextotal_weightc                    |j                         dk  rdnd}|t        j                  j                  k(  r| |z  } |j	                  |      }t        j                  ||k7  |d      }t        j                  |      }	t        j                  |	||d      }	|	j                         | j                         cxkD  rdkD  rn n| j	                  |      } |Nt        |j                               D 
cg c]  }
d }}
|j                  d   ||<   |j                  |      }| |z  } t        j                  ||k7  | d      } |	| z  S c c}
w )Nr%   r   r$   g      )rL   r#   r*   rz   rR   rf   rg   r  scatterrQ   r  reshape)rj   ry   r   r   r   r   r!  channel_dimsafe_targetrU   rS   	new_shapes               r.   _nll_loss_backwardr(    s    xxzA~!1KINN(((!L0k*F++f4fa@K!!$'Jz;TJJ~~+//+/a/!++K8 %dhhj 121Q2	2!'a	+	*!F*++f4k1EK## 3s   *	D<c                    |j                         dkD  sJ d       t        j                  |j                         |      }|j                  |      }|dz  dk(  sJ d| d|        |dz  }|j	                  |d|      }|j	                  |||      }t        j                  |      }d|z
  |z  |z  | z  }	|| z  }t        j                  ||	g|      S )Nr   z*glu does not support 0-dimensional tensorsr%   z.Halving dimension must be even, but dimension z	 is size rd   r~   )rL   rB   canonicalize_dimsizenarrowrf   r   cat)
rj   ry   rL   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfs
             r.   glu_backwardr5    s     88:>GGG>%%dhhj#6H
))H
Ca1Q	7z3%PQqIHa3IXy)<Jz2	!	!%77)CkQ  ,k999(*=>HMMr-   c           	         d|j                         cxk  rdk  sJ d        J d       |j                         dk  sJ d       |j                         dk(  xr |j                         dk(  }|sA|j                  d   |j                  d   k(  s"J d|j                   d|j                   d       |j                         dk(  s'J d	|j                   d
|j                          df       |'|j                         |j                  d   k(  sJ d       |t        j                  j
                  k(  r}|j                         dk(  rj| j                         dk(  r| j                  d   |j                  d   k(  srJ d|j                  d    d| j                          d| j                  d           | j                         dk  r| j                         dk(  sJ d| j                          t        | ||||||      S )Nr   r%   input tensor should be 1D or 2Dr$   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rP   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rL   r  r   r#   r)   rz   r(  )rj   ry   r   r   r   r   r!  no_batch_dims           r.   nll_loss_backwardr>  1  s    
aB!BBB!BB

EDE 88:?8vzz|q'8L

1a(J	#DJJ<z&,,qIJ  1$ D
b!3!3!5 6jA' $ 	&,,.DJJrN:FEF: INN(((TXXZ1_ A%+*;*;A*>$**Q-*O 	
EdjjQRm_ U)oo/00GHYHYZ[H\G]_	
O OO"{'8'8':a'?	YD[EVEVDWX	Y? T669lL r-   c           	      >   |j                         dk(  sJ d|j                                 |j                         dk(  sJ d|j                                 |j                  d   |j                  d   k(  r>|j                  d   |j                  d   k(  r|j                  d   |j                  d   k(  s!J d|j                   d	|j                          |j                         dk(  s&J d
|j                   d|j                          d       t        | ||||||      S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r%   r$   r9  r:  r<  z ( z, elements))rL   r  r   r(  )rj   ry   r   r   r   r   r!  s          r.   nll_loss2d_backwardrA  ]  sE    	
aj	\]a]e]e]g\hij 	

n	^_e_i_i_k^lmn 	

1a(JJqMV\\!_,JJqMV\\!_,I 
$DJJ<z&,,H	I	-
 1$ 	""#3|'9'9';&<K	I$
 T669lL r-   c           	      "   |dz
  t        j                  t        j                  |        | j                  dd            z  |t        j                  t        j                  |       | j                  dd            z  z
  }|||z  }t        ||      S )Nr$   r,   i)rf   maximumlog1pnew_fulllogr   )ry   r   r   r   r   s        r.   binary_cross_entropyrG    s     QJ%--TEDMM"d3 uyyb$0GHHID f}i00r-   c                     d}| ||z
  z  t        j                  |d|z
  z  |      z  }|||z  }|t        j                  j                  k(  r||j                         z  }|S )Ng-q=r$   r   )rf   r   r#   r*   rz   r   )rj   ry   r   r   r   EPSILONr  s          r.   binary_cross_entropy_backwardrJ    sg     GD6M*U[[T9JPW-XXF&INN((($**,&Mr-   c                 r    t        j                  t        j                  |  |z              }t        ||      S r6   )rf   rD  re   r   )r   r   r   r   s       r.   soft_margin_lossrL    s.     ;;uyy%&12Di00r-   c                     || z  t        j                  ||z        dz
  z  }|t        j                  j                  k(  r||j                         z  }|S rY   )rf   r   r#   r*   rz   r   )rj   ry   r   r   rU   s        r.   soft_margin_loss_backwardrN    sM     +%v})E)IJJINN((($**,.
r-   otherpc                 6    t         j                  | |z
  |      S )N)rP  )r   r   )r   rO  rP  s      r.   distrR    s     99UU]a9((r-   x1x2c                    | j                  d      j                  dd      }t        j                  |t        j                        }|j                  d      j                  dd      }t        j                  |t        j                        }t        j
                  | j                  d      ||gd      }t        j
                  |||gd      }|j                  |j                        }|j                  d      j                         S )Nr%   rP   Tmemory_formatr   )powr   rf   	ones_likecontiguous_formatr-  r   matmulmT	clamp_minsqrt)	rS  rT  x1_normx1_padx2_normx2_padx1_x2_r  s	            r.   _euclidean_distrf    s     ffQimmB%G__WE4K4KLFffQimmB%G__WE4K4KLF
))RVVBZ&12
6C
))R)2
.CZZFA##%%r-   input_sizesstartendstepc                 X    | j                  |      }t        j                  || ||||      S r6   )	new_zerosrf   slice_scatter)rj   rg  rL   rh  ri  rj  rU   s          r.   slice_backwardrn    s/     &&{3Jz;UCNNr-   r$   c                    ddl m}m} | j                         }|dk(  rt	        d      t        j                  | j                         |      }t        | j                               }t        | j                               }	|dk  rt	        d      ||nd}
||nt        j                  } ||
dk        r|
||   z  }
 ||dk        r|||   z  } ||
dk        rd}
n ||
||   kD        r||   }
 |||
k        r|
}n, ||t        j                  k(        s ||||   kD        r||   }| j                         |
|	|   z  z   }||
z
  }||z   dz
  |z  ||<   |	|xx   |z  cc<   | j                  rt        d      | j                  ||	|      S )Nr   )guard_size_obliviousstatically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver$   z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesrp  rq  rL   RuntimeErrorrB   r*  listr+  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)ry   rL   rh  ri  rj  rp  rq  ndimsizesstrides	start_valend_valrx  lens                 r.   slice_forwardr    s   
 88:DqyIJJ

 
 S
1CE4;;=!Gqy899*I_c#++GIM*U3Z	GaK(5:IM*		i%*4	5#J	Gi/0	w#++5	6:N%*; *((*Y-EEN
I
C*q.T)E#JCLDL!J
 	
 ug~>>r-   c                 n    | j                   |   dt        ffd} ||dd      } |||      }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rM   c                 L    | |S | dk  r| z   } t        t        | |      |      S rq   r   r   )valr   r   defaultdim_sizes       r.   
clamp_wrapz(_normalize_start_end.<locals>.clamp_wrap(  s0    ;N7.C3sE?E**r-   r   )r  int)r:   rL   rh  ri  r  r  s        @r.   _normalize_start_endr    sJ     wws|H+# + ua1-E
S%8
4C#:r-   srcc           	      |   t        j                  | j                  |      }| j                  |   }t	        | |||      \  }}t        | j                        }||z
  |dz
  z   |z  ||<   |j                  |      }|dk(  r||k(  r|dk(  r|j                         S d g| j                         z  }t        j                  || j                        }	|	|z
  |z  ||<   t        j                  || j                  t        j                        }
|dk7  rt        j                  |
|	|k\        }
||k7  rt        j                  |
|	|k        }
|dk7  rt        j                  |
|	|z
  |z  dk(        }
dg| j                         z  }d||<   |
j                  |      }
t         j#                  |
t         j%                  ||
|d      |       S )Nr$   r   devicer  r   rP   )rB   r*  r|  r  r  rt  expandclonerL   rf   aranger  onesboollogical_andviewr   rg   _unsafe_masked_index)r   r  rL   rh  ri  rj  r  src_sizeindicesidxmask
mask_shapes               r.   rm  rm  6  s    
 
 S
1C{{3H%eS%=JE3EKK H5[D1H-$6HSM
**X
CzcXo$!)yy{fuyy{"G
,,x
5C%KD(GCL::hu||5::FDz  se|4
h  sSy1qy  et';q'@Auyy{"JJsO99Z D::dD55c4!LeTTr-   indexc                 T    | j                  |      }t        j                  || ||      S r6   )rl  rf   select_scatter)rj   rg  rL   r  rU   s        r.   select_backwardr  _  s+     &&{3J
KeDDr-   offsetdim1dim2c                 V    | j                  |      }t        j                  || |||      S r6   )rl  rf   diagonal_scatter)rj   rg  r  r  r  rU   s         r.   diagonal_backwardr  f  s-    
 &&{3J!!*k64NNr-   input_dtypec                 F    | j                   |k7  r|j                  |      }|S r6   )r   r9   )rj   rU   r  s      r.   _cast_grad_to_input_dtyper  o  s&     K']];/
r-   r   c                 ~    | |z  }||t        j                  ||d      z  z
  }t        | ||      j                         S NTr  )rf   r   r  
contiguous)rj   r   rL   r  new_grad_outputrU   s         r.   _softmax_backward_datar  w  sK     "F*O 6EIIS$- $ J %[*kJUUWWr-   c                 ~    | t        j                  |      t        j                  | |d      z  z
  }t        | ||      S r  )rf   re   r   r  )rj   r   rL   r  rU   s        r.   _log_softmax_backward_datar    sA     uyy0599d4   J %[*kJJr-   c                     | |dz  z   ||dz
  z  z
  }t        t        j                  t        j                  |      } |d||      j	                  d      } |d||z  |      j	                  d      }	||	z   S )z/Utility function to implement im2col and col2imr%   r$   r   r  r   rP   )r   rf   r  int64rR   )
input_dkernel_d
dilation_d	padding_dstride_dr  blocks_d	arange_kwblocks_d_indiceskernel_grids
             r.    _im2col_col2im_indices_along_dimr    s     Q&x!|)DDHEKKGI !Hh7AA!D Ax*4jAKKBOK k))r-   kernel_sizedilationpaddingru  c           
         t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d        |d        |d	d
        |d       | j                  t              }t        j                  |dv xr t	        d dd  D              fd       t        d t        dd        D              t        j                  t	        d D              fd       |dk(  }|s| j                  d      } | j                  \  }}	}
}\  }}\  }}\  }}\  }}t        |
||||| j                        }t        |||||| j                        }t        j                  | ||||f      }|j                  d      j                  d      }|d d d d ||f   }|j                  dddddd      }|j                  d      }|j                  d      }|j                  ||	|z  |z  ||z        }|s|j                  d      }|S )Nr%   c                       y)Nz"im2col(): only 2D kernel supportedr,   r,   r-   r.   r   zim2col.<locals>.<lambda>      r-   c                       y)Nz$im2col(): only 2D dilation supportedr,   r,   r-   r.   r   zim2col.<locals>.<lambda>  r  r-   c                       y)Nz#im2col(): only 2D padding supportedr,   r,   r-   r.   r   zim2col.<locals>.<lambda>  r  r-   c                       y)Nz"im2col(): only 2D stride supportedr,   r,   r-   r.   r   zim2col.<locals>.<lambda>  r  r-   c                 ~    |rt        d | D              nt        d | D              }t        j                  |d        y )Nc              3   &   K   | ]	  }|d kD    ywr   Nr,   .0rP  s     r.   	<genexpr>z1im2col.<locals>.check_positive.<locals>.<genexpr>       (Q1q5(   c              3   &   K   | ]	  }|d k\    ywr  r,   r  s     r.   r  z1im2col.<locals>.check_positive.<locals>.<genexpr>       ;RqAF;Rr  c                       y)Nz<{param_name} should be greater {'than' zero, but got {param}r,   r,   r-   r.   r   z0im2col.<locals>.check_positive.<locals>.<lambda>  r  r-   r  rf   r   param
param_namestrictconds       r.   check_positivezim2col.<locals>.check_positive  s3    ,2s(%((;RE;R8RX	
r-   r  r  r  Fr  ru  r   r@  c              3   &   K   | ]	  }|d k7    ywr  r,   r  ds     r.   r  zim2col.<locals>.<genexpr>       :!qAv:r  r   c                       dt                S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler  s   r.   r   zim2col.<locals>.<lambda>       --25\N< r-   c              3   \   K   | ]$  \  }}}}}d |d|z  z   ||d z
  z  z
  d z
  |z  z    & yw)r$   r%   Nr,   )r  r   paddilkersts         r.   r  zim2col.<locals>.<genexpr>  sF      "Cc3 	
S1s7]SC!G_,q0R77s   *,rX  c              3   &   K   | ]	  }|d kD    ywr  r,   )r  cs     r.   r  zim2col.<locals>.<genexpr>  s     'aAE'r  c                  F    dt        dd         d d  d d d dS )	Nz!Given an input with spacial size rX  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  )r  r  output_sizer  r  ru  s   r.   r   zim2col.<locals>.<lambda>  sL    3E%*4E3F G"m;xj 9)9VH -]D	F r-   r@  r   rP   r$   r      T)rf   r   r  r  r  r  ziprR   r  r  r   r  permuter+  r$  squeeze)r   r  r  r  ru  r  r|  batched_input	batch_dimr%  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr   num_blocks_rownum_blocks_colr  r  s    ````                     @@r.   im2colr    s~    
LL[!Q&(TU	LLX!#%ST	LLW"$QR	LLV!#OP
 ;.8Z(8Yu568$KKEu:D	LL:3:uRSz::	<
  &)"#J;'
 K 
LL';''	F 	F AIM"/4{{,I{GWHh"Iy%J
$Hh9:y(ELL ::y(ELL 55Iy) LML+55b9CCBG!Q 24FFGF^^Aq!Q1-F',,Q/N',,Q/N^^;)H4n~6UF "Mr-   r  c                   !" t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d	        |d
        |dd        |d        |d       | j                  "t        "      }t        j                  |dv xr t	        d "dd  D              "fd       d   d   z  }t        j                  "d   |z  dk(  "fd       t              D 	
cg c]"  \  }	}
}}}d|	d|
z  z   ||dz
  z  z
  dz
  |z  z   $ }}}}
}	}|d   |d   z  !t        j                  "d   !k(  !"fd       t        j                  !dkD  !"fd       |dk(  }|s| j                  d      } | j                  "\  }}\  }}\  }}\  }}\  }}| j                  "d   "d   |z  gz   |z         } | j                  dddddd      } t        |||||| j                        }t        |d      }t        |||||| j                        }t              D cg c]  \  }}|d|z  z    }}}| j                  "d   "d   t              z  g|z         }d d ||f} t        j                  || | d      }t!        j"                  || | | | f      }|s|j%                  d      }|S c c}}}}
}	w c c}}w )Nr%   c                       y)Nzonly 2D output_size supportedr,   r,   r-   r.   r   zcol2im.<locals>.<lambda>  r  r-   c                       y)Nzonly 2D kernel supportedr,   r,   r-   r.   r   zcol2im.<locals>.<lambda>  r  r-   c                       y)Nzonly 2D dilation supportedr,   r,   r-   r.   r   zcol2im.<locals>.<lambda>  r  r-   c                       y)Nzonly 2D padding supportedr,   r,   r-   r.   r   zcol2im.<locals>.<lambda>  r  r-   c                       y)Nzonly 2D stride supportedr,   r,   r-   r.   r   zcol2im.<locals>.<lambda>	  r  r-   Tc                 ~    |rt        d | D              nt        d | D              }t        j                  |d        y )Nc              3   &   K   | ]	  }|d kD    ywr  r,   r  s     r.   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c              3   &   K   | ]	  }|d k\    ywr  r,   r  s     r.   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c                       y)Nz9{param_name} should be greater than zero, but got {param}r,   r,   r-   r.   r   z0col2im.<locals>.check_positive.<locals>.<lambda>  r  r-   r  r  s       r.   r  zcol2im.<locals>.check_positive  s3    ,2s(%((;RE;R8RU	
r-   r  r  r  Fr  ru  r  )r%   r   c              3   &   K   | ]	  }|d k7    ywr  r,   r  s     r.   r  zcol2im.<locals>.<genexpr>  r  r  rX  c                       dt                S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r  s   r.   r   zcol2im.<locals>.<lambda>  r  r-   r   r$   c                      dd    d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = rX  z and kernel_size=r,   )r  r  s   r.   r   zcol2im.<locals>.<lambda>!  s#     ==B2YK H"m% r-   rP   c                  :    d d d d d d  dd    d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rP   .r,   Lr  r  r  r  r  ru  s   r.   r   zcol2im.<locals>.<lambda>.  F    $[M} M:Zy	& B))*9U2YKqB r-   c                  :    d d d d d d  dd    d	S r  r,   r  s   r.   r   zcol2im.<locals>.<lambda>4  r  r-   r   r@  r  
accumulater  )rf   r   r  r  r  r  rR   r$  r  r  r  rT   rl  prodr   _unsafe_index_putr   r  r  )#r   r  r  r  r  ru  r  r|  prod_kernel_sizer   r  r  r  r  colr  out_hout_wr  r  r  r  r  r   r  r  indices_rowindices_colorP  output_padded_sizer   r  r  r  s#    `````                           @@r.   col2imr*    s    
LL[!Q&(OP	LL[!Q&(JK	LLX!#%IJ	LLW"$GH	LLV!#EF
 ;.8Z(7Ie468$;.KKEu:D	LL:3:uRSz::	<
 #1~A6	LLb	$$)	% '*(K'
 "Cc3 	
S1s7]SC!G_,q0R77C  	AQA	LLb	Q	B 	B 
LL	A	B 	B AIM"KKELE5Hh"Iy%J
$Hh MM58U1X1A%AB[PSVVWEMM!Q1a+E2xY%,,K $K3K2xY%,,K 14K0IJ1!a!e)JJ__	q58tK0014FFF {K
0C##FC4#HFUU6YJ
YJ
KLF"MkV Ks   6'M9Mr  c                 z    | |j                  |       |z  z  j                  t        j                  |             }|S NrV  )type_asr  rB   r   )rj   r  rl   rG   s       r.   native_dropout_backwardr.  ]  sB     
[1E9	:AA11+> 	B 	A Hr-   
input_size	dimensionr+  c                    t        |      dk(  rt        j                  | d      S t        j                  t        |      |      }t        j
                  ||   | j                  t        j                        }|j                  d||      j                         }| j                  d|dz         j                  ||dz         } | j                  |      }d|z  |fz   }t        j                  ||| d      j                         S )Nr   r  rP   r$   r6   Tr  )r  rf   squeeze_copyrB   r*  r  r  int32unfoldflattenmovedimrl  r   r!  r  )	r   r/  r0  r+  rj  rL   r  rU   r  s	            r.   unfold_backwardr7  l  s    
 :!!!$**

 
 Z)
<C
,,z#t{{%++
NC
**Qd
#
+
+
-C<<C!G$,,S#':D 
+JcMSF"E!!*eTd!KVVXXr-   epsc           
      .   |A|}d|z
  }t        j                  t        j                  ||k\  ||k        | |d|z
  z  z  d      S t        j                  t        j                  |dk\  |dk        | |d|z
  z  z  |j                  dt	        d                  S )Nrd   r   r,   nan)rf   rg   r  rE  r  )rj   ry   r8  lohis        r.   logit_backwardr=    s    
 2X{{dbj$"*543:./
 	
 {{dck43;743:./MM"eEl+
 	
r-   trainc                 d    |r|dk7  rt         j                  | ||      d   S | j                         S rq   )r   native_dropoutr  )r   rP  r>  s      r.   dropoutrA    s3     a""5!U3A66{{}r-   out0out1c                    |r|dk7  r|dk(  r:t        j                  |       t        j                  | t         j                        fS | j                  j                  st        d      t        j                  |       |kD  }|| z  t        dd|z
  z        z  }||fS | t        j                  | t         j                        fS )Nr   r$   r   z?result type Float can't be cast to the desired output type Longrd   )	rf   r  r  r   is_floating_pointrs  	rand_liker  rZ  )r   rP  r>  	bool_maskress        r.   r@  r@    s     a6$$U+U-=-=e5::-VWW{{,,Q  OOE*Q.	%%sQw"88YuuEJJ?@@r-   half_to_floatc                    | j                         } |r| j                  t        j                  k(  sJ t	        j
                  | t        j                  j                        \  }}| j                  |      } | j                         dk(  rt        j                  |       }n0t        j                  | |d      }t        j                  | |z
        }|t        j                  ||d      z  }|s|j                  |      }|S Nr4   r   T)r  )r  r   rf   halfrB   rC   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr9   r   re   amaxr   )r:   rL   rI  r;   r>   unnormalizedx_maxr  s           r.   _softmaxrS    s    
 	
Aww%**$$$&+&>&>	uDDLL'#| 	
AwwyA~yy|

1c40yyU+EIIlCFFF<(Mr-   c                     | j                         } |r| j                  t        j                  k(  sJ t	        j
                  | t        j                  j                        \  }}| j                  |      } | j                         dk(  r| }nt        j                  | |d      }| |z
  }t        j                  t        j                  t        j                  |      |d            }||z
  }|s|j                  |      }|S rK  )r  r   rf   rM  rB   rC   rN  rO  r9   r   rP  rF  r   re   )	r:   rL   rI  r;   r>   shiftedrR  shifted_logsumexpr  s	            r.   _log_softmaxrW    s    
 	
Aww%**$$$&+&>&>	uDDLL'#| 	
AwwyA~

1c40e)		%))EIIg,>T"RS((F<(Mr-   r  padding_idxscale_grad_by_freqsparsec                     | j                         dk(  sJ d       |j                  dk  r4| j                  d|      }|j                  dk(  r|j                  d      }|S | |   S )Nr%   z'weight' must be 2-Dr$   r   )rL   r|  index_selectr  )r   r  rX  rY  rZ  r   s         r.   	embeddingr]    sd     ::<1444||q!!!W-<<1++a.C
gr-   num_weightsc                 t   t        j                  | t         j                  j                        \  }}| j	                  |      } t        |t        j                        }|rZ|j                  |f      }t        j                  |      }t        j                  ||g|d      }||   }	| |	j                  d      z  } t        ||k(  | j                        }
| j                  |
d      }| j                  |f| j                   |j                  d  z         }t        j                  ||g|d      j	                  |      S )NrL  Tr  rP   r   )rB   rC   rN  rO  r9   r   rf   longrl  rZ  r   r!  rR   rT   r|  masked_fillr  )rj   r  r^  rX  rY  r;   r>   countsr  grad_weights_scaler  r   grad_weights                r.   embedding_dense_backwardre    s,    ',&>&>)N)N)V)V'#| ..!23K%guzz:G""K>2w'''	4D'Q#G_!$6$@$@$DDW3[5E5EFD""4+D''	**7<<>::K !!+y$4!PSS r-   c                 "    d}| D ]  }||z  }	 |S rY   r,   )r:   rG   is      r.   r   r     s$    	A 	QHr-   tensors
num_chunksc                 Z   g }| D ]  }|j                         }||   |z   dz
  |z  |z  }|||   k7  r;dgdz  |j                  |z
  dz
  z  d|||   z
  gz   }t        j                  ||d      }|d | t	        j
                  |dg      z   }|j                  |j                  |              |S )Nr$   r   r%   rP   )r+  r|  r   constant_pad_ndrf   Sizeappendr  )	rh  rL   ri  padded_tensorstensortensor_sizepad_along_dimr  	view_sizes	            r.   
_pad_chunkrs    s    
 N 6kkm$S)J6:zIJVK,,#'V[[3.23C 007 C ))&#q9F%

J3C(DD	fkk)456 r-   c                 R    | d   j                   }| D ]  }|j                   |k7  s y y)Nr   FTr|  )rh  r|  ro  s      r.   have_same_ndimsrv  .  s2    1:??D ;;$ r-   c                     | d   j                         d | }| D ]-  }t        j                  |j                         d | |k(  d        / y )Nr   c                       y)NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr,   r,   r-   r.   r   z+leading_dimension_matches.<locals>.<lambda>;  r  r-   )r+  rf   r   )rh  rL   leading_dim_sizesro  s       r.   leading_dimension_matchesrz  6  sN    
)$3/ 
KKM$3#44]	

r-   c                    t        j                  |dk\  d        t        j                  t        |       dkD  d        | d   j                  }| d   j                  }| D ]r  }t        j                  |j                         dkD  d        t        j                  |j                  |k(  d        t        j                  |j                  |k(  d        t t        |       r(t        j                  | d   j                         |      }nEt        j                  |dk\  d        | D ]&  }t        j                  ||j                  k  d	        ( t        | |       |S )
Nr$   c                       y)Nz&_chunk_cat expects positive num_chunksr,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>D  r  r-   r   c                       y)Nz0_chunk_cat expects a non-empty input tensor listr,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>F  r  r-   c                       y)Nz#_chunk_cat expects non-empty tensorr,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>K  r  r-   c                       y)Nz8_chunk_cat expects all input tensors with the same dtyper,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>N  r  r-   c                       y)Nz8_chunk_cat expects all inputs tensors on the same devicer,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>R  r  r-   c                       y)NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>Y  r  r-   c                       y)Nz3_chunk_cat expects dim < ndim for all input tensorsr,   r,   r-   r.   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>^  r  r-   )rf   r   r  r   r  r   rv  rB   r*  rL   r|  rz  )rh  rL   ri  expected_dtypeexpected_devicero  s         r.   _preprocess_chunk_cat_inputsr  ?  s-   
 
LLq"RS	LLGqT QZ%%Naj''O 	
V\\^a')VWLLN*N	
 	MM_,N	
	
 w$$WQZ^^%5s;1Ha	
  	FLLfkk!M	
 gs+Jr-   r   c                     t        | ||      }t        | ||      }|t        j                  ||dz         S t        j                  ||dz   |       |S )Nr$   )r   )r  rs  rf   r-  )rh  rL   ri  r   rn  s        r.   
_chunk_catr  d  sS     'wZ
@Cj9N
{yyq11		.#'s3
r-   split_sizesc                 z    t        t                    D ]  }t        j                  |   d         t        j                  t
        t               j                     k(   fd       t              }g }d}t        |      D ].  }|   }|j                   j                  ||             ||z  }0 |S )Nc                       y)NzCsplit_with_sizes expects split_sizes have only non-negative entriesr,   r,   r-   r.   r   z"split_with_sizes.<locals>.<lambda>}  r  r-   c                  @    dt               dj                       S )NzSplit sizes add up to z but got the tensor's size of )r   r  )rL   ry   r  s   r.   r   z"split_with_sizes.<locals>.<lambda>  s*    ([)9(::XY]YcYcdgYhXij r-   r   )
rQ   r  rf   _check_is_size_check_with
ValueErrorr   r  rm  r,  )ry   r  rL   rg  
num_splitssplits	start_idxlengths   ```     r.   split_with_sizesr  t  s     3{#$ 
NY	


 
KDJJsO+j
 [!JFI: Qdkk#y&9:V	 Mr-   c                     t        | ||      }|.|D cg c]"  }|j                  t        j                        $ c}S t	        ||      D ])  \  }}t        ||j                         t        ||d       + y c c}w )Nr~   rV  Tr  )r  r  rf   r[  r  r   r  r   )ry   r  rL   r   r  sr   splits           r.   split_with_sizes_copyr    sz     dKS9F
{HNO1e&=&=>OO f- 	NMFEfekk2UFM	N  Ps   'A8
split_size.c                 D    t         j                  j                  | ||      S r6   )r   r  r   )r   r  rL   s      r.   unsafe_splitr    s    ::UJ44r-   c                 D    t         j                  j                  | ||      S r6   )r   r  r  )r   r  rL   s      r.   unsafe_split_with_sizesr    s       ((SAAr-   c                     | j                   }||   }|dk(  r
|dk(  sJ | fS ||z   dz
  |z  }ddlm}  ||      }t        |      D cg c]  }| }}|||z  |z
  z
  |d<   t	        j
                  | ||      S c c}w )Nr   r$   )	guard_intrP   )r  rr  r  rQ   rf   r  )	ry   r  rL   rg  r  chunksr  rg  r  s	            r.   r  r    s    **K3HQ1}}w#a'J6F @vF',V}5!:5K5 J$7($BCKO;;t[#.. 6s   	A7tensor_indices_or_sectionsc                    |j                   j                  dk(  sJ |j                  t        j                  k(  sJ |j                         t        j                  dk(  xs dk(  fd       dk(  r4|j                         }t        |t              sJ | j                  ||      S |D cg c]  }|j                          }}| j                  ||      S c c}w )Ncpur$   r   c                      d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr,   )	split_dims   r.   r   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>  s     <<E;eM r-   )r  typer   rf   r  rL   r   itemr8   r   tensor_split)ry   r  rL   sectionsrg  r  r  s         @r.   /tensor_split_tensor_indices_or_sections_py_implr    s     &,,11U:::%++u{{:::*..0I	LLQ()q.	M
 A~-224(G,,,  3//%?@1668@@   #.. As   ,Cmat1mat2c                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S ||| z  z   S rq   )rE  
is_complexr  rf   mm)ry   r  r  r`   rk   r   s         r.   addmmr    s]     !!#DOO,=4yE

%((4&
&Cqy
 r-   use_geluc                     t        | ||||      }|r8| j                  rt        j                  |d      S t        j                  |      S t        j	                  |      S )Nr   )r   )r  is_cudar   gelurelu)ry   r  r  r`   rk   r  r   s          r.   _addmm_activationr    sO     dD$
.C<<99Sf95599S>!99S>r-   vecc                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S ||| z  z   S rq   )rE  r  r  rf   mv)ry   r  r  r`   rk   r   s         r.   addmvr    s[     !!#DOO,=4yE

%((4%
%Cqy
r-   r   rstdgammaNCHxWgroupoutput_maskc
           	      R   t        j                  | ||d       t        j                  || d       t        j                  |d       t        j                  |j                         z  z  k(  fd       t        j                  j                  fk(  fd       t        j                  d u xs j                         k(  fd       t              \  }
}t        j                  |dk(  fd       t        j                  | |      j                        j                  dg	      }| j                        j                  dg	      }d }d }d }|	d   r*d
|
z  z  }t        j                  |j                  d            j                  |
      j                  d      }t        j                  |j                  d            j                  |
      j                  d      }t        j                  |j                  d      j                  d|
            }n|j                  |
      j                  d      }|j                  |
      j                  d      }t        j                  |j                  d      t        j                  d|
f|j                              }|z  |z
  |z  |z  |z  |z  }| z  ||z  |z  z
  }|j                  d      }t        |d      }t        |d      }t        j                  | j                  |
      |      t        j                  |j                  |
      |      z   |z   }|j                  |j                        j!                  |j"                        }|	d   rk|j                  |
      |j                  |
      j                  d      z  z
  |j                  d      z  j                  dg	      j                        }|	d   r|j                  dg	      }|||fS )NF)allow_cpu_scalar_tensorsc                      d z  z   dS )NzExpect input to have z	 elementsr,   )r  r  r  s   r.   r   z,native_group_norm_backward.<locals>.<lambda>.  s    'A}I> r-   c                  .    d  d dj                    S )NzExpect mean to have shape (, z
, but got r  )r  r  r   s   r.   r   z,native_group_norm_backward.<locals>.<lambda>2  s    -aS5'DJJ<P r-   c                  <    d  dj                          S d S )NzExpect gamma to have z elements but got rP   )r   )r  r  s   r.   r   z,native_group_norm_backward.<locals>.<lambda>6  s-    's*<eN_U[[]<hi eg<hi r-   r   c                      d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r,   )r  r  s   r.   r   z,native_group_norm_backward.<locals>.<lambda><  s    ,QC/[\a[bc r-   r%   r~   rd   rP   r$   r  r@  )rB   check_same_devicecheck_same_shaperf   r   r   r  divmodr   r  r   rR   r$  r  r  rT   r9   r   )rj   r   r   r  r  r  r  r  r  r  cpg_remdsdbd_inputd_gammad_biasr  ds_valdb_valc1c2c3s     ` `````              r.   native_group_norm_backwardr    s    
UD$ 
5+N	4F	LLQ$> 
LL

q%j P 
LL+!+i
 q% IC	LL	c 
;	&	+	+Aq#	6	:	:s	:	CB			!Q	$	(	(aS	(	1B $G $G#F1~39YYr5??1#56>>q%MQQRSTFYYr5??1#56>>q%MQQRSTFr"a,B
 ZZ5#.2215FZZ5#.2215Fr"

Auc?4;;?B tmf$,t3d:Q>S4Z&4-!++\\"r1%r1%IIk))!UC=rBiiaS92>? 	
 //%++.11%++>1~ E3'"''!UC*@4>>RTCU*UU..$% SaSS\WQZ 	 1~QCWf%%r-   out2c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  	enumerater   r  r   )rj   r   r   r  r  r  r  r  r  r  rB  rC  r  r  rU   rg  rG   s                    r.   native_group_norm_backward_outr  r  sz    " (UD$q!S%F d#J&! Q1=jmQWW5Q
14PQ
 r-   c                 ,    | | j                  |      S | S r6   r9   )r:   r   s     r.   _maybe_castr    s    }ttE{Hr-   grad_outnormalized_shapebiasc                   ! |j                   }|j                         }	t        j                  |j                        !!fd| |||fD        \  }
}}}|
J |	t        |      z
  }||d  }|d | }g }g }t        |	      D ]*  }||k\  r|j                  |       |j                  |       , t        |      }t        |      }|dk  s|dk  rN|d   r|j                  |      nd |d   r|j                  ||d        nd |d   r|j                  ||d        fS d fS t        ||j                               }t        ||j                               }||z
  |z  }||
|z  }n|
}||z  }t        j                  ||d      }t        j                  ||      }t        j                  ||d      }t        j                  ||      }||z
  |z
  }d }d }d } |d   r||z  |z  }|d   r0|.t        |      dkD  rt        j                  |
|z  |d      }n|
|z  }|d   r8|6t        |      dkD  rt        j                  |
|d      } n|
j                         } t        ||j                        t        ||j                        t        | |j                        fS )Nc              3   d   K   | ]'  }||j                        j                         n| ) y wr6   )r9   r  r  r:   r;   s     r.   r  z-native_layer_norm_backward.<locals>.<genexpr>  s5      9 12**,1D9s   -0r   r$   r%   TF)r  rL   rB   get_computation_dtyper   r  rQ   rm  r   rl  rT   rf   r   r   r  r  )"r  r   r  r   r  r   r  r  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesrg  r  Mx_hat
grad_x_hatabr  r  r  rH   r  d_weightr  r;   s"                                    @r.   native_layer_norm_backwardr    s    ++KJ33EKK@9E64095M:{I $$$,--DTU#JUd#J#%#%: (9$$Q'$$Q'	( 	ZAZAAva,7NEOOK(3>q>EOOK./t3>q>EOOK./
 	
 HL
 	

 T:>>#34DT:>>#34D$$&E"[0
"
QA		*/6A	:u	%B	2($	/B	5"	BEBJE $G!%H#F1~!8u$1~+1 !A%yy!68I5QH$u,H1~)/ !A%YY}.?GF"((*F 	GU[[)Hekk*FEKK( r-   c          
          t        | |||||||      }||	|
f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  r  r   r  r   )r  r   r  r   r  r   r  r  rB  rC  r  r  rU   rg  rG   s                  r.   native_layer_norm_backward_outr    sw     (%)4vt[F d#J&! Q1=jmQWW5Q
14PQ
 r-   running_meanrunning_varmomentum
functionalc	                 2   dgt        t        d| j                                     z   }	t        j                  | j
                        }
|}|}|r"t        j                  | j
                        }
| j                  |
      }t        j                  ||	dd      \  }}t        j                  ||z         }| |z
  |z  }t        j                  ||	      }t        j                  ||	      }|!||z  d|z
  |z  z   }|s|j                  |       |2| j                         | j                  d   z  }t        j                  ||	      }|||dz
  z  z  }||z  d|z
  |z  z   }|s|j                  |       n||J |j                  |
d      }|}|j                  |
d      }|}|}dt        j                  ||z         z  }| j                  j                   dk7  r|}|}n"| j#                  d	      }| j#                  d	      }t%        || j                         dz
        }t%        || j                         dz
        }| |z
  |z  }|2|j'                         }t%        || j                         dz
        }||z  }|2|j'                         }t%        || j                         dz
        }||z   }| j                  j                   dk(  r8|j                  | j
                        }|j                  | j
                        }|j                  | j
                        ||||fS )
Nr   r%   r   T)rL   
correctionr  r$   )r   r   r  r   )rt  rQ   rL   rB   r  r   r9   rf   var_meanrsqrtr  r   r   r  r_  r  r  rl  rT   r5  )r   r   r  r  r  r   r  r8  r  reduction_dimsr;   new_running_meannew_running_var	input_acc
biased_varr   r  r   	save_mean	save_rstdnsqueezed_varunbiased_varinvstds                           r.   native_batch_norm_helperr     s    S4a 566N33EKK@#!O!77DHH#4H5	 >>>a

D {{:+,$,$&MM$7	MM$7	#')3q8||6SS""#34"A.A !==^DL'1A;7L&5X8TTO!!/2'K,CCC#->TJ'!nn+<4nH%ejjs!234<<%$II-I-I uyy{Q7"6599;?;$,&(!"6599;?;&||~ uyy{Q7$||E!LLu{{L3	LLu{{L3				$ r-   r  save_invstdc                 >    t        | |||||||d	      \  }}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r8  r   r  r	  rS   s               r.   native_batch_normr  R  s=     *Bvt\;(CQV*&FIy!Q 9i''r-   c           
          ||t         j                  | |||||      S |t        d      |t        d      |rt         j                  | |||||||      S t         j                  | ||||||      S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitrs  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r8  s           r.   native_batch_norm_decompositionr  n  s      3,,648S
 	
 <
 	
 <
 	
 ,,64{HhPS
 	
 8864{Hc
 	
r-   c                 l   | j                  |      }||z   dz
  |z  }|dk(  rZ|dk(  rU|D cg c]  }| }}|||z  |z
  z
  ||dz
  <   t        j                  j                  j                  j                  | ||      S t        j                  j                  j                  j                  | ||      S c c}w Nr$   r   )r+  rf   opsr   r  r  r  r   )ro  r  rL   r  r  rS   r  s          r.   unsafe_chunk_py_implr    s    {{3HV#a'F2JQ8q=+12az22",
V0Ch0N"OFQJyy~~55==fkSVWW99>>&&--fj#FF 3s   	B1c           
      N    t         j                  j                  | ||||d||      S r  )r   r  r  )r   r   r  r  r  r  r8  s          r.   r  r    s5     ((00	 	r-   c                 >    t        | |||||||d	      \  }}	}
}}||	|
fS r  r  r  s               r.   r  r    s=     *Bvt\;(CQV*&FIy!Q 9i''r-   c                 >    t        | ||d d |||d	      \  }}}}	}	|||fS r  r  )
r   r   r  r   r  r8  r   r  r	  rS   s
             r.   !_native_batch_norm_legit_no_statsr     s<     *BvtT48S%*&FIy!Q 9i''r-   c                 f    t        | |||||||d	      \  }}	}
}}|J d       |J d       ||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be Noner  )r   r   r  r  r  r   r  r8  r   r  r	  r  r  s                r.   #_native_batch_norm_legit_functionalr$    sl    " 	!vt\;(CQU	 'N)NN'&L(LL&9i)9?JJr-   c           	      T   t         j                  j                  | ||||d|      }d}|t         j                  j                  j                  k(  r t         j                  j                  | |      }t        j                  |t         j                  | j                  | j                        S )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr  )
rf   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r&  r  )	r   r   r  r  r  r8  r   backendreserve_sizes	            r.   _get_batch_norm_reserve_tensorr0    s      hh11vt\;cG L%((,,222xxHHPXY;;EKKU\\ r-   c                 d    t        | ||||d||d	      \  }}}	}
}
t        | |||||d      }|||	|fS )NTFr   r  r0  r   r   r  r  r  r  r8  r   r  r	  rS   reserves               r.   _batch_norm_with_updater6  	  sa     *B
*&FIy!Q -vt\;dG 9i00r-   c                     t        | ||||d||d	      \  }}}	}
}t        | |||||d      }|
J d       |J d       |||	||
|fS )NTr2  r"  r#  r3  )r   r   r  r  r  r  r8  r   r  r	  new_rmnew_rvr5  s                r.   "_batch_norm_with_update_functionalr:  $  s      	!vt\;hT	 -vt\;dG DDDCCCIy'66BBr-   c                 d    t        | ||||d||d	      \  }}}	}
}
t        | |||||d      }|||	|fS )NFr2  r3  r4  s               r.   _batch_norm_no_updater<  ?  sa     *B
*&FIy!Q -vt\;eG 9i00r-   c                     |J t        j                  |       |k  j                  t         j                        }|j	                  |       | z  d|z  z  }||fS )Nr   rd   )rf   rF  r9   r-  r-  )r   rP  r   r  rH  s        r.   _fused_dropout_decompositionr>  Z  s[     OOE"Q&***=D
,,u

%q
1C;r-   )r   r&  r  
pin_memorynon_blockingrW  r  r?  r@  rW  c                   |r|t         j                  k(  sJ d       |rJ d       t        | t         j                  t        t
        t        t        f      sJ |0|.|,t        | t         j                        r| j                         S | S d}t        | t         j                        r| }nt        j                  |       }|b||j                  k7  rS|1|j                  dk(  r"t         j                  j                  ||      }d}t         j                  j                  ||      }|$|s"t         j                  j                  ||      }d}|t        j                  ||      S |S )NTODOFr  TrV  )rf   stridedr8   r   r  r  r  complexr  scalar_tensorr  r  _primsconvert_element_type
device_put)	r:   r   r&  r  r?  r@  rW  dtype_convertedx_tensors	            r.   _to_copyrK  d  s0    5==08&80!6!>a%,,UD'BCCC~%-M,Aa&779HO!U\\"&&q)f7!5||885IH"O<<**8V<<<44XuE {{8=AAOr-   c                 ,    t         j                  |       S r6   )r   alias)r:   s    r.   nop_decompositionrN    s     ::a=r-   out3exponential_average_factorepsilonc           
         t         j                  | |||||||      \  }}	}
|r%||	|
| j                  dt        j                        fS ||j                  d      |j                  d      | j                  dt        j                        fS )Nr   r   )r   r  rl  rf   r-  )r   r   r  r  r  r   rP  rQ  r  r  r  s              r.   cudnn_batch_normrS    s     $$"	GAq! 1aU[[ABB	EKK0	 r-   c                     t        |      D ]>  \  }}|dk(  s|| j                  k  r| j                  |   |k(  r.| j                  |      } @ | S rY   )r  r|  r  rR   )r:   broadcast_maskr  r  s       r.   _broadcast_batch_norm_backwardrV    sO    / "
d19dQVVm0ED!A" Hr-   r5  c                 *    t        | |||||||||	
      S r6   )native_batch_norm_backward)r  r   r   r  r  r  r  r>  r8  r  r5  s              r.   batch_norm_backwardrY    s/     & r-   c
                   & |j                   }
||j                   }n|
}t        j                  |j                         &&fd| ||||||fD        \  }}}}}}}|j                  }|j	                         }|dk\  sJ d       d}t        t        |            ||   z  }|}|}|r||"J ||J |}t        j                  ||z         }dg|z  }||   ||<   g }t        |      D ]  }||k7  s	|j                  |        t        ||      }d|z  }t        j                  ||      }t        j                  |||z
  z  |      }t        ||z  |      }t        t        j                  ||z  ||z        |      } |t        ||      dz  }!nt        ||z  |      }!|r||z
  | z  }"||"z
  |z
  |!z  }#n||!z  }#|	d   r||z  }$nd }$|	d   r|}%nd }%|#j                  |
      t        |$|      t        |%|      fS )Nc              3   H   K   | ]  }||j                        n|  y wr6   r  r  s     r.   r  z-native_batch_norm_backward.<locals>.<genexpr>  s,      	 $%=a7	s   "r%   z$rank of the input must be at least 2r$   rd   )r   rB   r  r  rL   r   rt  rf   r  rQ   rm  rV  r   r   r9   r  )'r  r   r   r  r  r  r  r>  r8  r  r  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr  num_featuresr   r  rU  reduction_axesrg  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrU   rd  	grad_biasr;   s'                                         @r.   rX  rX    ss    ++K||"33EKK@	 
	 ++KJ?BBB?D[)*[->>LDF).>.JJJ ,1A1MMM -34!"j 0N&t,N4 "N: %9!!!$% *$?DDii~>OIImzD'89>JE./E~VI/		%$,QW0XZhiJ3FNKcQ
3[ .

 T!Z/$t+y8JF
"Z/
1~fn1~#		 	k"K.I|, r-   c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )rX  r  r   r  r   )r  r   r   r  r  r  r  r>  r8  r  rB  rC  r  r  rU   rg  rG   s                    r.   native_batch_norm_backward_outrl  D	  s    " (F d#J&! Q1=jmQWW5Q
14PQ
 r-   save_varc                 B    t         j                  || |||||d|g d
      S NT)TTTr   rX  )r   rj   r   r  r  r  rm  rQ  s           r.   miopen_batch_norm_backwardrq  j	  s5     ** r-   reserveSpacec	                 B    t         j                  || |||||d|g d
      S ro  rp  )	r   rj   r   r  r  r  rm  rQ  rr  s	            r.   cudnn_batch_norm_backwardrt  	  s5     ** r-   c                    | j                   | j                  t              t        j                  dv fd       | j                  dd  D ]  }t        j                  |dk7  fd         d   |d   z  dk(  rxd   |d   z  dk(  rjt        d t        dd  |      D              }t        d t        dd  ||      D              }t        j                  j                  j                  | ||      S d	 d
 fd} |d   |d         \  }}}}	 |d   |d         \  }
}}}| dt        |d      |
f   }|	s|st        j                  |d      S d } |||||	d      \  }} |||||d      \  }}d }t        t        |j                  d         t        |j                  d               D ]!  \  }}||d|d d |f   }||d|d d |f   z   }# |||z  z  S )Nr  c                      d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r,   ru  s   r.   r   z%adaptive_avg_pool2d.<locals>.<lambda>	  s    KD6R r-   rX  r   c                  "    dt                dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r  s   r.   r   z%adaptive_avg_pool2d.<locals>.<lambda>	  s     99>uaI r-   rP   c              3   ,   K   | ]  \  }}||z    y wr6   r,   )r  rg  r(  s      r.   r  z&adaptive_avg_pool2d.<locals>.<genexpr>	  s     G$!QqAvG   c              3   :   K   | ]  \  }}}||d z
  |z  z
    yw)r$   Nr,   )r  rg  r(  r  s       r.   r  z&adaptive_avg_pool2d.<locals>.<genexpr>	  s'      
 '1aAQ!O
s   c                 8    t        j                  | |z  |d      S )Ntruncrounding_moderf   divr  r  r  s      r.   start_indexz(adaptive_avg_pool2d.<locals>.start_index	  s    yyQ99r-   c                 J    t        j                  | dz   |z  |z   dz
  |d      S )Nr$   r|  r}  r  r  s      r.   	end_indexz&adaptive_avg_pool2d.<locals>.end_index	  s&    yy!a%1q1,awGGr-   c                    t        j                  |t         j                        } |||       }| |z  dz   }| |z  }|dk(  xs ||z  dk(   }|r|dz  }n
|dk(  r|dz  }t        j                  |t         j                        }|j                  d      |z   }|rUt        j                  | dz
  |j
                  |j                        }	t        j                  ||	      } |||       }
|
|z
  }n|}||||fS )Nr  r$   r   rP   r  )rf   r  r  rR   rE  r   r  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1r  r  r  r  s               r.   compute_idxz(adaptive_avg_pool2d.<locals>.compute_idx	  s   hvU[[I73 x'!+	(#q(GH{,Ba,GHNIANILL6M	ll2* ((!399SZZF --V,C 68W5B"WFFFIx//r-   .r@  )r   rP   r~   c                     t        |t              r| |fS |dk  sJ ||j                  d      k\  }|dk(  rt        |d      }t	        j
                  | |d      } t        ||       }| |fS )Nr   rP   rX  r@  r   )r8   r   rR   rT   rf   ra  )valsr  r  r  rL   r  s         r.   
maybe_maskz'adaptive_avg_pool2d.<locals>.maybe_mask	  sw    fg&< 7N7 0 0 44Dby(q1$$T45D&vt4F<r-   )r  rL   r   )r  r  r  rf   r   r  r  nnr  
avg_pool2drT   r   r   rQ   )r   r  r  ru  kernelr  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr  r  retrg  jr  r  r|  r  r  s                      @@@@@r.   adaptive_avg_pool2dr  	  s.   
 \\FKKEu:D	LLR [[ 
FI	

 Ry;r?"a'E"IB,G1,LG#eBCj+*FGG 
+.uRSz;+O
 
 xx""--eVVDD:H0@ /:%)[QS_.U+D(K.9%)[QS_.U+D(K'a0$67Djzz$H--   hjbND(  hjbND(
 Cdjjn-uTZZ^/DE +1;sAq!|$CS!Q\**C	+
 (X%&&r-   )rk   ro  c                $    t        | |||d|      S )NTinplacerk   
_index_addr:   rL   r  ro  rk   s        r.   
index_add_r  
  s     aeVTGGr-   c                $    t        | |||d|      S )NFr  r  r  s        r.   	index_addr  
  s     aeVU%HHr-   r  c                8   t        j                  | j                        t        j                  j                  dk  fd       j                  dk(  rj                  d      nd|j                  dkD  r|j                        ndt        j                  k(  fd       dk7  rft        j                  | j                        t        j                  t        k(  xs t        j                  t                    fd       |z  }| j                  dk(  }|r| j                  d      n| }dz  fz   }|rt        j                  nt        j                  }	 |	|||d      }
|r| S |r|
j                  d      S |
j!                         S )	Nr$   c                  $    d j                    dS Nz(Index should have dimension 1 or 0 (got r;  ru  r  s   r.   r   z_index_add.<locals>.<lambda>-
      :5::,aH r-   r   c                      d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r,   )rL   
index_sizerp  s   r.   r   z_index_add.<locals>.<lambda>3
  s!    %j\1XYdXeeqmplrs r-   c                  (    dt                d dS )Nzalpha argument of type z cannot be safely cast to type !)r  )rk   python_types   r.   r   z_index_add.<locals>.<lambda>:
  s    -d5k]:YZeYffgh r-   r6   Tr  )rB   canonicalize_dimsr|  rf   r   r+  dtype_to_typer   r  is_weakly_lesser_typer  rR   r   
index_put_	index_putr  r  )r:   rL   r  ro  r  rk   zero_dimrS  r  r  r   r  r  rp  s    ``  `     @@@r.   r  r  !
  sM    
!
!!&&#
.C	LL

aH #(**/AqJ&,kkAo&++c"1K	LLz!s z))!''24 E**4;Dh	

 %vv{H#QB
C-5(
"C#*I
BV
5C!)s{{1~?s~~/??r-   c           
         t        j                  t        |       dkD  d        t        |       }| d   j                         }|dd  }t	        d | D              }|r||f}n||f}||z   }| d   j                  ||      }dt        |      z  }	t        |      D ]j  }
| |
   }t        j                  ||	d||j                  d      z
  fz   |      }|rt        j                  ||d|
      }Rt        j                  ||d|
      }l |S )Nr   c                       y)Nz#received an empty list of sequencesr,   r,   r-   r.   r   zpad_sequence.<locals>.<lambda>L
  r  r-   r$   c              3   >   K   | ]  }|j                  d         ywr  r+  )r  r:   s     r.   r  zpad_sequence.<locals>.<genexpr>P
  s     /!&&)/s   )r   r   rL   r  )
rf   r   r  r+  r   rE  rQ   r   rk  r  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsrg  currseqrows                r.   pad_sequencer  I
  s#    
LLY!#%RS^N|  "HQRLM/Y//G"G,^,-'H
A,

-
8CC..L>" @A,""\Q',,q/(A$BBM
 %%c3AQ%?C%%c3AQ%?C@ Jr-   c                 "    t        | |||d      S )NTr  _index_copyr:   rL   r  ro  s       r.   index_copy_r  d
  s    q#ufd;;r-   c                 "    t        | |||d      S )NFr  r  r  s       r.   
index_copyr  i
  s     q#ufe<<r-   c                   t        j                  | j                  |      }t        j                  j                  dk  fd       | j                  dk(  }|r| j                  d      n| }j                  dk(  rj                  d      nd|z  fz   }|rt        j                  nt        j                  } ||||      }	|r| S |r|	j                  d      S |	j                         S )Nr$   c                  $    d j                    dS r  ru  r  s   r.   r   z_index_copy.<locals>.<lambda>u
  r  r-   r   r6   )rB   r  r|  rf   r   rR   r   r  r  r  r  )
r:   rL   r  ro  r  r  rS  r  r  r   s
     `       r.   r  r  o
  s     
!
!!&&#
.C	LL

aH
 vv{H#QB"'**/EOOAuE
C-5(
"C#*I
BV
$C!)s{{1~?s~~/??r-   c                    t        j                  | j                  d      |       }t        j                  t        j                  |              }| j
                  r| j                  d      }n|}|t        j                  |      z
  |fS )Nr,   r   )rf   r  rl  re   r   r  rD  )ry   r   rh   r   s       r.   log_sigmoid_forwardr  
  sh     --r*D
1C		599T?"#A||%Q''r-   lowhighc                     t        j                  | j                  t        |      t        |      | j                  | j
                  |      S )N)r  r  r   r  r   )prims_uniform_helperr  r   r   r  )r:   r  r  r   s       r.   r   r   
  s=       	cNt_ggxx r-   c                 <    | j                  t        | |||            S r6   )r   r   )ry   r  r  r   s       r.   uniform_r  
  s    ::gdCy9::r-   c                 "   t        |       dz
  }|>t        j                  |d u d        t        j                  t        |      |k(  d        |S |t        j                  |d u d        t        j                  t        |      |k(  d        g }t        |      D ]Z  \  }}t	        |      |k(  r$|j                  | |dz      t	        |      z         8|j                  t        | |dz      |z               \ |S t        j                  dd        y )Nr%   c                       yNz9Must specify exactly one of output_size and scale_factorsr,   r,   r-   r.   r   z.upsample_compute_output_size.<locals>.<lambda>
  r  r-   c                       yN r,   r,   r-   r.   r   z.upsample_compute_output_size.<locals>.<lambda>
  r  r-   c                       yr  r,   r,   r-   r.   r   z.upsample_compute_output_size.<locals>.<lambda>
  r  r-   c                       yr  r,   r,   r-   r.   r   z.upsample_compute_output_size.<locals>.<lambda>
  r  r-   Fc                       yr  r,   r,   r-   r.   r   z.upsample_compute_output_size.<locals>.<lambda>
  r  r-   )r  rf   r   r  r  rm  r   )r/  r  scale_factorsspatial_dimensionsrg  r  s         r.   upsample_compute_output_sizer  
  s   Z1,T!O	
 	S%);;ZH 4O	
 	S'+==zJm, 	CDAq1v{"":a!e#4s1v#=>""7:a!e+<q+@#AB		C
 	LLRr-   c                     | y | |   S r6   r,   )scalesr  s     r.   get_scale_valuer  
  s    ~#;r-   r  c                 x    t        | j                         ||      }|r|nd gt        |      z  }t        | ||      S r6   r  r+  r  _upsample_nearestr   r  r  osizer  s        r.   _upsample_nearest_vecr  
  s@     ){MRE&TFSZ,?  UE622r-   c                 |    t        | j                         ||      }|r|nd gt        |      z  }t        | ||d      S NTexactr  r  s        r.   _upsample_nearest_exact_vecr  
  sB     ){MRE&TFSZ,?  UE6>>r-   c                    g }t        |      }|rdnd}t        |      D ]  }||   }| j                  | |z      }	||   |	|	||   z  z  n|	|z  }
t        j                  |t        j
                  | j                        }||z   |
z  j                  t        j                        }t        |dz
  |z
        D ]  }|j                  d      } |j                  |        |S )Nr   r   r  r$   rP   )r  rQ   r  rf   r  r   r  r9   r  rR   rm  )r   r  r  r  r  num_spatial_dimsr  r  r  isizerl   output_indicesinput_indicesrS   s                 r.   !_compute_upsample_nearest_indicesr  
  s     G;'SsF#$ & A--12/5ay/D*+%RW-e5==V(61U:>>u{{K'!+a/0 	8A)33B7M	8}%+&, Nr-   )preserve_memory_formatr  r  c                     t        | ||g      S r6   r  r   r  r  s      r.   upsample_nearest1dr    s     UK&::r-   c                 "    t        | ||gd      S r  r  r  s      r.   upsample_nearest_exact1dr  "  s     UK&FFr-   scales_hscales_wc                      t        | |||g      S r6   r  r   r  r  r  s       r.   upsample_nearest2dr  0  s     UK(H1EFFr-   c                 $    t        | |||gd      S r  r  r
  s       r.   _upsample_nearest_exact2dr  =  s     UK(H1ETRRr-   scales_dc                 "    t        | ||||g      S r6   r  r   r  r  r  r  s        r.   upsample_nearest3dr  L  s     UK(Hh1OPPr-   c                 &    t        | ||||gd      S r  r  r  s        r.   _upsample_nearest_exact3dr  Z  s!     {Xx:$ r-   r  c                 D   t        | |||      }d d g|z   }t        j                  | |      }|j                  dk(  rdt	        j
                  |       }| j                  d   }| j                  j                  dk(  r|dk  rt        j                  }|j                  |      }|S )Nr  r@  r$   cudarV  )r  r   _unsafe_indexr|  rB   r   r  r  r  rf   r[  r  )	r   r  r  r  spatial_indicesr  r  rW  
n_channelss	            r.   r  r  l  s     8{F%O Tl_,Gw/F{{a33E: [[^
<<&:>!33M"""?Mr-   c           	          |r|rd}n|rd}n|rd}nd}t        |       |z  dk(  sJ t        |              t        dt        |       |      D cg c]  }t        | |||z           c}S c c}w )Nr  r@  r   r%   r   )r  rQ   r  )params
has_biaseshas_projections
group_sizerg  s        r.   gather_paramsr    s{    o
	
	

v;#q(5#f+5(38CK3T./fQZ()  s   A(c                 ~    |r'| d|z     |d|z     }}| d|z  dz      |d|z  dz      }}n| |   ||   }}d\  }}||||fS )Nr%   r$   NNr,   )r  hiddensrg  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddens           r.   params_hiddensr'    sk    !'AAJ
%+AEAI%6A	8Jl!'GAJJ
%/"lz<==r-   c                     ||kD  sJ |j                  | j                  d|||z
               | j                  dd|      S rq   )rm  r,  )r$  last_batch_size
batch_sizer!  s       r.   update_hidden_for_packedr+    sE    Z'''NN:$$Q
Oj4PQRQ:..r-   c           	      t    ||k(  r| S ||k  sJ t        j                  | |j                  d|||z
        f      S rq   )rf   concatr,  )r$  r)  r*  
inp_hiddens       r.    update_hidden_for_packed_reverser/    sP     *$Z'''<<a*2NO	
 r-   c           	      X   |d   }|d   }|r|d   nd }	|r|d   nd }
g }g }|r|d   n|d   }|j                  dd|      }t        j                  | t        |            }|r|d d d   }|D ]V  } | j                  d   }||k(  rn|rt        ||||      }nt        ||||      } || |||	||
      }|}|j                  |       X |r|j                          n!|j                  |       |j                          t        j                  |d      }|st        j                  |d      n|}||fS )Nr   r$   r%   r   rP   )
r,  rf   r  rt  r  r/  r+  rm  reverser-  )inphiddenr  r  	hidden_fnbatch_sizesr1  	ih_weight	hh_weightih_biashh_biasstep_outputr!  r)  r$  	split_inprg  r   
hidden_outs                      r.   one_layer_rnn_datar=    sR    q	Iq	I%fQi4G%fQi4GK"$G)0k"ok!nOq!_5JCk!23IddO	 'IIaLa9OQJ 2OQJ sJ	7IwW
:&#'& z"
))K
#C.57A&:J
?r-   c                       fd}|S )Nc                 D     t        j                  |||      | z         S r6   r   linearrg  r$  r6  r8  r7  r9  nonlinearitys         r.   rH   zrnn_cell.<locals>.inner  s     AHHZGDqHIIr-   r,   rC  rH   s   ` r.   rnn_cellrE    s    J Lr-   c                       fd}|S )Nc                 r    t        j                  | ||      }  t        j                  |||      | z         S r6   r@  rB  s         r.   rH   zrnn_cell_data.<locals>.inner  s2    HHQ	7+AHHZGDqHIIr-   r,   rD  s   ` r.   rnn_cell_datarH    s    J Lr-   c           	      x   |d   }|d   }|r|d   nd }|r|d   nd }	t        j                  | ||      }
|r|
j                  d      n|
}
|j                  d      }g }|
D ]   } |||||||	      }|j	                  |       " |r|j                          t        j                  |d      }||j                  d      fS )Nr   r$   r%   r   )	r   rA  fliprR   rm  r1  rf   r-  r  )r2  r3  r  r  r4  r1  r6  r7  r8  r9  precomputed_inputr$  r:  rg  r   s                  r.   one_layer_rnnrL    s    q	Iq	I%fQi4G%fQi4Gi95<)..q1BS!!!$JK 'q*i)WU
:&' 
))K
#C
""1%%%r-   c                    |d   }|d   }|r|d   }|d   }nFt        j                  |j                               }t        j                  |j                               }|d   j                  d      }	|d   j                  d      }
g }d}|	j                  d      }d}d}d}d}| j	                         } |	j	                         }	|
j	                         }
t         j
                  j                  j                  j                  | |||||	|
|||||||||      }|d   |d   |d   }}}||j                  d      |j                  d      ffS )Nr   r$   r%   r   F)
rf   r
  r+  rR   r  r  r   mkldnn_rnn_layerr  r  )r2  r3  r  r  r1  w0w1w2w3hxcxr5  modehidden_size
num_layersr"  r  r>  outputsrW   hycys                         r.   mkldnn_one_layer_lstmr[  	  sS   	B	BAYAY[[#[[#			Q	B			Q	BKD''!*KJ MKE ..
C	B	Biinn--55





!G$ 
GAJ
2rArzz!}bjjm,,,r-   c
                    |r| j                  dd      n| } g }
t        |      D ]  }t        ||||      \  }}}}|r
||dz
  k  r|nd} |	| |||      \  }}|
j                  |       |r! |	| |||d      \  }}|
j                  |       |r*t	        j
                  |g|j                         dz
        } n|} |dk7  s|s||dz
  k  st	        j                  | |d      }  |r| j                  dd      n| } | |
fS )Nr   r$   r   T)r1  )r>  )	transposerQ   r'  rm  rf   r-  rL   rA  )r   r3  r  r  rW  rA  r>  r"  r  layer_fnfinal_hiddensrg  r#  r$  r%  r&  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddens                       r.   _rnn_helperrd  ;  s"    &1EOOAq!eEM: >=KFA}>
:
Jl $
QU(:'&uj*jQZ("*|\:t#GZ   ,IIw0'++-!2CDEEa<Ea*q.&8MM%=E)>, &1EOOAq!eE-r-   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Fr4  )	unbindr  rd  r   rL  rE  rf   r   stackr   rS  r  r  rW  rA  r>  r"  r  r3  r   r_  s               r.   rnn_tanh_inputrk  d  t     YYq\F6:u5F$%**)=>C M1---r-   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS rf  )	rh  r  rd  r   rL  rE  rf   r  ri  rj  s               r.   rnn_relu_inputrn    rl  r-   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Fr5  r4  )	rh  r  rd  r   r=  rH  rf   r  ri  datar5  rS  r  r  rW  rA  r>  r"  r3  r   r_  s               r.   rnn_relu_datart    {     YYq\F6:u5F$##EJJ/	
C  M1---r-   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS rp  )	rh  r  rd  r   r=  rH  rf   r   ri  rr  s               r.   rnn_tanh_datarw    ru  r-   c                 l   t        j                  |||      | z   }|j                  d|      }|d   j                         }	|d   j                         }
|d   j	                         }|d   j                         }|
|z  |	|z  z   }||j	                         z  }||nt        j                  ||d       }||fS )Nr@  r   r$   r%   r   r   rA  chunkr   r   )r2  rS  rT  r7  r9  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gaterZ  rY  s                  r.   	lstm_cellr    s    HHRG,s2EKK9-MA&&(G"**,Ka %%'IQ'')H	r	Wy0	1B	BGGI	B ahhr9d&CBr6Mr-   c           
      (   |d   }|d   }|r|d   nd }|r|d   nd }t        |      dk(  r|d   nt        |      dk(  r|d   nd }	|d   j                  d      }
|d   j                  d      }t        j                  | ||      }|r|j	                  d      n|}g }|D ](  } t        | |
||||	d      \  }
}|j                  |
       * |r|j                          t        j                  |d      }||
j                  d      |j                  d      ffS )Nr   r$   r%   r   r  r@  r|  )r  rR   r   rA  rJ  r  rm  r1  rf   r-  r  )r2  r3  r  r  r1  r6  r7  r8  r9  r{  rS  rT  rK  r:  r   s                  r.   one_layer_lstmr    s*   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  
		Q	B			Q	Bi95<)..q1BSK  3B	7IQRSB2 
))K
#CA

1...r-   c           
         |d   }|d   }|r|d   nd }|r|d   nd }	t        |      dk(  r|d   nt        |      dk(  r|d   nd }
g }g }|r|d   n|d   }t        j                  | t        |            }|r|d d d   }|d   }|d   }|j	                  dd|      |j	                  dd|      }}|D ]  } | j
                  d   }t        j                  | ||      } ||k  ra|j                  |j	                  d|||z
        |j	                  d|||z
        f       |j	                  dd|      |j	                  dd|      }}||kD  rXt        j                  ||j	                  d|||z
        fd      }t        j                  ||j	                  d|||z
        fd      }t        | ||||	|
d      \  }}|}|j                  |        |r|j                          ||f}nZ|j                  ||f       |j                          t        | \  }}t        j                  |d      t        j                  |d      f}t        j                  |d      }||fS )	Nr   r$   r%   r   r  r@  rP   r  )r  rf   r  rt  r,  r  r   rA  rm  r-  r  r1  r  r-  )r2  r3  r  r  r5  r1  r6  r7  r8  r9  r{  r:  r!  r)  r;  orig_hxorig_cxrS  rT  rg  r<  hidden0hidden1r   s                           r.   one_layer_lstm_datar    sx   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  KG)0k"ok!nOCk!23IddO	QiGQiG^^Aq/2GNN	1o5B  IIaLhhsIw/ NNIIaOa$78IIaOa$78 YYq!Q'1a);B W^^AO8KLMqB W^^AO8KLMqB 3B	7IQRSB236 "X
Bx =YYw*EIIgq,AA

))K
#C
?r-   c                 4    d } || ||      rt         S t        S )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c                 N   t         j                  j                         sy| gt        |      z   t        t	        j
                  |            z   }|D ch c]  }|j                   }}t        |      dk7  ry|j                         }|t        j                  d      k7  ry|D ch c]  }|j                   }}|D ]&  }|t         j                  t         j                  fvs& y | j                  ry|d   j                  d      |d   j                  d      k7  }	|	ryyc c}w c c}w )NFr$   r  r   r%   T)rf   r'  _get_mkldnn_enabledrt  r   from_iterabler  r  popr   r  bfloat16requires_gradr+  )
r   rS  r  rh  tdevicesr  dtypesr   r  s
             r.   
use_mkldnnz2select_one_layer_lstm_function.<locals>.use_mkldnn_  s    xx++-'DH$tE,?,?,G'HH%,-188--w<1U\\%((#*+a!''++ 	EU[[%..99	 Q%**Q-2a5::a=8) . ,s   D#D")r[  r  )r   rS  r  r  s       r.   select_one_layer_lstm_functionr  P  s!    : %V$$$r-   c	                    t        |      dk(  sJ d       t        |||d   j                  d      |d   j                  d      k7        }t        t	        |d   |d               }	t        | ||      }
t        | |	||||||||

      \  }}t        t	        |       }|t        j                  |d   d      t        j                  |d   d      fS )Nr%   lstm expects two hidden statesr   r$   )	r  r  r+  rt  r  r  rd  rf   ri  )r   rS  r  r  rW  rA  r>  r"  r  r3  r^  r   r_  s                r.   	lstm_implr    s     r7a<999<6:r!uzz!}1

1/MNF#beRU#$F-eR@H$C m,-MM!,a0%++mA>NPQ2RRRr-   c	                    t        |      dk(  sJ d       t        |||d   j                  d      |d   j                  d      k7        }t        t	        |d   |d               }	t        | |	||||||dt        t        |      
      \  }
}t        t	        |       }|
t        j                  |d   d      t        j                  |d   d      fS )Nr%   r  r   r$   F)r5  )
r  r  r+  rt  r  rd  r   r  rf   ri  rr  s               r.   lstm_data_implr    s     r7a<999<6:r!uzz!}1

1/MNF#beRU#$F$#=C m,-MM!,a0%++mA>NPQ2RRRr-   c                 &   | j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r$   r%   r   )rz  r   rA  r   r   r2  r$  r6  r8  r7  r9  chunked_igateschunked_hgates
reset_gate
input_gatenew_gates              r.   gru_cellr    s    YYq!_NXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r-   c                 P   t        j                  | ||      j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r$   r   r%   ry  r  s              r.   gru_cell_datar    s    XXc9g6<<QBNXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r-   c	                     t        ||d      }t        | |j                  d      ||||||dt        t        |t
              
      \  }	}
|	t        j                  |
d      fS )NFr   rq  )r  rd  rh  r   r=  r  rf   ri  )rs  r5  rS  r  r  rW  rA  r>  r"  r   r_  s              r.   gru_impl_datar    si     6:u5F$
		!"}UC M1---r-   c	                     t        ||d      }t        | |j                  d      |||||||t        t        t
              
      \  }	}
|	t        j                  |
d      fS )NFr   rg  )r  rd  rh  r   rL  r  rf   ri  )r   rS  r  r  rW  rA  r>  r"  r  r   r_  s              r.   gru_implr    sf     6:u5F$
		!2C M1---r-   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S Nr   r$   )r  r+  r  rf   r  r   _upsample_bilinear2d_aar   r  align_cornersr  r  scale_hscale_ws          r.   upsample_bilinear2d_aa_vecr    sV     ){MREmQ/GmQ/G99>>11umWg r-   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S r  )r  r+  r  rf   r  r   _upsample_bicubic2d_aar  s          r.   upsample_bicubic2d_aa_vecr    sV     ){MREmQ/GmQ/G99>>00umWg r-   c                 z    t        | j                         ||      }|r|nd gt        |      z  }t        | |||      S r6   )r  r+  r  _upsample_linear)r   r  r  r  r  r  s         r.   _upsample_linear_vecr  +  s=     ){MRE+]$#e*1DFE5-@@r-   r  c                      t        | |||g      S r6   r  )r   r  r  r  s       r.   upsample_linear1dr  9  s     E;zJJr-   c                 "    t        | ||||g      S r6   r  )r   r  r  r  r  s        r.   upsample_bilinear2dr  D  s     E;(?STTr-   c                 $    t        | |||||g      S r6   r  )r   r  r  r  r  r  s         r.   upsample_trilinear3dr  S  s!     {MHh+I r-   c                 L    |r|dkD  r| dz
  |dz
  z  S dS |
|dkD  rd|z  S | |z  S )Nr$   rd   r   r,   )r  r  r  rl   s       r.   _compute_scaler  d  sB    5=\#(S.1HqH#/EAIsU{U7XCUUr-   c                 &    |r| |z  S | |dz   z  dz
  S Nr   r,   )rl   	dst_indexr  s      r.   _compute_source_indexr  k  s$    y  	C(3..r-   weightsweights_precisionc                     t        d t        | |      D              d|dz
  z  z   }||z	  }t        j                  |dd      j	                  t        j
                        S )Nc              3      K   | ]F  \  }}|j                  t        j                        |j                  t        j                        z   H y wr6   )r9   rf   r3  )r  r  r  s      r.   r  z%_sum_tensors_uint8.<locals>.<genexpr>u  s8      26!QU[[ADD--s   AAr$   r      )_sum_tensorsr  rf   r   r9   r-  )r  r  r  r   s       r.   _sum_tensors_uint8r  r  sd      :=c7:K 	
 1$	%'F ((F;;vq#&))%++66r-   c                     t        j                  |       j                         }d}t        j                  ||j                        }d|d|dz   z  z  z   }|dk\  }||j                         z
  S )N   r  r   r$   i   )rf   ri  r   r  r  r   )r  
max_weightmax_weight_precision
precisionsvaluesr  s         r.   _compute_weight_precisionr  |  si    W%))+J2:;L;LMJ:zA~!677FgD$((*,,r-   c                      j                   d d \  }} j                   dd  }t        |      }t        j                   t        j                  j
                        \  } fd}	t        t        |||            D 
cg c]  \  }
\  }}} |	||||dz
  |
z
         }}}}
}t        t        |       \  }}}g }t        ddgg|z   D ]c  }d d gt        |      D cg c]  }||   dk(  r||   n||    c}z   }t        j                   |      }t        |      }|j                  |       e t        t        |            D ]p  }
||
   ||
   z
  j!                  dd      j#                        }t        |d d d   |dd d         D cg c]!  \  }}|t%        j&                  ||z
  |      z   # }}}r t        |      dk(  sJ |d   }t        j(                         } j*                  j,                  dk(  r|d	k  rt$        j.                  }t1        |t$        j2                        sJ |j5                  |
      } j7                         s|j9                         }|S c c}}}}
w c c}w c c}}w )Nr%   rL  c                 |   t        | |	|      }t        j                  |j                        j	                  
      }t        ||	      j                  d      } |j                  |j                  d   gdg|z   }|j	                  t        j                        }|dz   j                  | dz
        }|||fS )Nr  r   r   r   r   r$   r   )
r  rf   r  r  r9   r  r   r$  r  r  )inp_sizer  r  nsqueezescale_factorrg  x_f32r:   xp1r  r   r   s            r.   
get_valuesz$_upsample_linear.<locals>.get_values  s    %h-P LL%,,7:::G%lA}EKKPSKTekk!n@sh/?@HHU[[!1umm1m-a}r-   r$   r   r   rd   r     rV  )r  r  rB   rC   rN  INT_TO_FLOATr  r  rt  r   rQ   r   r  r   rm  reversedr   r9   rf   r   r   r  r  r[  r8   r   r  rE  round)r   r  r  r  n_batchr  	inp_sizesn_dimsrS   r  rg  r  r  r  xs_f32xsxp1svsr  kr  vxscalev1v2r  rW  r   s   ` `                        @r.   r  r    s     ++bq/GZABI^F''!AANNHAu
 09	;/0
 +A+(F 	8XvvzA~>F  CL)FB	B1vh'( TluV}U!qtqybed1g=UUuc*#Au-
		!	 eFm$ 
)be#**3477> b1gr!$Q$x0
 B 27F++
 

 r7a<<UF //6M ||F"zB//fell+++];F""$MQ V
s   I"
 I*&I/r  r  c                 4    | j                   |j                   k(  S r6   r  )r  r  s     r.   is_same_sizer    s    77aggr-   c                 .    t         j                  | |      S r6   )r   r  )r:   r  rD   s      r.   _reshape_aliasr    s     99Qr-   c                 .    t         j                  | |      S r6   )r   r  )r:   r  s     r.   r  r    s    ::a!!r-   c                 2    t         j                  | |||      S r6   )r   r  )r:   r  rz   r  s       r.   r!  r!    s    >>!WeZ88r-   c                 z   |D ]F  }|t        j                  |j                  t         j                  t         j                  fv d        H t        j                  |j                  t         j
                  k(  d        | j                         dk(  r<t         j                  j                  | |      }| j                  |j                  |      S t        t        |            D ]2  }||   }||j                  d| j                  |      dz
        ||<   4 t        j!                  | |      j#                  | |      S )Nc                       yNz3tensors used as indices must be long or int tensorsr,   r,   r-   r.   r   z&_unsafe_masked_index.<locals>.<lambda>  r  r-   c                       yNz*tensors used as masks must be bool tensorsr,   r,   r-   r.   r   z&_unsafe_masked_index.<locals>.<lambda>  r  r-   r   r$   r  )rf   r   r   r`  r  r  r   _meta_registrationsmeta_index_TensorrE  r  rQ   r  r   r+  r   r  ra  )r:   r  r  fillr  meta_resultrg  s          r.   r  r    s    LL

EII66M 
LL

ejj <
 	wwyA~//AA!WMzz+++T223w<  ?
q	A>GAJ?
 a)55teTBBr-   c                 L   |D ]F  }|t        j                  |j                  t         j                  t         j                  fv d        H t        j                  |j                  t         j
                  k(  d        | j                         dk(  r| j                         S t        t        |            D ]B  }||   }||j                  | j                  |       | j                  |      dz
        ||<   D |j                  | d      }t        j                  | ||d      S )Nc                       yr  r,   r,   r-   r.   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>  r  r-   c                       yr  r,   r,   r-   r.   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>	  r  r-   r   r$   r  Tr  )rf   r   r   r`  r  r  r   r  rQ   r  r   r+  ra  r   r!  )r:   r  r  r  r  rg  masked_values          r.   #_unsafe_masked_index_put_accumulater     s     LL

EII66M 
LL

ejj <
 	wwyA~wwy3w<  H
!&&)QGGAJH
 %%teQ/L!!!Wlt!LLr-   c                    | j                         }d}|dk  rd}|6|dkD  r*dg|z  }|j                  d   ||<   |j                  |      }n|}| |z  } t        j                  ||k7  |d      }	|	j                  |      }
t        j                  | ||
      j                  |       }t        j                  ||k7  |d      }|t        j                  j                  k(  r|dkD  r| j                  dd      }||fS |lj                  | j                        }t        j                  |||
      j                  |      }t        j                  ||k7  |d      }|j                         }n"||k7  j                         j                  |       }|t        j                  j                  k(  r|j                         }||fS |t        j                   j                  k(  r|j                         |z  }||fS )Nr$   r%   r   r,   r   )rL   r  r  rf   rg   rR   gatherr  r#   r)   rz   rE  r  r   r9   r+   r*   )ry   r   r   r   r   r  r%  r  wr&  safe_target_r  r!  wsums                 r.   _nll_loss_forwardr    s    XXZFKzA:E "(aE+E"AAax++f4fa@K((5L ll4l;CCKPPF[[</;FINN(((VaZ}}R-|##HHTZZ ||A{L9AA+N{{6\14;xxz,.33588>IMM''' < 
inn**	*,<r-   c                     | j                         dkD  r| j                         dk  sJ d       |j                         dk  sJ d       | j                         dk(  xr |j                         dk(  }|sA| j                  d   |j                  d   k(  s"J d| j                   d|j                   d       | j                  d	   }|=|j                         dk(  r|j                         |k(  sJ d
| d|j                          t        | ||||      S )Nr   r%   r7  r$   r8  r9  r:  r;  rP   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rL   r  r   r  )ry   r   r   r   r   r=  	n_classess          r.   nll_loss_forwardr	  M  s1    88:>dhhjAoP/PP-

EDE 88:?8vzz|q'8L

1a(J	#DJJ<z&,,qIJ  

2I>

flln	9J	8Cz  |B  |H  |H  {I  JJ  T669lKKr-   c                      t        | ||||      S r6   )r  )ry   r   r   r   r   s        r.   nll_loss2d_forwardr  i  s     T669lKKr-   Ac                 0    |dz   | z  |dz   z
  | z  | z  dz   S )Nr%   r   r$   r,   r:   r  s     r.   _upsample_cubic_convolution1r  w  s(    UaK1q5!Q&*Q..r-   c                 <    || z  d|z  z
  | z  d|z  z   | z  d|z  z
  S )Nr     r@  r,   r  s     r.   _upsample_cubic_convolution2r  {  s0    UQU]a!a%'1,q1u44r-   r  c                    d}| j                   t        j                   d      k(  rt        j                  | d| z
  gd      }t        j                  | dz   d| z
  gd      }t        ||      }t	        ||      }t        j
                  |d      \  }}t        j
                  |d      \  }}	|||	|fS t        | dz   |      t	        | |      t	        d| z
  |      t        d| z
  |      fS )Ng      r  rd   r   r~   r   )r  rf   ri  r  r  rh  )
r  r  tt1tt2w03w12rO  rR  rP  rQ  s
             r.    _upsample_get_cubic_coefficientsr    s    Axx5<<&&kk1cAg,A.kk1s7C!G,!4*32*32cq)Bcq)B2r2~ )S!4(A.(q!4(q!4	
 	
r-   coeffstsc                 P    t        |      }t        d t        | |      D              S )Nc              3   ,   K   | ]  \  }}||z    y wr6   r,   r  r  r  s      r.   r  z+_upsample_cubic_interp1d.<locals>.<genexpr>  s     EHRREry  )r  r  r  )r  r  coeffs2s      r.   _upsample_cubic_interp1dr    s$    .r2GEFG0DEEEr-   c                 6    t        t        j                  |       S r6   )r   rf   add)r  s    r.   r  r    s    %))R  r-   	num_stepsc                     | dk  rt        j                  d||      S |s| dz
  | z  nd}t        j                  | || ||      S )Nr$   r   r  )stepsr  r   )rf   ro  linspace)r"  r  r   r  r  s        r.   _linspace_from_neg_oner&    sI     A~||AfE::-:)a-9	$A>>1"ayuMMr-   thetahr  c                    | j                   }| j                  }t        ||||      j                  d|d      }t        ||||      j                  |dd      }t	        j
                  d||      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |d	dd      }||z   |z   S )
Nr$   )r$   r$   r$   r  )r   r%   constantr   r  rU  rz   r$   r$   )r%   r   	r   r  r&  r  rf   r  r  r  r  )	r'  r(  r  r  r   r  grid_xgrid_ygrid_ones	            r.   _make_base_grid_4dr1    s    KKE\\F $A}eVDII!QPQRF#A}eVDII!QPQRFzz)5@H XX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?X%%r-   r  c                    | j                   }| j                  }t        ||||      j                  dd|d      }t        ||||      j                  d|dd      }t        ||||      j                  |ddd      }	t	        j
                  d||      }
t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |	d	dd      }	t        j                  j                  j                  |
d
dd      }
||z   |	z   |
z   S )Nr$   )r$   r$   r$   r$   r  )r   r   r*  r   r+  )r$   r%   )r%   r$   )r   r   r-  )r'  r  r(  r  r  r   r  r.  r/  grid_zr0  s              r.   _make_base_grid_5dr4    s5   KKE\\F#A}eVDII!QPQSTUF#A}eVDII!QPQSTUF#A}eVDII!QPQSTUFzz,eFCH XX  $$VjPQ$RFXX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?V#h..r-   c                     |\  }}}}t        | |||      }|j                  ddd      | j                  j                  d      z  j	                  d      }|j                  |||d      S )Nr  rP   r   r$   rX  r%   )r1  r  r]  rR   r   )	r'  r+  r  r
  rS   r(  r  	base_gridgrids	            r.   _affine_grid_generator_4dr9    sg    JAq!Q"5!QmLI NN2q!$uxx'9'9!'<<AA"ED99Q1a  r-   c                     |\  }}}}}t        | ||||      }|j                  ddd      | j                  j                  d      z  j	                  d      }	|	j                  ||||d      S )Nr6  rP   r@  r$   rX  r   )r4  r  r]  rR   r   )
r'  r+  r  r
  rS   r  r(  r  r7  r8  s
             r.   _affine_grid_generator_5dr;    sm    MAq!Q"5!QOI NN2q!$uxx'9'9!'<<AA"ED99Q1a##r-   c                     t        j                  t        |      dv d        t        |      dk(  rt        | ||      S t	        | ||      S )N)r@  r  c                       y)NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r,   r,   r-   r.   r   z'affine_grid_generator.<locals>.<lambda>  r  r-   r@  r6  )rf   r   r  r9  r;  )r'  r+  r  s      r.   affine_grid_generatorr>    sJ     
LLD	VU 4yA~(MRR(MRRr-   r8  interpolation_modepadding_mode_expand_gridc           	          !"#$%&'()*+,- t        j                  dv fd       t        j                  dv fd       dt        dt        dt        ffd-dt        dt        d	t        dt        fd
+dt        dt        dt        f+fd dt        dt        dt        f -fd} j                  \  $%|j                  \  })*}|dk(  sJ r(|j                  d)*|      j                  )*d      }dt        dt        dt        f$%fd&t        j                   j                        j                  ddd      t        j                   j                        j                  ddd      dt        dt        dt        dt        f&)*fddt        dt        dt        f fd"|d   }	|d   }
dk(  r ||	%      } ||
$      }|j                         |j                         c'('dz   (}}'(dz   }}||}}||z
  ||z
  z  }||z
  ||z
  z  }||z
  ||z
  z  }|'z
  |(z
  z  }t        "fd'(|f|||f|||f|||ffD              S dk(  r< ||	%      } ||
$      }|j                         }|j                         } "||d      S  -|	%      } -|
$      }|j                         '|j                         (|'z
  ,|(z
  }s",j                  d      ,|j                  d      }dt        dt        dt        f "$%fd#dt        dt        f#'(,fd!t        !fdt        d       D              }t!        ||      S )!N)r   r$   r%   c                      d  S )NzInvalid interpolation mode r,   )r?  s   r.   r   z"_grid_sampler_2d.<locals>.<lambda>  s    -.@-AB r-   c                      d  S )NzInvalid padding mode r,   )r@  s   r.   r   z"_grid_sampler_2d.<locals>.<lambda>  s    -B<.+Q r-   coordsr+  rM   c                 B    r|dz  dz
  n|dz  }|dz  dz
  }| |z  |z   S r  r,   )rE  r+  r   ofsr  s       r.   unnormalizez%_grid_sampler_2d.<locals>.unnormalize  s8     %2tczCs
Sj3|c!!r-   	twice_low
twice_highc                 P   ||k(  rt        j                  |       S |dz  }||z
  dz  }| |z
  j                         }t        j                  ||      }||z  j	                         j                  t         j                        }t        j                  |dz  dk(  ||z   ||z   |z
        S )Nr%   r   r$   r   )rf   r  r   fmodfloorr9   int8rg   )rE  rI  rJ  
coords_mincoords_spancoords2extraflipss           r.   reflect_coordinatesz-_grid_sampler_2d.<locals>.reflect_coordinates  s    
"##F++]
!I-2J&++-

7K0;&--/222D{{AINEJ.j0H50P
 	
r-   c                     dk(  r| S dk(  rt        j                  | d|dz
        S r | dd|dz
  z        }n | dd|z  dz
        }t        j                  |d|dz
        S )Nr   r$   r%   rP   r   )rE  r+  coords_reflectedr  r@  rT  s      r.   compute_coordinatesz-_grid_sampler_2d.<locals>.compute_coordinates  sx    1MQ;;vq$(33#6vq!tax.#Q #6vr1t8a<#P ;;/D1H==r-   c                 (     | |      } ||      S r6   r,   )rE  r+  	coords_unrW  rH  s      r.   compute_source_indexz._grid_sampler_2d.<locals>.compute_source_index%  s    -	"9d33r-   r%   r$   r  ysc                     t        j                  d| k  t        j                  | k  t        j                  d|k  |k                    S rq   rf   r  )r  r[  iHiWs     r.   in_bounds_condz(_grid_sampler_2d.<locals>.in_bounds_cond5  sF      GU&&rBw0A0A!r'2PR70ST
 	
r-   r  wsc                      | |      rndt        	
fd| j                  t        j                        |j                  t        j                        |fD              S )Nr$   c              3   p   K   | ]-  }t        j                  |d       j                         / ywr  )rf   rg   r  )r  r  r  r  r  oHoWs     r.   r  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>D  s7      
 KKa#((Ar26
s   36r   )r  r9   rf   r  )r  r[  ra  r  r  r  r  rA  r`  rd  re  s      @@r.   clipz_grid_sampler_2d.<locals>.clip=  sY    b"%
 A1 
ee%++e.EKK0H"M
 
 	
r-   ixiyc                 8     	| ||      \  }}}||f   |z  S r6   r,   )
rg  rh  r  idx_xidx_yw_C_idxN_idxr  rf  s
         r.   get_summandz%_grid_sampler_2d.<locals>.get_summandI  s0    B?ubue+,r11r-   ).r   ).r$   r   c              3   :   K   | ]  \  }}} |||        y wr6   r,   )r  rg  rh  r  ro  s       r.   r  z#_grid_sampler_2d.<locals>.<genexpr>_  s(      
R B"
s   c                 <     |       } |      } ||d      S rY   r,   )rg  rh  r:   rW   rW  ro  r^  r_  s       r.   get_value_boundedz+_grid_sampler_2d.<locals>.get_value_bounded~  s*    #B+A#B+Aq!Q''r-   rG  c                     | dz
  z   } dz
  |       |       dz   |       dz   |      f}t        |      S )Nr$   r%   )r  )rG  iy_ofscsrr  ix_nwiy_nwtxs      r.   	get_coeffz#_grid_sampler_2d.<locals>.get_coeff  s[    cAg&F!%!)V4!%0!%!)V4!%!)V4	B ,B33r-   c              3   .   K   | ]  } |        y wr6   r,   )r  rG  ry  s     r.   r  z#_grid_sampler_2d.<locals>.<genexpr>  s     :#y~:   r@  )rf   r   r   r  r  r  r  r  r  r   rM  r  r  rR   r  rQ   r  ).r  r8  r?  r@  r  rA  rZ  rS   twor:   rW   rg  rh  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr  r  rm  r  rn  rf  rW  ry  ro  rr  r^  r_  r`  rv  rw  rd  re  rT  rx  rH  s.   ` ````                     @@@@@@@@@@@@@@@@@@@r.   _grid_sampler_2dr    s    
LLi'B 
LL	!#Q"F "# "& "

F 

s 

 

PV 


>F 
># 
>& 
>4V 43 46 4 77LAq"bZZNAr2s!8O8 yyAr2s+221aRC
6 
v 
& 

 LL188,11!Q1=ELL188,11!Q1=E

 

V 

 

4F 

 

2 2F 2& 2 2
 	VAVAQ!!R(!!R(xxz288:uqy%ueaiueu
urz*U
urz*
rEz*U
rEz* 
 t$t$t$t$	 
 
 	
 
q	 !!R(!!R(XXZ
XXZ
:z155BB

%Z%ZaBaB	(& 	(f 	( 	( 	(
	43 	46 	4 	4 :q::'33r-   c                 "    t        | ||||      S )N)r8  r?  r@  r  )r  )r  r8  r?  r@  r  s        r.   grid_sampler_2dr    s      	-!# r-   c                 &    t        j                   j                         dk(  xr j                         dk(   fd       t        j                   j                  d      j                  d      k(   fd        z  j	                  d      S )Nr%   r$   c                  L    d j                          dj                          S )Nzmatrix @ vector expected, got r  r~   ry   r  s   r.   r   zmv.<locals>.<lambda>  s!    0BswwykJ r-   r   c                  v    d j                  d       d j                  d       dj                  d       dS )Nzsize mismatch, got input (r   r:   r$   z), vec (r;  r  r  s   r.   r   zmv.<locals>.<lambda>  s<    ,TYYq\N!DIIaL>RURZRZ[\R]Q^^_` r-   r~   )rf   r   rL   r+  r   r  s   ``r.   r  r    ss     
LL
a*CGGINJ 
LL		!#` 3J""r-   c                     |-|dz
  |z  dz   }d|z
  | z  |t        j                  |       z  z
  }nd|z
  | z  t        j                  |       z
  }|||z  }t        ||      S rY   )r   
logsigmoidr   )ry   r   r   
pos_weightr   
log_weightr   s          r.    binary_cross_entropy_with_logitsr    sv    
  1n.2
F
d"j1<<3E&EFF
d"Q\\$%77f}i00r-   tensor1tensor2is_outc           
         | j                   |j                   k\  r| |fn|| f\  }}ddlm} |j                   dk\  r|j                   dk  sy|j                  r|sy| j                   dk(  ry ||j	                         dk(        ry|j
                  }|j                         }t        d t        |d d |d	d
 |d	d
       D              S )Nr   )rp  r   r%   FTc              3   4   K   | ]  \  }}}|||z  k(    y wr6   r,   )r  st1st2s2s       r.   r  zshould_fold.<locals>.<genexpr>  s&      S#r 	sRxs   rX  r$   rP   )	r|  rr  rp  r  r   r  ru  r  r  )r  r  r  t1t2rp  t1_shape	t1_strides           r.   should_foldr    s     $+<<7<<#?gwgwEWFBJGGqLRWW\	||qBHHJ!O,xxH		I !)CR.)Ab/8Ab>R  r-   )pass_is_out)r  c                   | j                         }|j                         }|dk7  r|dk7  sJ |dk(  r|dk(  rt        j                  | |      S |dk(  r|dk(  rt        j                  | |      S |dk(  rC|dk(  r>t        j                  t        j
                  t        j                  | d      |      d      S |dk(  r|dk(  rt        j
                  | |      S t        | ||      r||kD  }|r|j                  n| }|s|n|dk(  r| j                         n| }|j                  }t        |d d       }	t        t        j                  |	      }
|j                         dk(  }|r|	j                  |j                  d          |j!                  |
|d         }|r>|j                  |      j#                  |	      }|r|j                  j%                         S |S |j                  |      j#                  |	      S |dk\  r^|dk\  rX|dkD  r| j'                  d      nd}| j'                  d      }| j                  d d }|dkD  r|j'                  d      n|j'                  d      }|dkD  r|j'                  d      nd}g }t)        |dz
        D ]"  }|j                  |j'                  |             $ |dk(  rn|dk(  ri|d   |d   k7  r^|d   dk(  r'| j*                  rt-        | j	                  d      |      S |d   dk(  r'|j*                  rt-        | |j	                  d            S t        t        j.                  ||            }|||gz   }t1        |      }| j3                  |      j!                  |||      }|dk(  }|r7||gz   }|j3                  |      j!                  ||      j                  d      }n)|||gz   }|j3                  |      j!                  |||      }|}	|dkD  r|	j                  |       |dkD  r|	j                  |       |r/|j5                  |      j	                  d      j#                  |	      S |j5                  |      j#                  |	      S t        j6                  dd        y )	Nr   r$   r%   rP   rX  r   Fc                       y)Nz/both arguments to matmul need to be at least 1Dr,   r,   r-   r.   r   zmatmul.<locals>.<lambda>S  r  r-   )rL   rf   dotr  r  r  rR   r  r]  r  r  rt  r   operatorr   rm  r$  r  r  r+  rQ   r  r\  broadcast_shapesr   r  bmmr   )r  r  r  dim_tensor1dim_tensor2r]  r  r  sizes_1output_shapefolded_dim1t2_is_matrix	t1_foldedr   r
  m1batch_tensor1m2rP  batch_tensor2rg  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandeds                               r.   r\  r\    sG    ++-K++-K!q 000aK1,yy'**		kQ.xx))		kQ.}}UXXeoogq&A7KQOO		kQ.xx))	Wgv	.  +-	$WZZ'$G+:J799;PW 	 ((GCRL)X\\<8 vvx1}, JJ{GBK8	 \\"%**<8F-6699'')BFB<<#((66		kQ. !,aGLLQ\\"cr*!,qW\\"gll26F +aGLLQ#%{Q' 	2A  a1	2 1q a M!$44Q1$)>)>gooa0'::Q1$)>)>gwq'9::  $""=-@ 
 3aW<#$89 #>>*=>FF !R
 !A%
"6""=23-r21  #7"a"@&~~.ABJJ$b!  ,?"?"#''(89AA"EJJ<XX#''(89>>|LLUUVr-   r  r  c                 \     j                   \  }}t        |d   ||      }t        |d   ||      }t        j                   t        j                  j
                        \  }}t        j                  |d    j                        j                  |      }	t        j                  |d    j                        j                  |      }
t        ||
|      }t        ||	|      }|j                  d      }|j                         }|j                         }||z
  j                  dd      }||z
  j                  dd      }|j                  t        j                        }|j                  t        j                        }|dz
  ||dz   |d	z   f}|dz
  ||dz   |d	z   ft        |      t        |      }d
\  } j                   t        j"                  k(  rt%              t%        |      }D cg c]@  }|dz  z  t        j&                  |      dz  z   j                  t        j(                        B c}|D cg c]@  }|d|z  z  t        j&                  |      dz  z   j                  t        j(                        B }} fd fdt+        fd|D              } j                   t        j"                  k(  r|J t-        |||      }nt/        d t1        ||      D              }t        j2                         }|j5                  |      }|S c c}w c c}w )Nr   r$   rL  r  r   rP   r   rd   r%   r   r   c                     t        j                  | ddz
        }t        j                  |ddz
        }t        j                  d d ||g      }|S r  )rf   r   r   r  )r[  r  y_idxx_idxr  in_hin_wr   s        r.   load_boundedz0upsample_bicubic2d_default.<locals>.load_bounded  sO    B4!8,B4!8,utT5%&@Ar-   c                      t         fdD              }j                  t        j                  k(  rJ t	        |      S t        d t        |      D              S )Nc              3   0   K   | ]  } |        y wr6   r,   )r  x_ofsr  rW   s     r.   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s     Bl1e,Bs   c              3   ,   K   | ]  \  }}||z    y wr6   r,   r  s      r.   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s     JRBGJry  )r  r   rf   r-  r  r  r  )rW   src_xr   ixs_ofsr  weights_precision_x	weights_xs   ` r.   get_x_interpz0upsample_bicubic2d_default.<locals>.get_x_interp  sW    B'BB;;%++%&222%eY8KLLJCy4IJJJr-   c              3   .   K   | ]  } |        y wr6   r,   )r  y_ofsr  s     r.   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s     ;%,u%;r{  c              3   ,   K   | ]  \  }}||z    y wr6   r,   r  s      r.   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s     L(2rb2gLry  rV  )r  r  rB   rC   rN  r  rf   r  r  r9   r  rR   rM  r   r  r  r   r-  r  r   int16r  r  r  r  r   r  )r   r  r  r  r  rS   h_scale_factorw_scale_factorr   rg  r  x_floaty_floatr:   rW   yscaler  iys_ofs	weights_yweights_precision_yr  src_yr  rW  r  r  r  r  r  r  r  s   `                       @@@@@@@r.   upsample_bicubic2d_defaultr  V  s    {{Aq$ $D+a.-QN#D+a.-QN''5#H#H#U#UHAu 	[^ELL9<<5<IA[^ELL9<<5<IA#NA}EG#NA}EG#GAA k  c*Fk  c*F	U[[A	U[[A1uaQA&G1uaQA&G08I08I/9,,{{ekk!7	B7	B 
 !**+ejjmc.AAEEekkR
	 
 !**+ejjmc.AAEEekkR
	 

K K ;7;;E{{ekk!"...#E96IJLc%6KLL //6M];FMA

s   9AL$AL)c                 $   t        j                  t        |      t        |      z   dk(  d        |H|J t        t        t
        t
        f   t        d t        | j                  dd  |      D                    }|r|nd\  }}t        | ||||      S )Nr$   c                       y)Nz:Must specify exactly one of output_size and scale_factors.r,   r,   r-   r.   r   z(upsample_bicubic2d_vec.<locals>.<lambda>  r  r-   c              3   P   K   | ]  \  }}t        t        |      |z           y wr6   )r   r   )r  r  rl   s      r.   r  z)upsample_bicubic2d_vec.<locals>.<genexpr>  s*      Au 	!u,-s   $&r%   r   )
rf   r   r  r
   r   r  r  r  r  r  )r  r  r  r  r  r  s         r.   upsample_bicubic2d_vecr    s     
LL[D//14L (((#s(O  #AGGABK ? 
 )6}<GW%amWgVVr-   c                 (      fd}t         ||      S )Nc                     t        j                  |  ||z   j                        }|dz
  |dz
  |j                         z
  j                         z
  S )Nr  r$   )rf   r  r  r   r   middler   dim_idxr  s       r.   r  z_reflection_pad.<locals>.idx  sF    ,,ufunQXXFzVaZ'++-7<<>>>r-   _reflection_or_replication_padr  r  r  s   `  r.   _reflection_padr    s     ? *	 r-   c                 (      fd}t         ||      S )Nc                     t        j                  |  ||z   j                        }t        j                  |d|dz
        S )Nr  r   r$   )rf   r  r  r   r  s       r.   r  z_replication_pad.<locals>.idx  s6    ,,ufunQXXF{{7Avz22r-   r  r  s   `  r.   _replication_padr    s     3 *	 r-   idx_fnc                 n   t        |      dz  t        j                  | j                         dz   dz   fv fd       | j                   d  }| j                         z
  }t              D cg c]  }|ddz
  |z
  z      }}t              D cg c]  }|ddz
  |z
  z  dz       }}| }t              D ]E  }d g|j                         z  }	 |||   ||   ||         |	||z   <   t        j                  ||	      }G t        j                  |      }
|j                  |
      }|S c c}w c c}w )Nr%   r$   c                  (    d  d dz    d dz    dS )Nreflection_padzd requires r$   zD or r%   zD inputr,   r~   s   r.   r   z0_reflection_or_replication_pad.<locals>.<lambda>  s$    .[q	sQwiwO r-   rV  )r  rf   r   rL   r  rQ   r   r  rB   r   r  )r  r  r  	inp_shapenc_dimrg  padding_leftpadding_rightr  r  rW  rL   s              @r.   r  r    sH   
 g,!
C	LL	C!GS1W%%O IUUWs]F8=c
C1GAq1-.CLC=B3ZHWQ#'A+.23HMHF3Z 1&**,. a)A,a@PQAJ##FC01 //7M];FM DHs   1D-D2c           
          t        |      dz  |j                   d  D cg c]  }|dz
  	 }}t              D cg c]  }|ddz
  |z
  z      }}t              D cg c]  }|ddz
  |z
  z  dz       }}g }t        |j                        D ]c  }dg|j                  z  }	d|	|<   |j	                  t        j                  |j                  |   |j                        j                  |	             e |d   | d  }
d t              D cg c]  }|
|   ||   z    }}t              D cg c]  }||   |
|   z
   }}t              D cg c]  }d||   z  ||   z   |
|   z
   }}t              D cg c]  }||   d||   ||   z   ||   z   f }}t        j                  t        j                  t              D cg c]  } ||          c}      }t        j                   ||z   d      } fd}t        j                  t              D cg c]  }g d	 c} D ]  }|t!        dgz        k(  rg }g }t              D ]t  }||   dk(  r||   }||   }n=||   dk(  r||   }|
|   d||   f}n$||   dk(  r||   }|
|   ||   ||   z
  ||   dz
  f}|j	                         |j	                         v  ||||      } |S c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )
Nr%   r$   rP   r  c                 F    | \  }}}t        j                  ||k\  ||k        S r6   r]  )index_rangerg  lbubs       r.   index_range_conditionz7_reflection_pad_backward.<locals>.index_range_condition  s(    	2r  b!r'22r-   r   r   c           	      .   t        	      D ]*  }||   d   ||   d   k  }t        |t              s%|s(| c S  t        j                  t
        j                  |D cg c]
  } |       c}      }t
        j                  
||z   d      }| |z   S c c}w )Nr%   r$   r   )rQ   r8   r  rI   r   r   r  r  )r   r   index_rangesrg  upper_less_than_lowerr  r  gr  rL   rj   r  s           r.   r  z,_reflection_pad_backward.<locals>.accumulate8  s     s 	A$0OA$6a9K$K!/6;P	
 COPK";/P
 %%k4S#Fax Qs   B
)rP   r   r$   )r  r  rQ   r|  rm  rf   r  r  r  rI   r   r   r  r  	itertoolsr   r  )rj   r:   r  r(  dhwrg  r  r  r  
view_shapexyzcenterleft_reflectright_reflectrange_cr  r   r  rS   areaoutsr  r   r  r  rL   r  s   `                       @@@r.   _reflection_pad_backwardr    s`   
 g,!
C''3$%.
)Q1q5
)C
)8=c
C1GAq1-.CLC=B3ZHWQ#'A+.23HMHG166] SS166\

1u||AGGAJqxx@EEjQRS
 	#A
3$%.C3 16c
;1c!f|A&;F;6;CjALOc!f,ALADI#JOqQQZ,q/1CF:OMO
 NSSVZHIAs1vQ/-2BBCG  eCjQ0<QD $$[$F
CHD !!c
#C1J#CD 45!s##s 	-AAw!|Qi%ajaB"1o"1vq,q/:aA#A&"1vs1va0@'@#a&1*MKK,	- $l3-40 Kc *CH2 <AO R& $Ds5   K K$$K)K.8K3K8K=L
&Lr   r   r  c                j    t        j                  | ||      }t        j                  | ||      }||fS )Nr  )rf   aminrP  )ry   rL   r  r  rP  s        r.   aminmaxr	  b  s2     ::dW5D::dW5D:r-   r   c                    t         j                  t        j                  t        j                  |       d|       |||      S )Nr   r   )r   r   rf   rg   isnan)ry   rL   r  r   s       r.   nansumr  j  s2     88EKKD 11d;S'QV8WWr-   r   r&  r  r?  r&  c          	      N    t         j                  j                  d| d||||      S )Nr   r$   r  r   r  
start_step)ri  r   r&  r  r?  s        r.   arange_defaultr  p  s/     ;;!!	3vf "  r-   c          	      N    t         j                  j                  | |d||||      S )Nr$   r  r  )rh  ri  r   r&  r  r?  s         r.   arange_startr    s/     ;;!!sAU6&Z "  r-   c                      ddl m}  || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper  )rD   rE   r  s      r.   out_dtype_decompr    s    AD+F++r-   marginc                 t   	
 t        j                          t        j                         j                  d   
 j                  d   	t        j                  |dk(  xs |dk(  d        t        j                   j
                  dk(  xr 	dk7   fd       t        j                  j
                  dk(  xr j                         
k(  
fd       Qt        j                        t        j                  j
                  dk(  xr j                         	k(  	fd       j                  d      t        j                   d      }||z
   z   }|j                  d      }|dk(  r|n||z  }|   z  }t        j                  	 j                  	      }t        j                  |k7  |d      }|t        j                  j                  k(  r|j!                         S |t        j"                  j                  k(  r |j%                         |j                  d   z  S |j!                  d
      S )Nr   r$   r%   c                       y)Nz only p == 1 and p == 2 supportedr,   r,   r-   r.   r   z#multi_margin_loss.<locals>.<lambda>  r  r-   c                  "    d j                    S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  )r   s   r.   r   z#multi_margin_loss.<locals>.<lambda>  s    _`e`k`k_lm r-   c                  (    d  dj                    S )Nz#inconsistent target size, expected r  r  )nframer   s   r.   r   z#multi_margin_loss.<locals>.<lambda>  s    5fXYv||nU r-   c                  (    d  dj                    S )Nz#inconsistent weight size, expected r  r  )rL   r   s   r.   r   z#multi_margin_loss.<locals>.<lambda>  s    9#i~V r-   r  r  r~   )rf   
atleast_2d
atleast_1dr  r   r|  r   rR   r  r^  r  r  rg   r#   r*   rz   r   r+   r   )r   r   rP  r  r   r   urh   r  rL   r  s   ``  `    @@r.   multi_margin_lossr#    s    U#Ef%F[[^F
++a.C	LLa!16#MN	LL

a$C1Hm 
LLq5V\\^v5U !!&)KK163!6V	
 a FU0A
UA	AA!VQAv
,,s5<<
0CC6M1a(AINN(((vvx	imm))	)uuw##vv!v}r-   	is_targetc                    | j                   |j                   t        j                  |       } t        j                  |      }| j                   d   }t        j                  t	              dk  xr |dk7  fd       t        j                  t	              dk  xr k(  fd       t        j
                  ||j                        }|dk(  }t        j                  t        j                  |||      dd	      }||k  }t        j                  ||d      }t        j                  | d|
      }	t        j                  ||d      }
t        j                  ||
j                  d      k(  d      }d|	j                  j                  d      z
  | z   }|j                  d      }||z  }t        j                  |d|      }|t        j                  j                   k(  r!|j#                  d      j%                         }n@|t        j&                  j                   k(  r|j#                         }n|j#                  d      }|j)                  | j*                        j-                        }||fS )Nr$   r%   r   c                      d  S r  r,   )orig_input_shapes   r.   r   z0multilabel_margin_loss_forward.<locals>.<lambda>  s    _`p_qr r-   c                      d d  S )Nzinconsistent target size: z for input of size: r,   )r'  orig_target_shapes   r.   r   z0multilabel_margin_loss_forward.<locals>.<lambda>  s    ,->,??STdSef r-   r  rP   Tr  r  r~   rd   )r   rP   )r  rf   r   r   r  r  r  r  rg   r  anyrR   Tr^  r#   r*   rz   r   r   r+   r9   r   r$  )r   r   r   rL   r  is_endend_idxtarget_masktidx0r"  tidx1r$  rh   r'  r)  s                @@r.   multilabel_margin_loss_forwardr1    s    {{U#Ef%F
++a.C	LL"/saxr 
LL!#M(9=M(Mf
 ,,s6==
1Cr\FjjVS#6BMG-KKKVQ/EU%0AKKVR0E		#R!88a@Iaccmmm##e+A	AA	CAIq!$AINN(((EEgE##%	imm))	)EEGEEgEU[[)112CDIi<r-   )	attn_maskrl   querykey	dropout_p	is_causalr2  c          
           j                   }t        j                  t        j                          fd       t        j                   j	                         dk(  xr( j	                         dk(  xr j	                         dk(   fd       t        j                  dk(  fd       t        j                   j
                  d   j
                  d   k(  xr j
                  d   j
                  d   k(  d        t        j                  j                   ||d |      \  }}	|j                  d	d
      j                  t        j                        }|j                  d	d
      |	fS )Nc                  "    d j                    S )Nz-query must be FP32, FP64, BF16, FP16 but got r   )r3  s   r.   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    ?}M r-   r@  c                  n    dj                          d j                          dj                          S )Nz,q, k, v must be a 4 dimensional tensor, got r  r~   )r4  r3  rz   s   r.   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s3    >uyy{m2cggi[XZ[`[d[d[fZgh r-   r   c                      d  S )Nz&dropout probability must be zero, got r,   )r5  s   r.   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    $J9+"V r-   r   c                       y)Nz&q, k, v should have the same head sizer,   r,   r-   r.   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  r  r-   )r2  r5  r6  dropout_maskrl   r$   r%   rV  )r   rf   r   rE  rL   r  r   "_scaled_dot_product_attention_mathr  r]  r  r[  )
r3  r4  rz   r5  r6  r2  rl   r   r   attns
   ````      r.   *scaled_dot_product_flash_attention_for_cpur?    s@    KKE	LL&M 
LL		q@SWWY!^@		q0@h 
LLSV 
LLA%++a.(KSYYq\U[[^-K8
 ::BB C 	LFDV a#..U=T=T.UFQ"D))r-   c                 .    t        |       fd       }|S )Nc                  <     | i |}| d   j                  |      S rq   )r   )rD   rE   r   outplace_ops      r.   
inplace_opz$register_inplace.<locals>.inplace_opO  s%    4*6*Aw}}S!!r-   r   )aten_oprB  rC  s    ` r.   register_inplacerE  N  s"    G$" %" r-   c                 B   | j                         s&| j                         st        |      }t        |      }t        j                  ||      }t        |t        j                        r|dk7  r||z  }|dk(  r|S t        |t        j                        r|dk7  r| |z  } | |z   S r  )rE  r  r  rf   r  r8   numbersNumber)ry   batch1batch2r`   rk   r  s         r.   baddbmmrK  W  s     !!#DOO,=4yE
YYvv&FeW^^,
%qydGNN+tqyd{&=r-   c                 2    t        j                  | |d      S )NrM  r}  r  )ry   rO  s     r.   floor_dividerM  h  s     99T588r-   c                 `    t        j                  t        j                  | j                  d      S rY   )rI   r   r  r   r  )r  s    r.   	sym_numelrO  n  s    HLL!''155r-   r   r   c                    |"t         j                  j                  | g |      S t         j                  j                  | g ||      S )Nr   rP  )r   r   dim_IntListIntList_out)ry   r   r   s      r.   sum_defaultrT  s  sC     {xx##D"E#::xx##D"Es#CCr-   c           	          t        | t        j                        s| S |@t        j                  j                  | t        t        | j                                           S t        j                  j                  | |g      S r6   )	r8   rf   r   r   r  dimsrt  rQ   rL   )ry   rL   s     r.   squeeze_defaultrW    s\     dELL)
{||  tE$((*,='>??||  u--r-   c                 2   t        fdt        t        | j                              D              }|j                  t
        j                  k(  rt
        j                  nd }| j                  d|d|      }| ||j                  |j                        z  z  |fS )Nc              3   .   K   | ]  }|k7  s	|  y wr6   r,   )r  rg  rL   s     r.   r  z)_weight_norm_interface.<locals>.<genexpr>  s     @1qCxQ@s   
r%   T)r  r   )
r  rQ   r  r  r   rf   r  r  r   r9   )r  r  rL   keep_dim
norm_dtyper   s     `   r.   _weight_norm_interfacer\    st     @c!''l 3@@H !5>> 9tJ66!Xt:6>DDGGAGG$$%t++r-   assume_uniqueinvertc                   t        | t        j                        s!t        j                  | |j                        } t        |t        j                        s!t        j                  || j                        }|j                         dt        | j                         d      z  k  rt        | ||      S t        | |||      S )Nr  g      $@g(\?r_  r]  )	r8   rf   r   ro  r  r   rY  isin_defaultisin_sorting)elementstest_elementsr^  r_  s       r.   isinrf    s     h-<<1E1EFmU\\2]8??Ktc(..*:E&BBBHmFCCm=
 	
r-   ra  c                N   | j                         dk(  r%t        j                  | t        j                        S  | j                  g | j
                  d|j                  z   }|s||k(  }n||k7  }t        t        d|j                   dz
  d            }|j                  |      S )Nr   r   )r$   rP   r$   r~   )
r   rf   
empty_liker  r  r  r|  r  rQ   r*  )rd  re  r_  r:   cmprL   s         r.   rb  rb    s    ~~1

;;Dx~~D0B0B)BDA= = 
b=---126
7C77s7r-   c                   | j                         }|j                         }|rt        j                  ||g      }t        j                  |d      \  }}|dd  |d d k(  }	t        j                  |	ddgd      }	|r|	j                         }	t        j                  |	      }
|
j                  d||	      }
|
d| j                          S t        j                  |      \  }}t        j                  ||      }t        j                  ||j                         k  |d      }||   |k(  }|r|j                         n|}|j                  | j                        S )NT)stabler$   rP   r   F)r5  rf   r-  sortrk  logical_notrh  r  r   searchsortedrg   r$  r  )rd  re  r^  r_  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskr  sorted_test_elementsrS   r  test_idxri  s                   r.   rc  rc    s?   $$&M&..0 yy-1C!DE(-

<(M%(,0DD..~1vuM+779N/q,?A())"'**-?"@a  !5}E;;s%9%?%?%AA3J"8,=#)coos{{8>>**r-   c                 .    | j                  d      }||   S rO   )r$  )ry   r  	flatteneds      r.   takery    s     R IUr-   c                     |t         j                  }|t         j                  k(  rt        |      }t        j                  | |j                  |      S r,  )rf   r[  preserve_formatr   r   resizer  )ry   rO  rW  s      r.   	resize_asr}    sD    //----e4;;tU[[;FFr-   )F)none)g      ?gUUUUUU?FNr6   )r%   )r   NNr$   )rP   FFr   r  r,  )r$   r$   F)Fr   )r   rd   N)r   r$   Nr   )NNN)r   r   FT)r   r   Fr  )r   F(  rI   r  rG  r  rv  enumr   r   r   r   r   typingr   r	   r
   r   r   r   r   r   rf   torch._meta_registrationstorch._primsrF  r  torch._prims_common_prims_commonrB   torch.nn.functionalr  r  r   r   r   r   torch._decompr   r  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   r@   torch.utils._pytreer    r'  DispatchKeyr!   str__annotations___opsr  r   r#   rN  r  rK   rO  compute_only_pw_cast_for_opmathpw_cast_for_opmathr  pw_cast_for_int_to_realr  rT   r]   r_   ri   r  ru   r  Scalarr{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  py_implAutogradCUDA	Generatorr   r   r   r   r   r   r   r*   rz   r   _safe_softmaxr  r  r  rU   r  r  r   r  r(  r5  r>  rA  rG  rJ  rL  rN  rR  rf  rn  slicer  r  rm  r  r  r  r  r  r  r  r*  r.  r7  r=  rA  CompositeImplicitAutogradAutogradr@  rS  rW  r]  re  r   rs  rv  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  unsafe_chunkr  r  r  no_statsr   r$  r0  r6  r:  r<  _fused_dropoutr>  rK  r  rW  detachlift
lift_freshrN  rS  rV  rY  rX  rl  rq  rt  _adaptive_avg_pool2dr  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  _upsample_nearest_exact1dr  r  r  r  r  r  r  r'  r+  r/  r=  rE  rH  rL  r[  rd  rnn_tanhr   rk  rnn_relurn  rs  rt  rw  r  r  r  r  lstmr  r  r  r  grur  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  _unsafe_viewr  r!  r  r   r  r	  r  r  r  r  r  r  r&  r1  r4  r9  r;  r>  r  r  r  r  r  r\  upsample_bicubic2dr  r  reflection_pad1dreflection_pad2dreflection_pad3dr  replication_pad1dreplication_pad2dreplication_pad3dr  r  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardr  r	  r  r  rC  r&  r  rh  r  r  r#  r1  +_scaled_dot_product_flash_attention_for_cpur?  rE  rK  rM  rO  r   rT  r  rL   rW  r\  rf  rb  rc  ry  r}  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r  r  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__leaky_relu_r   logit_logitrelu_r  renorm_renormround_r  scatter_r#  scatter_add_scatter_addscatter_reduce_scatter_reducesilu_r,   r-   r.   <module>r     s}M       
  % $ N N N     #   , , 0 7   * ( hh"" c zz~~   %!!99! !H #*88@@# 
 uDDLL  "uDDQQ  c f  **+\2F 2v 2   ,2 --.\4v 4& 4   /4 ../\S S6 S S5 S   0S
 ))*\


 
 	

 
 
   +
2 ))*+( ,( ))*+"V " ," (()@f @ @   *@ 112\f F    3 ../\PP%P05P@EP  0P 'GF Gv G   (G //0F & V    1 //0\:F :& :U :  1: 001\LL%L7<LNRL   2L **+\) )f )3 )   ,)< **+5v 5f 5  ,5 		"&v && &   #& **+\>v >V > >   ,>
 **+6 6 66 6 ,6 334%%
% % 66>	% 5% --.&&{'?'?@ %+/5
55 5 	5
 5 (5 5   A /5( ../''(@(@A %+/X
XX X 	X
 X (X X  B 0X 667



 
 	

 
 
 
 
   8
$ 112\<f <F <F <v <   3<v #   &3<>>3G3G1
1 1-011   '1 ../\11 &1061CF1   01 **+0 ,0 ++, ^^))	1
11 1 	1   -1 44<<=%/5BEMR  > 44??@
R
R

R 
R 	
R
 
R 
R  A
R 00889		%	/5	BE	NS	  :	 00445
R
R

R 
R 	
R
 
R 
R  6
R$$
$ $ V	$
 $ $ $ $@ ))*\Nf NF N N N   +N$ ../\''
' ' V	'
 ' ' ' '  0'T 001\
  V	
      2B 112  $^^))	1
11 V1 	1
 1   31& ::;\
  $^^))
  V	
     <  --. ^^))111 1 	1   /1 667\
 ^^))			
	 	 		
 	   8	 		") )v )% )  #) ,,-& &F &v &  .& ++,	O	Oc	O 
	O 		O
 
	O 	O  -	O 

))* 6?
6? 
6? C=	6?
 
#6? 6? +6?r (4<SM
38_. **+ $U$U	$U 
$U C=	$U
 
#$U $U  ,$UN ,,-E Ed3i Ec ERU E  .E
 ../OO&*3iO9<ODGOORO  0O%+:?++ 334\ XX!'X.1X@EX !  5X 778 KK!'K.1K@EK !  9K*& $MMcM 3iM #Y	M
 IM M  %M` $]]c] c] 3i	]
 #Y] I] ]   %]@ 445
 
v 
e 
  6
 ,,-Y
Y"3iY47Y?BYJMYY  .Y" ++334>B

%
,4UO

  5
& %kCCDk2236 e HTN  4 E & ++,VVA& AU A8D> A  -A & S    '* ))*F  T   +, ' $  	
    (& 556  	
   7:DI &\	  
&\	*T&\ 
tF| 
# 
"&\"	" "J 00$//2E2EFG
 !	&\	  
&		
  H --.56
#Cy/2	&\ /6 	'')C)C)G)GH "&	
c 
 
$v,		
 d6l  ))0015 5C 5c 5%PSBT 5 25 44<<=67BB $S	B03B
63;B >B 

))*/ /C /c /%:L / +/" --55)) /
/ &/ 
/ 63;	//@ 

# f F # #    $" ../
 

  	
     0" 

# f 6      $ 77??@S&S&S& S& 	S&
 FS& S& S& 
S& S& dS& 8FXf-x/??@S&  AS&n 77;;<  	
 F   
  d ,, ,, ,, 8FXf-x/??@ =88F# x/?  77??@JJJ 3iJ 	J
 J VJ 6
J dJ 8FXf-x/??@J AJ\ 77;;< 3i 	
  V 6
 d ,, ,, ,, 8FXf-x/??@ =4OOVO 6
O 6"	O
 &!O O O 
O O 6668F#3Xf5EEFOd ../UK/((V( 6
( 6"	(
 &!( ( ( 
( 666!"( 0 0(4 ''(<(<=''(M(MN 
 
V 
 6
 
 6"	 

 &! 
  
  
 
 
 666!" 
 O > 
F "";#H#HIG4< G JG AAIIJV 6
 	
   
 666!" K* 55==>((V( 6
( 	(
 ( ( ( 
( 666!"( ?(  55>>?((V( 6
( 	(
 ( 
( 666!"( @( @@HHIKKVK 6
K 	K
 K K K 
K 6666612K JK0V 6
 	
  
  6 44<<=11V1 6
1 	1
 1 1 
1 6666)*1 >14 ??GGHCCVC 6
C 	C
 C C 
C 6666669:C IC4 22::;11V1 6
1 	1
 1 1 
1 6666)*1 <14 ++,VV   - & $(%)37&VZ & EKK &
 U\\"& & & E//0&  '&X diiAB  C &&{';';<--.VVVV, 6
 6"	
 &!  !&  - / => 00889 V 6"	
 &!  &!  
 d  68F#Xf%556 :6 77??@``` V` 6"	`
 &!` ` &!` ` 
` d` 68F#Xf%556` A`H 77;;<""" V" 6"	"
 &!" " &!" " 
" d" ,," ,," ,," 68F#Xf%556" ="J 778VVV$  6"	
 &!  v  % 90 667VVV$  6"	
 &!  v   % 82 112c'v c'E#s(O c'   3c'L ( HH	H H 	H H )H ' II	I I 	I I  (I$ %@%@	%@ %@ 	%@ %@ %@P ))112"";#H#HI J 32 (()<: <C <
 <J < *< (=* =3 =z =: =  )=@@@$.@8B@PT@, 001Xx (f (vv~)> (  ! 2( % $'$'+/		tS%	  c5 
! (	  &  &; ';
8 //334//334//334$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9:	3	3$s)$	3 DK(	3 		3 ; L ; L ; L 5 5 5	3 66::;66::;66::;##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A	?	?$s)$	? DK(	? 		? B S B S B S < < <	?@ 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd; #;;c; UO; 	; < ? P X; 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd; #GGcG UOG 	G < F WG 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd; !% $	GGcG uoG uo	G
 G < ? P XG 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd; !% $	SScS uoS uo	S
 S < F WS 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd; !% $ $QQcQ uoQ uo	Q
 uoQ Q < ? P XQ 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd; !% $ $		c	 uo	 uo		
 uo	 	 < F W	 
 	c %! 	
  4 >/ FK+\&,/-d& R ++,[BBC[112. 3 D -.8 ++,[BBC[112. 3 D -.8 **+KAABK001. 2 C ,.@ **+KAABK001. 2 C ,.@/6<~/d 		(>>?--.S / @ )S> 		'==>,,-S . ? (S<;; &{<<={++,. - > '.6 '==>,,-. . ? (.6 44889!!))+*O*OP!!))+*>*>? @ Q : 33778  (()N)NO  (()=)=> ? P 9 0044511556##K$I$IJ##K$8$89%%k&K&KL%%k&:&:;&&{'L'LM&&{';';<A = N < M : K 7 6A //779O9O9S9STU
 !%	KKcK K uo	K
 K  VK 	%%t'?'?'C'CD !!))+*>*>?
 !% $UUcU U uo	U
 uoU U  @
U 	&&(A(A(E(EF 
 !% $ $

c
 
 uo	

 uo
 uo
 
 
V/7	&	7$,V$47IO77-'9 -f - EEcE E %!	E
 E ER ))112F v $  3 ,,d.?.?@A  B ++,-" ." //019 29 2234C 5C2 AABCM DM22 
2 2  V2  	2 
 2  66>2 j --.X~&L
LL VL 	L
 L 66>L ' /L4 //0X~&L
LL VL 	L
 L 66>L ' 1L/F /u / /5F 5u 5 5
 
3E 
(F%7 FV F F!Xf% !& !NN#'N05NEJ\\N&f & & &T &"/f / / / /T /"!V !49 !T !$V $49 $T $ 223S StCy S S   4S  c4c4
c4 c4 	c4
 c4 c4 c4L ,,-  
  	
     .   	#   !	# ==>$)..:N:N1  ?1  d t 0 [BBC>>?', tW  @ DtWn 0088$:Q:Q:U:UVW  (()=)=>
  $#PPsCxP P e_	P
 e_P P   ? XPf //334$$[%J%JK$$[%9%9:
 48	WW%S/*W W E%,/0	W
 W   ; L 5
W. --.--.--.	v 	c3h 	F 	   / / /
	 ../../../	 	sCx 	V 	   0 0 0
	38_ c3_f,- 	8 667667667\T  8 8 8Tn %UEu   & $X4 X  %X ,,dkkoo>? $( ==%)
	
 EKK 
 LL	

 U\\"
 
  @
 **+,
 $( ==%)	 EKK 	
 LL U\\"  - 	", #, ../''(<(<= #^^))))) ) 	)
 V) ) )  > 0)X ;;<$$,,44[5I5IJX{#--- - 66>	- $ K =-x HHPPQ
 G* #'!G*G*	G* G* 	G*
 G* G* E?G* 66>G* RG*T '   ( ))*9  +9 '6 (6 ))488<<89 $( 		D
	D EKK 	D 
&			D
 	D :	D --t||/?/?@A.& .x} . B. 		==>, ?, 		"38 
  #
 5: 
 <A +: 		"  #
 'G (G t{{ + djj ) djj )  - TYY ' TYY ' $.. 1  / ""D$4$4 5  - !!4?? 3 $.. 1 ##T%6%6 7 t{{ + !!4?? 3  - !!4?? 3 djj ) TYY ' t{{ + djj )  - ""D$4$4 5 %%t':': ; TYY 'r-   