
    sg[                         U d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
  G d de      Z G d d	e      Z G d
 de      Z G d de      Zdaeeeej                            ed<   dej$                  fdZy)    N)ListOptional)_get_device_index)Function)commc                   ,    e Zd Zed        Zed        Zy)	Broadcastc                    t        d |D              sJ d       |D cg c]  }t        |d       }}|| _        t        |      dk(  ryt        |      | _        |d   j                         | _        t        j                  || j                        }g }t        | j                  dd        D ]#  \  }}|r	|D ]  }|j                  ||           %  | j                  |  t        |D 	
cg c]  }	|	D ]  }
|
  c}
}	      S c c}w c c}
}	w )Nc              3   N   K   | ]  }|j                   j                  d k7    ywcpuNdevicetype.0is     O/var/www/html/venv/lib/python3.12/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>   "      
'(AHHMMU"
   #%z2Broadcast function not implemented for CPU tensorsTr       )allr   target_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradappendmark_non_differentiabletuple)ctxr   inputsxoutputsnon_differentiablesidxinput_requires_gradoutputtensorsts              r   forwardzBroadcast.forward   s'    
,2
 
 	@?	@ 
 <GGa(D1GG%v;!V!!9//1**63??C (1#2F2Fqr2J(K 	<$C$&% <F'..vc{;<	< 	$##%89w@G@1a@a@AA H As   D*D
c                 ^    dt        j                  | j                  | j                  g| z   S )NN)ReduceAddCoalescedapplyr   r   r&   grad_outputss     r   backwardzBroadcast.backward   s4    +11cnn
/;
 
 	
    N__name__
__module____qualname__staticmethodr0   r7   r   r8   r   r	   r	   
   s*    B B& 
 
r8   r	   c                   ,    e Zd Zed        Zed        Zy)r3   c                    t        dt        |      |      D cg c]  }||   j                          c}| _        t        dt        |      |      D cg c]
  }||||z     }}t	        j
                  ||      S c c}w c c}w )Nr   )ranger   r   r   r   reduce_add_coalesced)r&   destinationr   gradsr   grads_s         r   r0   zReduceAddCoalesced.forward'   s     ,1CJ
+K
&'E!H!
 6;1c%j*5UV%A
N+VV((==
 Ws   A:A?c                 H    dt        j                  | j                  g| z   S )NNN)r	   r4   r   r5   s     r   r7   zReduceAddCoalesced.backward0   s(    
 OOCOO;l;< 	<r8   Nr9   r   r8   r   r3   r3   &   s(    > > < <r8   r3   c                   ,    e Zd Zed        Zed        Zy)Gatherc                     t        d |D              sJ d       |dk(  rd _        nt        |d      }| _        | _        t	        d |D               _        t        d |D              r4|dk(  r/t	        d |D              }t        j                  d	       d _        nd
 _        t	         fd|D               _	        t        j                  | j                   j                        S )Nc              3   N   K   | ]  }|j                   j                  d k7    ywr   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>;   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   <   K   | ]  }|j                           y wr2   )r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>D   s     >!q||~>s   c              3   B   K   | ]  }|j                         d k(    ywr   N)dimr   r/   s     r   r   z!Gather.forward.<locals>.<genexpr>E   s     ,quuw!|,s   r   c              3   >   K   | ]  }|j                  d         yw)r   N)viewrO   s     r   r   z!Gather.forward.<locals>.<genexpr>F   s     5166!95s   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.Fc              3   T   K   | ]  }|j                  j                         ! y wr2   )sizerN   )r   r   r&   s     r   r   z!Gather.forward.<locals>.<genexpr>O   s     @Asww@s   %()r   target_devicer   rN   r%   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r&   rT   rN   r'   s   `   r   r0   zGather.forward9   s     
,2
 
 	=<	= 
 E! %C-mTBM -C>v>>,V,,5f55FMM'
 %)C!$)C!@@@{{6377C,=,=>>r8   c                     t         j                  | j                  | j                  | j                  |      }| j
                  rt        d |D              }d|z   S )Nc              3   &   K   | ]	  }|d      ywrM   r   )r   gs     r   r   z"Gather.backward.<locals>.<genexpr>X   s     #BQAaD#Bs   rF   )Scatterr4   rU   rY   rN   rX   r%   )r&   grad_outputscattered_gradss      r   r7   zGather.backwardR   sK    !--NNCOOSWWk
   ##B/#BBOo--r8   Nr9   r   r8   r   rH   rH   8   s(    ? ?0 . .r8   rH   c                   ,    e Zd Zed        Zed        Zy)r^   c           	         |D cg c]  }t        |d       }}|| _        |j                  j                  dk7  r|j	                         nd| _        d }t        j                  j                         r;| j
                  dk(  r,|D cg c]!  }t        t        j                  d|            # }}t        j                  |||| j                  |      }|t        |      D ]s  \  }	}
t        j                  j                  ||	         5  t        j                  j                         }|j                  ||	          |
j                  |       d d d        u |S c c}w c c}w # 1 sw Y   xY w)NTr   cuda)r   rN   r   r   r   r   torchrd   is_available_get_streamr   scatterr!   current_streamwait_streamrecord_stream)r&   r   chunk_sizesrN   inputr(   streamsr   r)   r   r-   main_streams               r   r0   zScatter.forward]   sG   ;FGa(D1GG161B1Be1K5++-QS::""$)9)9R)? IT>DELL89G  ,,uk;Q&w/ 6	6ZZ&&{1~6 6"'**";";"=K++GAJ7((56 66
 # H6 6s   E&EAEE'	c                 `    d d d t        j                  | j                  | j                  g| fS r2   )rH   r4   r   rN   )r&   r_   s     r   r7   zScatter.backwardr   s+    T4c.>.>!V+!VVVr8   Nr9   r   r8   r   r^   r^   \   s*     ( W Wr8   r^   _streamsr   c                 6   | j                   dk(  ryt        t        | j                   d      }|yt        dg|j	                         z  at        | j
                     ,|j                  | j
                        t        | j
                  <   t        | j
                     S )zBGet a background stream for copying between CPU and target device.r   N)r   getattrre   rq   device_countindexStream)r   
device_mods     r   rg   rg   {   s     {{eT2J6J3355%!+!2!26<<!@FLL!!r8   )rV   typingr   r   re   torch._utilsr   torch.autogradr   torch.nn.parallelr   r	   r3   rH   r^   rq   rv   __annotations__r   rg   r   r8   r   <module>r}      sy     !  * # "
 
8< <$!.X !.HWh W8 48(4./
0 7" "r8   