import logging
from functools import lru_cache
from typing import cast, List, NamedTuple, Tuple

import torch
import torch.distributed._functional_collectives as funcol
import torch.distributed.tensor._api as dtensor
from torch.distributed.tensor._dtensor_spec import DTensorSpec, TensorMeta
from torch.distributed.tensor.device_mesh import DeviceMesh
from torch.distributed.tensor.placement_types import (
    Partial,
    Placement,
    Replicate,
    Shard,
)


logger = logging.getLogger(__name__)


class _TransformInfo(NamedTuple):
    mesh_dim: int
    src_dst_placements: Tuple[Placement, Placement]
    # logical shape of the tensor on this mesh dimension
    logical_shape: List[int]


@lru_cache(maxsize=None)
def _gen_transform_infos(
    src_spec: DTensorSpec,
    dst_spec: DTensorSpec,
) -> List[_TransformInfo]:
    """
    Generate the transform infos from the source placements to the target placements.

    Transforming from the source to the target placement may take multiple steps, i.e.
    it may decompose Si -> Sj into Si -> R -> Sj.
    This also detects misaligned/nested shardings between the src/dst placements.
    E.g. suppose the redistribution to perform is (Shard(0), Shard(0)) -> (Replicate(), Shard(0)):
    here Shard(0) -> Shard(0) on mesh dimension 1 actually needs resharding, because the
    former is a nested sharding of a tensor already sharded on dimension 0, whereas the
    latter is the first sharding on tensor dimension 0.
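
    As an illustrative sketch (assuming a 2x2 mesh), that redistribution decomposes
    into three per-mesh-dim steps:

        mesh_dim 1: Shard(0)    -> Replicate()  # clear the nested (inner) sharding first
        mesh_dim 0: Shard(0)    -> Replicate()
        mesh_dim 1: Replicate() -> Shard(0)     # reshard on the inner mesh dim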
    """
    transform_infos: List[_TransformInfo] = []

    device_mesh = src_spec.device_mesh
    my_coordinate = device_mesh.get_coordinate()
    assert my_coordinate is not None

    # logical shape records the logical tensor shape on each mesh dimension,
    # which is needed so uneven shardings produce the correct output shape
    initial_logical_shape = list(src_spec.shape)
    mesh_dims_to_logical_shape = [initial_logical_shape]

    if device_mesh.ndim == 1:
        # if device_mesh is 1D, redistribute is a simple direct transformation
        transform_infos.append(
            _TransformInfo(
                mesh_dim=0,
                src_dst_placements=(src_spec.placements[0], dst_spec.placements[0]),
                logical_shape=initial_logical_shape,
            )
        )
        return transform_infos

    # Handle multi-dim device mesh placement redistribution. First, build the
    # logical shape for each mesh dim so uneven shards can be gathered
    # correctly on each mesh dim.
    for i, (src, dst) in enumerate(zip(src_spec.placements, dst_spec.placements)):
        current_logical_shape = mesh_dims_to_logical_shape[i]
        if isinstance(src, Shard):
            if i < device_mesh.ndim - 1:
                # calculate and save the logical shape for this sharding
                mesh_dim_size = device_mesh.size(mesh_dim=i)
                local_shard_size, _ = src._local_shard_size_on_dim(
                    current_logical_shape[src.dim],
                    mesh_dim_size,
                    my_coordinate[i],
                )
                new_logical_shape = list(current_logical_shape)
                new_logical_shape[src.dim] = local_shard_size
                mesh_dims_to_logical_shape.append(new_logical_shape)
        else:
            mesh_dims_to_logical_shape.append(current_logical_shape)

    # Next, derive the transform infos from the src to the dst placements.
    current_placements = list(src_spec.placements)
    target_placements = list(dst_spec.placements)

    if src_spec.num_shards > 1:
        # If src_spec has sharding, it could be misaligned with dst_spec; a common
        # case is nested sharding, i.e. (S(0), S(0)) -> (R, S(0)). Traverse from
        # inner to outer placements to detect misaligned shardings and replicate
        # the nested sharding first.
        for mesh_dim in reversed(range(len(current_placements))):
            current = current_placements[mesh_dim]
            target = target_placements[mesh_dim]
            if isinstance(target, Shard):
                # check for nested sharding of the same tensor dim on any mesh
                # dim before the current mesh_dim
                shard_dim = target.dim
                current_mesh_sharding, target_mesh_sharding = [], []
                for i, (s, p) in enumerate(
                    zip(current_placements, target_placements)
                ):
                    if i >= mesh_dim:
                        break
                    if s.is_shard(shard_dim):
                        current_mesh_sharding.append(i)
                    if p.is_shard(shard_dim):
                        target_mesh_sharding.append(i)
                if current_mesh_sharding != target_mesh_sharding:
                    # the sharding on this tensor dim is misaligned before the
                    # current mesh_dim: replicate first to clear the nested sharding
                    target = Replicate()

            if current != target:
                transform_infos.append(
                    _TransformInfo(
                        mesh_dim=mesh_dim,
                        src_dst_placements=(current, target),
                        logical_shape=mesh_dims_to_logical_shape[mesh_dim],
                    )
                )
                current_placements[mesh_dim] = target

    # Always traverse from outer to inner placement to collect the remaining
    # transform infos (the replication inserted for nested sharding may still
    # need to be resharded back to Shard).
    for mesh_dim, (current, target) in enumerate(
        zip(current_placements, target_placements)
    ):
        if current != target:
            transform_infos.append(
                _TransformInfo(
                    mesh_dim=mesh_dim,
                    src_dst_placements=(current, target),
                    logical_shape=mesh_dims_to_logical_shape[mesh_dim],
                )
            )
            current_placements[mesh_dim] = target

    return transform_infos


def redistribute_local_tensor(
    local_tensor: torch.Tensor,
    current_spec: DTensorSpec,
    target_spec: DTensorSpec,
    *,
    async_op: bool = False,
    is_backward: bool = False,
) -> torch.Tensor:
    """
    Redistribute the local tensor (torch.Tensor) from the current DTensorSpec to
    the target DTensorSpec, which involves the necessary collective calls to transform
    the local shard of the DTensor from its current spec to the target spec.
    """
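    # The body below walks the per-mesh-dim steps from _gen_transform_infos in
    # order; each step consumes the previous step's output as its local tensor,
    # and the final result is synchronized unless async_op=True.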
    if current_spec.mesh != target_spec.mesh:
        # TODO: alltoall/permute reshuffling to change device_mesh if they are not the same
        raise NotImplementedError("Cross device mesh comm not supported yet!")

    new_local_tensor = None
    device_mesh = current_spec.mesh

    my_coordinate = device_mesh.get_coordinate()

    if my_coordinate is None:
        # if rank is not part of the mesh, skip redistribute and simply return
        # local_tensor, which should be an empty tensor
        return local_tensor

    transform_infos = _gen_transform_infos(current_spec, target_spec)

    for transform_info in transform_infos:
        i = transform_info.mesh_dim
        current, target = transform_info.src_dst_placements
        num_chunks = device_mesh.size(mesh_dim=i)

        if current == target:
            # short cut, just use the original local tensor
            new_local_tensor = local_tensor
            continue

        logger.debug("redistribute from %s to %s on mesh dim %s", current, target, i)

        if target.is_replicate():
            # Case 1: transform to replicate
            if current.is_partial():
                # partial -> replicate: reduce the partial value
                partial_spec = cast(Partial, current)
                new_local_tensor = partial_spec._reduce_value(
                    local_tensor, device_mesh, i
                )
            elif current.is_shard():
                # shard -> replicate: gather shards across the mesh dim
                current_placement = cast(Shard, current)
                new_local_tensor = current_placement._to_replicate_tensor(
                    local_tensor, device_mesh, i, transform_info.logical_shape
                )
            else:
                raise RuntimeError(
                    f"redistribute from {current} to {target} not supported yet"
                )
        elif target.is_shard():
            # Case 2: transform to shard
            target_placement = cast(Shard, target)
            target_dim = target_placement.dim
            if current.is_partial():
                # partial -> shard: reduce and scatter the partial value
                partial_spec = cast(Partial, current)
                new_local_tensor = partial_spec._reduce_shard_value(
                    local_tensor, device_mesh, i, target_placement
                )
            elif current.is_replicate():
                # replicate -> shard: split the tensor and return the
                # corresponding local shard (no communication needed)
                new_local_tensor = target_placement._replicate_to_shard(
                    local_tensor, device_mesh, i, my_coordinate[i]
                )
            else:
                assert (
                    current.is_shard()
                ), f"Current placement should be shard but found {current}"
                shard_spec = cast(Shard, current)
                if shard_spec.dim != target_placement.dim:
                    # shard on one tensor dim -> shard on another: reshuffle
                    # the shards across the mesh dim
                    new_local_tensor = shard_spec._to_new_shard_dim(
                        local_tensor,
                        device_mesh,
                        i,
                        transform_info.logical_shape,
                        target_placement.dim,
                    )
        elif target.is_partial():
            if current.is_replicate():
                partial_spec = cast(Partial, target)
                # skip the replicate -> partial transformation in the backward pass:
                # converting replicated gradients back to partial is useless, as the
                # partial value would have to be reduced again later, which is more
                # expensive than keeping the gradient replicated
                new_local_tensor = (
                    partial_spec._partition_value(local_tensor, device_mesh, i)
                    if not is_backward
                    else local_tensor
                )
            elif current.is_shard():
                if not is_backward:
                    raise RuntimeError(
                        f"redistribute from {current} to {target} not supported yet"
                    )
                # for backward shard -> partial, just convert the shard to replicate
                current_placement = cast(Shard, current)
                new_local_tensor = current_placement._to_replicate_tensor(
                    local_tensor, device_mesh, i, transform_info.logical_shape
                )
            else:
                # partial -> partial: no-op
                new_local_tensor = local_tensor

        assert new_local_tensor is not None
        local_tensor = new_local_tensor

    assert new_local_tensor is not None, "redistribute failed!"

    if not async_op and isinstance(new_local_tensor, funcol.AsyncCollectiveTensor):
        new_local_tensor = new_local_tensor.wait()

    return new_local_tensor


class Redistribute(torch.autograd.Function):
    @staticmethod
    def forward(
        ctx,
        input: "dtensor.DTensor",
        device_mesh: DeviceMesh,
        placements: Tuple[Placement, ...],
        async_op: bool = False,
    ):
        current_spec = input._spec
        ctx.current_spec = current_spec
        ctx.async_op = async_op

        if input._spec.placements != placements:
            target_spec = DTensorSpec(
                device_mesh, placements, tensor_meta=input._spec.tensor_meta
            )

            local_tensor = input._local_tensor
            output = redistribute_local_tensor(
                local_tensor, current_spec, target_spec, async_op=async_op
            )
        else:
            # use the same local tensor if placements are the same
            output = input._local_tensor
            target_spec = current_spec

        return dtensor.DTensor(
            output,
            target_spec,
            requires_grad=input.requires_grad,
        )

    @staticmethod
    def backward(ctx, grad_output: "dtensor.DTensor"):
        previous_spec = ctx.current_spec
        current_spec = grad_output._spec
        async_op = ctx.async_op

        local_tensor = grad_output._local_tensor
        output = redistribute_local_tensor(
            local_tensor,
            current_spec,
            previous_spec,
            async_op=async_op,
            is_backward=True,
        )
        # normalize the target placement to replicate if it is partial
        normalized_placements: List[Placement] = []
        for previous_placement in previous_spec.placements:
            if previous_placement.is_partial():
                # keep the target placement as replicate instead of partial
                normalized_placements.append(Replicate())
            else:
                normalized_placements.append(previous_placement)

        spec = DTensorSpec(
            previous_spec.device_mesh,
            tuple(normalized_placements),
            tensor_meta=TensorMeta(
                shape=grad_output.shape,
                stride=grad_output.stride(),
                dtype=grad_output.dtype,
            ),
        )
        output_dtensor = dtensor.DTensor(
            output,
            spec,
            requires_grad=grad_output.requires_grad,
        )

        return (
            output_dtensor,
            None,
            None,
            None,
        )
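
# Illustrative end-to-end sketch (assumes an initialized process group; the mesh
# shape and tensor sizes are hypothetical): the public DTensor.redistribute API
# routes through Redistribute.apply, which calls redistribute_local_tensor above.
#
#     from torch.distributed.device_mesh import init_device_mesh
#     from torch.distributed.tensor import distribute_tensor, Replicate, Shard
#
#     mesh = init_device_mesh("cuda", (2, 2))
#     dt = distribute_tensor(torch.randn(8, 8), mesh, [Shard(0), Shard(0)])
#     out = dt.redistribute(mesh, [Replicate(), Shard(0)])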