
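# A quick sketch of how the public APIs in this module fit together (illustrative
# only, not part of the module itself; it assumes a 4-rank job with one GPU per
# rank and an already-initialized process group):
#
#     from torch.distributed.device_mesh import init_device_mesh
#     from torch.distributed.tensor.placement_types import Shard
#
#     mesh = init_device_mesh("cuda", (4,))
#     dt = distribute_tensor(torch.randn(8, 16), mesh, [Shard(0)])  # each rank holds a (2, 16) shard
#     local = dt.to_local()                                         # plain torch.Tensor shard on this rank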
import inspect
import warnings
from typing import Any, Callable, cast, Optional, Sequence, Tuple

import torch
import torch.distributed.tensor._dispatch as op_dispatch
import torch.distributed.tensor._random as random
import torch.nn as nn
from torch.distributed.device_mesh import _mesh_resources, DeviceMesh
from torch.distributed.tensor._collective_utils import check_tensor_meta, mesh_broadcast
from torch.distributed.tensor._dtensor_spec import DTensorSpec, TensorMeta
from torch.distributed.tensor._random import is_rng_supported_mesh, OffsetBasedRNGTracker
from torch.distributed.tensor._redistribute import Redistribute, redistribute_local_tensor
from torch.distributed.tensor._utils import (
    compute_global_tensor_info,
    compute_local_shape,
    normalize_to_torch_size,
)
from torch.distributed.tensor.placement_types import Partial, Placement, Replicate, Shard


__all__ = [
    "DTensor",
    "distribute_tensor",
    "distribute_module",
    "ones",
    "empty",
    "full",
    "rand",
    "randn",
    "zeros",
]

aten = torch.ops.aten


class _ToTorchTensor(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input: "DTensor", grad_placements: Optional[Sequence[Placement]]):
        ctx.dtensor_spec = input._spec
        ctx.grad_placements = grad_placements
        local_tensor = input._local_tensor
        # return a fresh view so autograd records the DTensor -> Tensor boundary
        return local_tensor.view_as(local_tensor)

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        dtensor_spec = ctx.dtensor_spec
        mesh = dtensor_spec.mesh
        grad_placements = ctx.grad_placements
        dtensor_meta = dtensor_spec.tensor_meta

        _, tensor_stride = compute_global_tensor_info(
            grad_output, mesh, dtensor_spec.placements
        )
        tensor_stride = tuple(tensor_stride)
        grad_placements = grad_placements or dtensor_spec.placements
        grad_spec = DTensorSpec(
            mesh,
            grad_placements,
            tensor_meta=TensorMeta(
                shape=dtensor_meta.shape,
                stride=tensor_stride,
                dtype=dtensor_meta.dtype,
            ),
        )

        return (
            DTensor(grad_output, grad_spec, requires_grad=grad_output.requires_grad),
            None,
        )


class _FromTorchTensor(torch.autograd.Function):
    @staticmethod
    def forward(
        ctx,
        input: torch.Tensor,
        device_mesh: DeviceMesh,
        placements: Tuple[Placement, ...],
        run_check: bool,
        shape: Optional[torch.Size] = None,
        stride: Optional[Tuple[int, ...]] = None,
    ) -> "DTensor":
        ctx.previous_placement = placements
        ctx.previous_device_mesh = device_mesh

        if shape and stride:
            tensor_shape, tensor_stride = shape, stride
        elif not shape and not stride:
            # derive the global shape/stride from the local tensor, assuming it is
            # evenly sharded across ranks according to the placements
            global_shape, global_stride = compute_global_tensor_info(
                input, device_mesh, placements
            )
            tensor_shape, tensor_stride = torch.Size(global_shape), tuple(global_stride)
        else:
            raise RuntimeError(
                f"Found shape:{shape}, stride:{stride}.",
                "Please pass both shape and stride at the same time.",
            )

        if device_mesh.get_coordinate() is None:
            # ranks that do not participate in the mesh hold an empty local tensor
            input = input.new_empty(0, requires_grad=input.requires_grad)
        elif run_check:
            # only verify shape/stride across ranks when they were not given explicitly
            check_shape_stride = not shape and not stride
            check_tensor_meta(input, check_shape_stride=check_shape_stride)
            for idx, placement in enumerate(placements):
                if placement.is_replicate():
                    # broadcast from the first rank of the mesh dimension so that
                    # replicated data is identical on every rank
                    input = input.contiguous()
                    mesh_broadcast(input, device_mesh, mesh_dim=idx)

        dist_spec = DTensorSpec(
            device_mesh,
            placements,
            tensor_meta=TensorMeta(tensor_shape, tensor_stride, input.dtype),
        )

        return DTensor(
            input.view_as(input),
            dist_spec,
            # requires_grad of the dist tensor depends on whether input requires grad
            requires_grad=input.requires_grad,
        )

    @staticmethod
    def backward(ctx, grad_output: "DTensor"):
        previous_placement = ctx.previous_placement
        previous_device_mesh = ctx.previous_device_mesh

        # reshard the gradient to the placements used when the DTensor was created
        # so that local gradients can be returned directly
        if grad_output.placements != previous_placement:
            current_spec = grad_output._spec
            target_spec = DTensorSpec(
                previous_device_mesh,
                previous_placement,
                tensor_meta=grad_output._spec.tensor_meta,
            )
            local_tensor = grad_output._local_tensor
            output = redistribute_local_tensor(
                local_tensor, current_spec, target_spec, is_backward=True
            )
            return output, None, None, None, None, None

        return grad_output.to_local(), None, None, None, None, None


class DTensor(torch.Tensor):
    r"""
    ``DTensor`` (Distributed Tensor) is a subclass of ``torch.Tensor`` that provides single-device like
    abstraction to program with multi-device ``torch.Tensor``. It describes the distributed tensor sharding
    layout (DTensor Layout) through the :class:`DeviceMesh` and the following types of :class:`Placement`:

    * :class:`Shard`: Tensor sharded on the tensor dimension ``dim`` on the devices of the ``DeviceMesh`` dimension
    * :class:`Replicate`: Tensor replicated on the devices of the ``DeviceMesh`` dimension
    * :class:`Partial`: Tensor is pending reduction on the devices of the ``DeviceMesh`` dimension

    When calling PyTorch operators, ``DTensor`` overrides the PyTorch operators to perform sharded computation and issue
    communications whenever necessary. Along with the operator computation, ``DTensor`` will transform or propagate the
    placements (DTensor Layout) properly (based on the operator semantics) and generate new ``DTensor`` outputs.

    To ensure numerical correctness of the ``DTensor`` sharded computation when calling PyTorch operators, ``DTensor``
    requires every Tensor argument of the operator to be a DTensor.
    """

    _local_tensor: torch.Tensor
    _spec: DTensorSpec
    __slots__ = ["_local_tensor", "_spec"]

    # class attribute that handles operator placement propagation and dispatching
    _op_dispatcher: op_dispatch.OpDispatcher = op_dispatch.OpDispatcher()

    @staticmethod
    @torch._disable_dynamo
    def __new__(
        cls,
        local_tensor: torch.Tensor,
        spec: DTensorSpec,
        *,
        requires_grad: bool,
    ) -> "DTensor":
        """
        Construct a DTensor from a local tensor, device mesh, and placement and
        other tensor properties (i.e. shape, requires_grad, strides, etc).

        .. note:: This is not a public API and it's only supposed to be used by the
            operator implementations and internals. If you want to construct a
            DTensor from a local tensor, consider using ``DTensor.from_local``, if
            you want to construct a DTensor from a "global" tensor (where you
            already have tensor initialized and want to shard this tensor),
            consider using ``distribute_tensor``.
        """
        if local_tensor.requires_grad and not requires_grad:
            warnings.warn(
                "To construct DTensor from torch.Tensor, it's recommended to "
                "use local_tensor.detach() and make requires_grad consistent."
            )

        # wrap the local tensor and record the sharding spec; this does not
        # perform any actual data distribution
        assert spec.tensor_meta is not None, "TensorMeta should not be None!"
        r = torch.Tensor._make_wrapper_subclass(
            cls,
            spec.tensor_meta.shape,
            strides=spec.tensor_meta.stride,
            dtype=local_tensor.dtype,
            device=local_tensor.device,
            layout=local_tensor.layout,
            requires_grad=requires_grad,
        )

        r._spec = spec
        r._local_tensor = local_tensor
        return r

    def __repr__(self):
        return (
            f"DTensor(local_tensor={self._local_tensor}, "
            f"device_mesh={self._spec.mesh}, placements={self._spec.placements})"
        )

    def __tensor_flatten__(self):
        """
        protocol to inform how to flatten a DTensor to local tensor
        for PT2 tracing
        """
        return ["_local_tensor"], (self._spec, self.requires_grad)

    @staticmethod
    def __tensor_unflatten__(inner_tensors, flatten_spec, outer_size, outer_stride):
        assert (
            flatten_spec is not None
        ), "Expecting spec to be not None from `__tensor_flatten__` return value!"
        local_tensor = inner_tensors["_local_tensor"]
        spec, requires_grad = flatten_spec
        unflatten_tensor_meta = TensorMeta(
            shape=outer_size,
            stride=outer_stride,
            dtype=spec.tensor_meta.dtype,
        )
        unflatten_spec = DTensorSpec(
            spec.mesh,
            spec.placements,
            tensor_meta=unflatten_tensor_meta,
        )
        return DTensor(
            local_tensor,
            unflatten_spec,
            requires_grad=requires_grad,
        )

    def __coerce_tangent_metadata__(self):
        if not any(isinstance(p, Partial) for p in self.placements):
            return self
        placements = [
            Replicate() if isinstance(p, Partial) else p for p in self.placements
        ]
        return self.redistribute(device_mesh=self.device_mesh, placements=placements)

    def __coerce_same_metadata_as_tangent__(self, flatten_spec):
        (spec, _) = flatten_spec
        return self.redistribute(
            device_mesh=self.device_mesh,
            placements=spec.placements,
        )

    @classmethod
    @torch._disable_dynamo
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        return DTensor._op_dispatcher.dispatch(
            func,
            args,
            kwargs or {},
        )

    @staticmethod
    def from_local(
        local_tensor: torch.Tensor,
        device_mesh: Optional[DeviceMesh] = None,
        placements: Optional[Sequence[Placement]] = None,
        *,
        run_check: bool = False,
        shape: Optional[torch.Size] = None,
        stride: Optional[Tuple[int, ...]] = None,
    ) -> "DTensor":
        """
        Create a :class:`DTensor` from a local torch.Tensor on each rank
        according to the ``device_mesh`` and ``placements`` specified.

        Args:
            local_tensor (torch.Tensor): local torch.Tensor on each rank.
            device_mesh (:class:`DeviceMesh`, optional): DeviceMesh to place the
                tensor, if not specified, must be called under a DeviceMesh
                context manager, default: None
            placements (List[:class:`Placement`], optional): the placements that
                describes how to place the local torch.Tensor on DeviceMesh, must
                have the same number of elements as ``device_mesh.ndim``.

        Keyword args:
            run_check (bool, optional): at a cost of extra communications, perform
                sanity check across ranks to check each local tensor's meta information
                to ensure correctness. If have :class:`Replicate` in ``placements``, the
                data on first rank of the device mesh dimension will be broadcasted
                to other ranks. default: False
            shape (torch.Size, optional): A List of int which specifies the size of
                DTensor which build on top of `local_tensor`. Note this needs to be
                provided if the shape of ``local_tensor`` are different across the ranks.
                If not provided, ``shape`` will be computed assuming the given distributed
                tensor is evenly sharded across ranks. default: None
            stride (tuple, optional): A List of int which specifies the stride of DTensor.
                If not provided, ``stride`` will be computed assuming the given distributed
                tensor is evenly sharded across ranks. default: None

        Returns:
            A :class:`DTensor` object

        .. note:: When ``run_check=False``, it is the user's responsibility to ensure the
            local tensor passed in is correct across ranks (i.e. the tensor is sharded for
            the ``Shard(dim)`` placement or replicated for the ``Replicate()`` placement).
            If not, the behavior of the created DTensor is undefined.

        .. note:: ``from_local`` is differentiable, the `requires_grad` of the created
            `DTensor` object will depend on if `local_tensor` requires_grad or not.
        """
        # There should be no data communication unless there is a replication
        # placement, in which case we broadcast from the first rank of the mesh
        # dimension.
        device_mesh = device_mesh or _mesh_resources.get_current_mesh()
        device_type = device_mesh.device_type

        # convert the local tensor to the desired device based on the mesh's device_type
        if device_type != local_tensor.device.type and not local_tensor.is_meta:
            local_tensor = local_tensor.to(device_type)

        # set default placements to replicated if not specified
        if placements is None:
            placements = [Replicate() for _ in range(device_mesh.ndim)]
        else:
            placements = list(placements)
            for idx, placement in enumerate(placements):
                # normalize shard dim to be positive
                if placement.is_shard():
                    placement = cast(Shard, placement)
                    if placement.dim < 0:
                        placements[idx] = Shard(placement.dim + local_tensor.ndim)

        # `from_local` is differentiable: the gradient of the created dist tensor
        # should flow back to the original local tensor.
        return _FromTorchTensor.apply(
            local_tensor,
            device_mesh,
            tuple(placements),
            run_check,
            shape,
            stride,
        )

    def to_local(
        self, *, grad_placements: Optional[Sequence[Placement]] = None
    ) -> torch.Tensor:
        """
        Get the local tensor of this DTensor on its current rank. For sharding it returns
        a local shard of the logical tensor view, for replication it returns the replica on
        its current rank.

        Keyword args:
            grad_placements (List[:class:`Placement`], optional): the placements describes
                the future layout of any gradient layout of the Tensor returned from this
                function.
                `to_local` converts DTensor to local tensor and the returned local tensor
                might not be used as the original DTensor layout later in the code. This
                argument is the hint that user can give to autograd in case the gradient
                layout of the returned tensor does not match the original DTensor layout.
                If not specified, we will assume the gradient layout remains the same
                as the original DTensor and use that for gradient computation.

        Returns:
            A :class:`torch.Tensor` or ``AsyncCollectiveTensor`` object. it represents the
            local tensor on its current rank. When an ``AsyncCollectiveTensor`` object is returned,
            it means the local tensor is not ready yet (i.e. communication is not finished). In this
            case, user needs to call ``wait`` to wait the local tensor to be ready.

        .. note:: ``to_local`` is differentiable, the ``requires_grad`` of the local tensor returned
            will depend on if the `DTensor` requires_grad or not.
        """
        if not torch.is_grad_enabled():
            return self._local_tensor

        if grad_placements is not None and not isinstance(grad_placements, tuple):
            grad_placements = tuple(grad_placements)
        return _ToTorchTensor.apply(self, grad_placements)

    def redistribute(
        self,
        device_mesh: Optional[DeviceMesh] = None,
        placements: Optional[Sequence[Placement]] = None,
        *,
        async_op: bool = False,
    ) -> "DTensor":
        """
        ``redistribute`` performs necessary collective operations that redistribute the current
        DTensor from its current placements to a new placements, or from its current DeviceMesh
        to a new DeviceMesh. i.e. we can turn a Sharded DTensor to a Replicated DTensor by
        specifying a Replicate placement for each dimension of the DeviceMesh.

        When redistributing from current to the new placements on one device mesh dimension, we
        will perform the following operations including communication collective or local operation:

        1. ``Shard(dim)`` -> ``Replicate()``: ``all_gather``
        2. ``Shard(src_dim)`` -> ``Shard(dst_dim)``: ``all_to_all``
        3. ``Replicate()`` -> ``Shard(dim)``: local chunking (i.e. ``torch.chunk``)
        4. ``Partial()`` -> ``Replicate()``: ``all_reduce``
        5. ``Partial()`` -> ``Shard(dim)``: ``reduce_scatter``

        ``redistribute`` would correctly figure out the necessary redistribute steps for DTensors
        that are created either on 1-D or N-D DeviceMesh.

        Args:
            device_mesh (:class:`DeviceMesh`, optional): DeviceMesh to place the
                DTensor. If not specified, it would use the current DTensor's DeviceMesh.
                default: None
            placements (List[:class:`Placement`], optional): the new placements that
                describes how to place the DTensor into the DeviceMesh, must
                have the same number of elements as ``device_mesh.ndim``.
                default: replicate on all mesh dimensions

        Keyword args:
            async_op (bool, optional): whether to perform the DTensor redistribute operation
                asynchronously or not. Default: False

        Returns:
            A :class:`DTensor` object

        .. note:: ``redistribute`` is differentiable, which means user do not need to worry about
            the backward formula of the redistribute operation.

        .. note:: ``redistribute`` currently only supports redistributing DTensor on the same DeviceMesh,
            Please file an issue if you need to redistribute DTensor to different DeviceMesh.
        """
        # if device_mesh is not specified, use the current device_mesh
        device_mesh = device_mesh or self.device_mesh
        # raise error if new placements not specified
        if placements is None:
            raise RuntimeError("placements is needed for redistribute!")

        placements = list(placements)
        for i, placement in enumerate(placements):
            if placement.is_partial():
                raise RuntimeError(
                    "Can not redistribute to Partial, redistributing to Partial is for internal use only!"
                )
            elif isinstance(placement, Shard) and placement.dim < 0:
                # normalize shard dim to be positive
                placements[i] = Shard(placement.dim + self.ndim)
        placements = tuple(placements)

        return Redistribute.apply(self, device_mesh, placements, async_op)

    def full_tensor(
        self, *, grad_placements: Optional[Sequence[Placement]] = None
    ) -> torch.Tensor:
        """
        Return the full tensor of this DTensor. It will perform necessary collectives
        to gather the local tensors from other ranks in its DeviceMesh and concatenate
        them together. It's syntactic sugar for the following code:

        ``dtensor.redistribute(placements=[Replicate()] * mesh.ndim).to_local()``

        Keyword args:
            grad_placements (List[:class:`Placement`], optional): the placements describes
                the future layout of any gradient layout of the full Tensor returned from this
                function.
                `full_tensor` converts DTensor to a full torch.Tensor and the returned torch.tensor
                might not be used as the original replicated DTensor layout later in the code. This
                argument is the hint that user can give to autograd in case the gradient
                layout of the returned tensor does not match the original replicated DTensor layout.
                If not specified, we will assume the gradient layout of the full tensor be replicated.

        Returns:
            A :class:`torch.Tensor` object that represents the full tensor of this DTensor.

        .. note:: ``full_tensor`` is differentiable.
        """
        redist_res = self.redistribute(
            placements=[Replicate()] * self.device_mesh.ndim, async_op=False
        )
        return _ToTorchTensor.apply(redist_res, grad_placements)

    @property
    def device_mesh(self) -> DeviceMesh:
        """
        The :class:`DeviceMesh` attribute that associates with this DTensor object.

        .. note:: ``device_mesh`` is a read-only property, it can not be set.
        """
        return self._spec.mesh

    @property
    def placements(self) -> Tuple[Placement, ...]:
        """
        The placements attribute of this DTensor that describes the layout of this
        DTensor on its DeviceMesh.

        .. note:: ``placements`` is a read-only property, it can not be set.
        """
        return self._spec.placements

    def __create_write_items__(self, fqn: str, object: Any):
        from torch.distributed.checkpoint.planner_helpers import (
            _create_write_items_for_dtensor,
        )

        if hasattr(self._local_tensor, "__create_write_items__"):
            return self._local_tensor.__create_write_items__(fqn, object)
        elif isinstance(self._local_tensor, torch.Tensor):
            return [_create_write_items_for_dtensor(fqn, object)]
        else:
            raise RuntimeError("Unsupported tensor type!")

    def __create_chunk_list__(self):
        from torch.distributed.checkpoint.planner_helpers import (
            _create_chunk_from_dtensor,
        )

        if hasattr(self._local_tensor, "__create_chunk_list__"):
            return self._local_tensor.__create_chunk_list__()
        elif isinstance(self._local_tensor, torch.Tensor):
            return [_create_chunk_from_dtensor(self)]
        else:
            raise RuntimeError("Unsupported tensor type!")

    def __get_tensor_shard__(self, index):
        if hasattr(self._local_tensor, "__get_tensor_shard__"):
            return self._local_tensor.__get_tensor_shard__(index)
        elif isinstance(self._local_tensor, torch.Tensor):
            return self.to_local()
        else:
            raise RuntimeError("Unsupported tensor type!")


def distribute_tensor(
    tensor: torch.Tensor,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Distribute a leaf ``torch.Tensor`` (i.e. nn.Parameter/buffers) to the ``device_mesh`` according
    to the ``placements`` specified. The rank of ``device_mesh`` and ``placements`` must be the
    same. The ``tensor`` to distribute is the logical or "global" tensor, and the API would use
    the ``tensor`` from the first rank of the DeviceMesh dimension as the source of truth to preserve
    the single-device semantic. If you want to construct a DTensor in the middle of the Autograd
    computation, please use :meth:`DTensor.from_local` instead.

    Args:
        tensor (torch.Tensor): torch.Tensor to be distributed. Note that if you
            want to shard a tensor on a dimension that is not evenly divisible by
            the number of devices in that mesh dimension, we use ``torch.chunk``
            semantic to shard the tensor and scatter the shards. The uneven sharding
            behavior is experimental and subject to change.
        device_mesh (:class:`DeviceMesh`, optional): DeviceMesh to distribute the
            tensor, if not specified, must be called under a DeviceMesh context
            manager, default: None
        placements (List[:class:`Placement`], optional): the placements that
            describes how to place the tensor on DeviceMesh, must have the same
            number of elements as ``device_mesh.ndim``. If not specified, we will
            by default replicate the tensor across the ``device_mesh`` from the
            first rank of each dimension of the `device_mesh`.

    Returns:
        A :class:`DTensor` or ``XLAShardedTensor`` object.

    .. note::
        When initializing the DeviceMesh with the ``xla`` device_type, ``distribute_tensor``
        returns `XLAShardedTensor` instead. See `this issue <https://github.com/pytorch/pytorch/issues/92909>`__
        for more details. The XLA integration is experimental and subject to change.
    """
    torch._C._log_api_usage_once("torch.dtensor.distribute_tensor")

    # get default device mesh if there's nothing specified
    device_mesh = device_mesh or _mesh_resources.get_current_mesh()
    device_type = device_mesh.device_type
    if device_type == "xla":
        try:
            # call PyTorch/XLA SPMD for the `xla` backend type device mesh;
            # this returns an XLAShardedTensor
            from torch_xla.distributed.spmd import xla_distribute_tensor

            return xla_distribute_tensor(tensor, device_mesh, placements)
        except ImportError as e:
            msg = "To use DTensor API with xla, you must install the torch_xla package!"
            raise ImportError(msg) from e

    # instantiate an RNG tracker if there is none yet; by default DTensor uses an
    # OffsetBasedRNGTracker to perform random operators
    if not random._rng_tracker and is_rng_supported_mesh(device_mesh):
        random._rng_tracker = OffsetBasedRNGTracker(device_type)

    if not tensor.is_leaf:
        raise RuntimeError(
            "`distribute_tensor` should be used to distribute leaf tensors! but found non-leaf tensor!"
        )

    # convert tensor to the corresponding device type if it's not already there
    if device_type != tensor.device.type and not tensor.is_meta:
        tensor = tensor.to(device_type)

    # set default placements to replicated if not specified
    if placements is None:
        placements = [Replicate() for _ in range(device_mesh.ndim)]

    if len(placements) != device_mesh.ndim:
        raise ValueError(
            f"`placements` must have the same length as `device_mesh.ndim`! "
            f"Found placements length: {len(placements)}, and device_mesh.ndim: {device_mesh.ndim}."
        )
    if isinstance(tensor, DTensor):
        # if the tensor is already a DTensor, only accept it if its device mesh
        # and placements already match the requested ones
        if tensor.device_mesh != device_mesh:
            raise ValueError(
                f"Cannot distribute a DTensor with device mesh {tensor.device_mesh} "
                f"to a different device mesh {device_mesh}."
            )
        if tensor.placements != tuple(placements):
            raise ValueError(
                f"Cannot distribute a DTensor with placements {tensor.placements} "
                f"to a different placements {placements}. do you want to call "
                f"`redistribute` instead?"
            )
        return tensor

    local_tensor = tensor.detach()

    # distribute the tensor according to the placements
    placements = list(placements)
    for idx, placement in enumerate(placements):
        if placement.is_shard():
            placement = cast(Shard, placement)
            if placement.dim < 0:
                # normalize shard placement dim
                placement = Shard(placement.dim + tensor.ndim)
                placements[idx] = placement
            local_tensor = placement._shard_tensor(local_tensor, device_mesh, idx)
        elif placement.is_replicate():
            placement = cast(Replicate, placement)
            local_tensor = placement._replicate_tensor(local_tensor, device_mesh, idx)
        else:
            raise RuntimeError(
                f"Trying to distribute tensor with unsupported placements {placement} on device mesh dimension {idx}!"
            )
    placements = tuple(placements)

    assert local_tensor is not None, "distributing a tensor should not be None"
    # detach the local tensor passed to DTensor since after construction
    # autograd works on top of the DTensor instead of the local tensor
    spec = DTensorSpec(
        mesh=device_mesh,
        placements=placements,
        tensor_meta=TensorMeta(
            shape=tensor.size(),
            stride=tensor.stride(),
            dtype=tensor.dtype,
        ),
    )
    return DTensor(
        local_tensor.requires_grad_(tensor.requires_grad),
        spec,
        requires_grad=tensor.requires_grad,
    )


def distribute_module(
    module: nn.Module,
    device_mesh: Optional[DeviceMesh] = None,
    partition_fn: Optional[Callable[[str, nn.Module, DeviceMesh], None]] = None,
    input_fn: Optional[Callable[[nn.Module, Any, DeviceMesh], None]] = None,
    output_fn: Optional[Callable[[nn.Module, Any, DeviceMesh], None]] = None,
) -> nn.Module:
    """
    This function exposes three functions to control the parameters/inputs/outputs of the module:

    1. To perform sharding on the module before runtime execution by specifying the
    ``partition_fn`` (i.e. allow user to convert Module parameters to :class:`DTensor`
    parameters according to the `partition_fn` specified).
    2. To control the inputs or outputs of the module during runtime execution by
    specifying the ``input_fn`` and ``output_fn``. (i.e. convert the input to
    :class:`DTensor`, convert the output back to ``torch.Tensor``)

    Args:
        module (:class:`nn.Module`): user module to be partitioned.
        device_mesh (:class:`DeviceMesh`): the device mesh to place the module.
        partition_fn (Callable): the function to partition parameters (i.e. shard certain
            parameters across the ``device_mesh``). If ``partition_fn`` is not specified,
            by default we replicate all module parameters of ``module`` across the mesh.
        input_fn (Callable): specify the input distribution, i.e. could control how the
            input of the module is sharded. ``input_fn`` will be installed as a module
            ``forward_pre_hook`` (pre forward hook).
        output_fn (Callable): specify the output distribution, i.e. could control how the
            output is sharded, or convert it back to torch.Tensor. ``output_fn`` will be
            installed as a module ``forward_hook`` (post forward hook).

    Returns:
        A module that contains parameters/buffers that are all ``DTensor`` s.

    .. note::
        When initializing the DeviceMesh with the ``xla`` device_type, ``distribute_module``
        returns nn.Module with PyTorch/XLA SPMD annotated parameters. See
        `this issue <https://github.com/pytorch/pytorch/issues/92909>`__
        for more details. The XLA integration is experimental and subject to change.
    """
    torch._C._log_api_usage_once("torch.dtensor.distribute_module")

    device_mesh = device_mesh or _mesh_resources.get_current_mesh()
    device_type = device_mesh.device_type
    if device_type == "xla":
        try:
            # annotate module parameters for PyTorch/XLA SPMD auto-partitioning,
            # or explicitly partition according to the `partition_fn` specified
            from torch_xla.distributed.spmd import xla_distribute_module

            return xla_distribute_module(
                module, device_mesh, partition_fn, input_fn, output_fn
            )
        except ImportError as e:
            msg = "To use DTensor API with xla, you must install the torch_xla package!"
            raise ImportError(msg) from e

    def replicate_module_params_buffers(m: nn.Module, mesh: DeviceMesh) -> None:
        # loop over the immediate parameters and buffers of the module and turn
        # every non-DTensor param/buffer into a replicated DTensor, so modules
        # untouched by partition_fn still end up with DTensor state
        full_replicate = [Replicate()] * mesh.ndim
        for key, param in m._parameters.items():
            if param is not None and not isinstance(param, DTensor):
                m.register_parameter(
                    key,
                    nn.Parameter(distribute_tensor(param.data, mesh, full_replicate)),
                )
        for key, buffer in m._buffers.items():
            if buffer is not None and not isinstance(buffer, DTensor):
                m._buffers[key] = distribute_tensor(buffer, mesh, full_replicate)

    if partition_fn is None:
        # if partition_fn is not specified, replicate all module params/buffers
        for name, submod in module.named_modules():
            replicate_module_params_buffers(submod, device_mesh)
    else:
        # apply partition_fn to each submodule, then replicate what is left over
        for name, submod in module.named_modules():
            partition_fn(name, submod, device_mesh)
            replicate_module_params_buffers(submod, device_mesh)

    # register input_fn as a module forward pre-hook
    if input_fn is not None:
        num_args = len(inspect.signature(input_fn).parameters)
        if num_args == 2:
            # deprecated input_fn that only takes (inputs, device_mesh)
            warnings.warn(
                "Deprecating input_fn that takes two arguments (inputs, device_mesh), "
                "please use input_fn that takes in (module, inputs, device_mesh) instead!",
                FutureWarning,
                stacklevel=2,
            )
            module.register_forward_pre_hook(lambda _, inputs: input_fn(inputs, device_mesh))
        elif num_args == 3:
            # input_fn takes in (module, inputs, device_mesh)
            module.register_forward_pre_hook(
                lambda mod, inputs: input_fn(mod, inputs, device_mesh)
            )
        else:
            raise ValueError(
                f"input_fn should take in 3 arguments, but got {num_args} arguments!"
            )
    # register output_fn as a module forward hook
    if output_fn is not None:
        num_args = len(inspect.signature(output_fn).parameters)
        if num_args == 2:
            # deprecated output_fn that only takes (outputs, device_mesh)
            warnings.warn(
                "Deprecating output_fn that takes two arguments (inputs, device_mesh), "
                "please use output_fn that takes in (module, inputs, device_mesh) instead!",
                FutureWarning,
                stacklevel=2,
            )
            module.register_forward_hook(
                lambda mod, inputs, outputs: output_fn(outputs, device_mesh)
            )
        elif num_args == 3:
            module.register_forward_hook(
                lambda mod, inputs, outputs: output_fn(mod, outputs, device_mesh)
            )
        else:
            raise ValueError(
                f"output_fn should take in 3 arguments, but got {num_args} arguments!"
            )

    return module


def _dtensor_init_helper(
    init_op,
    size: torch.Size,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
    **kwargs,
) -> DTensor:
    # if device_mesh is None, use the one from the mesh resources
    device_mesh = device_mesh or _mesh_resources.get_current_mesh()
    kwargs["device"] = device_mesh.device_type

    # set default placements to replicated if not specified
    placements = placements or tuple(Replicate() for _ in range(device_mesh.ndim))

    # check device_mesh against placements
    assert device_mesh.ndim == len(
        placements
    ), "mesh dimension does not match the length of placements"

    assert kwargs["layout"] == torch.strided, "layout value not supported!"
    torch_stride = torch._prims_common.make_contiguous_strides_for(size)

    # get the local tensor shape
    local_shape = compute_local_shape(size, device_mesh, placements)
    # initialize the local tensor
    if init_op == torch.full:
        fill_value = kwargs.pop("fill_value", 0)
        local_tensor = init_op(local_shape, fill_value, **kwargs)
    elif init_op == torch.rand or init_op == torch.randn:
        # this tensor meta is not used except for `shape`
        dtype = kwargs.get("dtype", torch.get_default_dtype())

        tensor_meta = TensorMeta(size, (0,), dtype)
        spec = DTensorSpec(device_mesh, tuple(placements), tensor_meta=tensor_meta)

        if random.is_rng_supported_mesh(device_mesh) and not random._rng_tracker:
            random._rng_tracker = random.OffsetBasedRNGTracker()

        assert random._rng_tracker is not None
        with random._rng_tracker._distribute_region(spec):
            local_tensor = init_op(local_shape, **kwargs)
    else:
        local_tensor = init_op(local_shape, **kwargs)

    spec = DTensorSpec(
        device_mesh,
        tuple(placements),
        tensor_meta=TensorMeta(
            size,
            torch_stride,
            local_tensor.dtype,
        ),
    )

    return DTensor(
        local_tensor,
        spec,
        requires_grad=kwargs["requires_grad"],
    )


def ones(
    *size,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    requires_grad: bool = False,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with the scalar value 1, with the shape defined
    by the variable argument ``size``.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: ones(1,2,3..) or ones([1,2,3..]) or ones((1,2,3..))

    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned DTensor.
            Default: ``torch.strided``.
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.ones,
        torch_size,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )


def empty(
    *size,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    requires_grad: bool = False,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with uninitialized data. The shape of the :class:`DTensor`
    is defined by the variable argument ``size``.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: empty(1,2,3..) or empty([1,2,3..]) or empty((1,2,3..))

    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned :class:`DTensor`.
            Default: ``torch.strided``.
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.empty,
        torch_size,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )


def full(
    size,
    fill_value,
    *,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    requires_grad: bool = False,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with ``fill_value`` according to ``device_mesh`` and
    ``placements``, with the shape defined by the argument ``size``.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: ones(1,2,3..) or ones([1,2,3..]) or ones((1,2,3..))
        fill_value (Scalar): the value to fill the output tensor with.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned DTensor.
            Default: ``torch.strided``.
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks.
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.full,
        torch_size,
        fill_value=fill_value,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )


def rand(
    *size,
    requires_grad: bool = False,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with random numbers from a uniform distribution
    on the interval ``[0, 1)``. The shape of the tensor is defined by the variable
    argument ``size``.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: ones(1,2,3..) or ones([1,2,3..]) or ones((1,2,3..))

    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned DTensor.
            Default: ``torch.strided``.
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks.
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.rand,
        torch_size,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )


def randn(
    *size,
    requires_grad: bool = False,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with random numbers from a normal distribution
    with mean 0 and variance 1. The shape of the tensor is defined by the variable
    argument ``size``.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: ones(1,2,3..) or ones([1,2,3..]) or ones((1,2,3..))

    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned DTensor.
            Default: ``torch.strided``.
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks.
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.randn,
        torch_size,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )


def zeros(
    *size,
    requires_grad: bool = False,
    dtype: Optional[torch.dtype] = None,
    layout: torch.layout = torch.strided,
    device_mesh: Optional[DeviceMesh] = None,
    placements: Optional[Sequence[Placement]] = None,
) -> DTensor:
    """
    Returns a :class:`DTensor` filled with the scalar value 0.

    Args:
        size (int...): a sequence of integers defining the shape of the output :class:`DTensor`.
            Can be a variable number of arguments or a collection like a list or tuple.
            E.g.: zeros(1,2,3..) or zeros([1,2,3..]) or zeros((1,2,3..))

    Keyword args:
        requires_grad (bool, optional): If autograd should record operations on the
            returned :class:`DTensor`. Default: ``False``.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned :class:`DTensor`.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, optional): the desired layout of returned :class:`DTensor`.
            Default: ``torch.strided``.
        device_mesh: :class:`DeviceMesh` type, contains the mesh info of ranks
        placements: a sequence of :class:`Placement` type: ``Shard``, ``Replicate``

    Returns:
        A :class:`DTensor` object on each rank
    """
    torch_size = normalize_to_torch_size(size)

    return _dtensor_init_helper(
        torch.zeros,
        torch_size,
        dtype=dtype,
        layout=layout,
        requires_grad=requires_grad,
        device_mesh=device_mesh,
        placements=placements,
    )