
    sgg                     $   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZ d dlZd dlmZ d dlmc mZ d dlmZ  ej6                         serd dlmZ d dlmZ d dlm Z m!Z!m"Z" d dl#m$Z$ dejJ                  d	eejL                     d
eejN                     dedejJ                  f
dZ(	 	 d@ddd	eejL                     d
eejN                     dejJ                  fdZ) G d de*      Z+dddddddddedededed	eejL                     d
eejN                     de,dedee-df   de,de,dee.ef   fd Z/dddddd!d"ee.ef   d	eejL                     d
eejN                     de,dee-df   de,dee.ef   fd#Z0ddd$d"ee.ef   dee-df   de,dee.ef   fd%Z1	 	 dAd"ee.ef   d&ee.ef   de,de,dee.ef   f
d'Z2	 dBd"ee.ef   d(e,d)e,dee.ef   fd*Z3d"ee.ef   d+ee.ef   de,fd,Z4 G d- d.e      Z5	 dCd/ee.ef   d0ee.ef   d1e	e.   d
ejN                  d	eejL                     ddfd2Z6	 dCd0ee.ef   d1e	e.   d
ejN                  d	eejL                     ddf
d3Z7	 	 dDd/ee.ef   d0ee.ef   d
ejN                  d	eejL                     d4e,ddfd5Z8	 dCd/ee.ef   d0ee.ef   d
ejN                  d	eejL                     ddf
d6Z9ee.e-f   Z:ee:df   Z;ee.e;f   Z<ee.ef   Z=ee:ef   Z>d"e=d7ee;egdf   ddfd8Z?d"e=dee=e<f   fd9Z@d:e=d;e;d<eddfd=ZAd"e=d>e<de=fd?ZBy)E    N)AnyCallablecastDictListMappingMutableMapping
NamedTupleOptionalTupleTYPE_CHECKINGUnion)AsyncCollectiveTensor)distributed_c10d)ShardedTensor)distribute_tensorDTensor	Replicate)%compute_local_shape_and_global_offsetobjpgdevicecompanion_objreturnc                     | S N r   r   r   r   s       V/var/www/html/venv/lib/python3.12/site-packages/torch/distributed/_state_dict_utils.py_identity_funcr    "   s	     J    sharded_tensorr   c                 r   |t        j                         }t        j                  |      }| j	                         }| j                         d   }| j                         j                         }t        j                  ||z        |z  |z  }|t        j                  |      n|}|r|d   j                  j                         }	|	j                  j                  |j                  k7  r|	j                  |      }	||	j                         z
  }
|
dkD  r;t        j                   |	d|
g      }	n"t#        j$                  || j&                  |      }	t#        j(                  ||z  |	j&                  |      }t        j*                  ||	|       |j-                  dd|      j/                  | j                               }|S )Nr   )dtyper   )group)r   _get_default_groupdistget_world_sizelocal_shardssizenumelmathceil_get_pg_default_devicetensorflattenr   typetoFpadtorchzerosr$   emptyall_gather_into_tensornarrowreshape)r"   r   r   
world_sizeshards
dim_0_sizetensor_numel
chunk_size	pg_devicelocal_tensornum_paddingr/   s               r   _all_gather_sharded_tensorrC   +   s   
 
z002$$R(J((*F$$&q)J!&&(..0L:
23lBjPJ7=~//36  ay''//1##y~~5'??95L <#5#5#77?55;/?@L{{n229
 [[Z  F
 	B?]]1a.66~7J7J7LMFMr!   c                       e Zd Zy)CompanionMismatchN)__name__
__module____qualname__r   r!   r   rE   rE   Q   s    r!   rE   Fr   Tr   r   cpu_offloadr   
ranks_only
type_checknon_blockingiter_objectsharded_tensor_funcdtensor_functensor_funcrJ   rK   .rL   rM   c                   t        j                  d      }t        | t              r || |||      }n.t        | t              r || |||      }nt        | t         j
                        r || |||      }nt        | t        t        t        t        t        j                  f      s| | }nt        | t              r|t        |t              r3t        |j                               t        | j                               k7  rSt        |t              rdn5dt        |j                               dt        | j                               }t        |      | j!                         D ci c]#  \  }}|t#        ||||||||||   nd||	|
      % }}}nt        | t$        t&        f      r|3t        |t$        t&        f      rt)        |      t)        |       k7  rt        t+        |       D cg c]"  \  }}t#        ||||||||||   nd||	|
      $ }}}t        | t&              r;t'        |      }n/|	st-        j.                  |       }nt1        dt3        |              |rt5        j6                  |      |v rHt        |t         j
                        r,|r||j9                  |      }||j;                  ||
       |}|S t        |t              ri nd}|S c c}}w c c}}w )	ae  Iterate through the state dict, applying the given functions to each tensor type.

    Args:
        iter_object (Any): the target state_dict.
        sharded_tensor_func (Callable): the function to apply to ShardedTensor
        dtensor_func (Callable): the function to apply to DTensor
        tensor_func (Callable): the function to apply to Tensor
        pg (Optional[dist.ProcessGroup]): process group passed to tensor functions
        device (Optional[torch.device]): device passed to tensor functions
        cpu_offload (bool): whether to offload the tensors to CPU memory. This option is ignored
            if a companion_obj is supplied.
        companion_obj (Any): A companion object to the state dict. If this object
            is supplied, we attempt to copy the tensor to the companion object.
        ranks_only (Tuple[int, ...]): if this tuple is empty, all ranks will
            have the same state_dicts. Otherwise only ranks that in ``ranks_only``
            have the same state_dicts. Other ranks will get empty state_dicts.
        type_check (bool): check if the instance data type is a supported type
            that can be saved by DCP.  The current supported data types are
            torch.Tensor, DTensor, int, float, str, list, dict, None.
        non_blocking (bool): whether to use non-blocking copy when copying to the companion object.
    cpuN zset(companion_obj.keys())=z set(iter_object.keys())=rI   zUnexpected value type )rM   )r5   r   
isinstancer   r   TensorintfloatstrbytesioBytesIOdictsetkeysrE   items_iterate_state_dictlisttuplelen	enumeratecopydeepcopy
ValueErrorr1   r'   get_rankr2   copy_)rN   rO   rP   rQ   r   r   rJ   r   rK   rL   rM   
cpu_deviceretmsgkeyvalueidxvs                     r   ra   ra   U   s   H e$J+}-!+r6=I	K	);FMB	K	.+r6=A;eS% DE	K	&$=$/=%%'(C0@0@0B,CC mT2 2M..0133MS9I9I9K5L4NO 
 $C((  *//1
 U $#'4A4MmC0SW%%) 
 
  
K$	/$=4-8=!S%55##  $K0
 Q  #'4A4MmC0SW%%)
 
  k5)*CmmK(1${2C1DEFFr*j8c5<<(}4ffZ((##Cl#C# J sD)btJu
.
s   '(K,'Kr   r   rJ   rK   rL   
state_dictc                >    d }d }t        | ||t        |||||	      S )a)  
    Given a state_dict, this API gathers all the ShardedTensors or DTensors in
    the state_dict.


    Args:
        state_dict (Dict[str, Any]): the target sharded state_dict.
        pg (Optional[dist.ProcessGroup]): the process group that is used to
            gather ShardedTensor. Note that gathering a DTensor will use
            the DeviceMesh. So this argument will be ignored when gathering a
            DTensor.
        device: (Optional[torch.device]): the device that is used to
            perform allgather for ShardedTensor. Note that gathering a DTensor
            will use the DeviceMesh. So this argument will be ignored when
            gathering a DTensor.
        cpu_offload (bool): whether to offload the tensors to CPU memory. The
            default value is False.
        ranks_only: (Tuple[int, ...]): if this tuple is empty, all ranks will
            have the same state_dicts. Otherwise only ranks that in ``ranks_only``
            have the same state_dicts. Other ranks will get empty state_dicts.
        type_check: (bool): check if the instance data type is a supported type
            that can be saved by DCP.  The current supported data types are
            torch.Tensor, DTensor, int, float, str, list, dict, None.

    Returns:
        The gathered state dictionary.
    c                    t        j                  d      }t        | ||      }| j                         r'| j                         d   j                  j                  n|}|j                  |k7  r|j                  |      } | S |} | S )NrS   r   )r5   r   rC   r)   r/   r2   )ro   r   r   r   rk   output_tensorlocal_shard_devices          r   rO   z/_gather_state_dict.<locals>.sharded_tensor_func   s     \\%(
25"fE !!#  #**11 	
 #55!$$%78E  "Er!   c                 |   | j                   | j                  j                  k7  r%| j                  | j                  j                        } | j                  D cg c]  }t                }}| j                  | j                  |      } | j                         } t        | t              r| j                         } | S c c}w )N)device_mesh
placements)r   ry   device_typer2   rz   r   redistributeto_localrU   r   wait)ro   r   r   r   _rz   s         r   rP   z(_gather_state_dict.<locals>.dtensor_func  s    <<5,,888HHU..::;E ,1+;+;<aik<
<""))! # 
  e23JJLE =s   B9rr   ra   r    )rs   r   r   rJ   rK   rL   rO   rP   s           r   _gather_state_dictr      s7    J"* 
 
r!   )rK   rL   c                F    t        | t        t        t        ddd||	      }|S )a  
    Given a state_dict, this API offload all the tensors to CPU memory.

    Args:
        state_dict (Dict[str, Any]): the target state_dict.
        pg (Optional[dist.ProcessGroup]): the process group that is used to
            gather ShardedTensor. Note that gathering a DTensor will use
            the DeviceMesh. So this argument will be ignored when gathering a
            DTensor.
        ranks_only: (Tuple[int, ...]): if this tuple is empty, all ranks will
            have the same state_dicts. Otherwise only ranks that in ``ranks_only``
            have the same state_dicts. Other ranks will get empty state_dicts.
        type_check: (bool): check if the instance data type is a supported type
            that can be saved by DCP.  The current supported data types are
            torch.Tensor, DTensor, int, float, str, list, dict, None.

    Returns:
        The gathered state dictionary.
    NTrr   r   )rs   rK   rL   rl   s       r   _offload_state_dict_to_cpur   &  s0    4 
C Jr!   copy_state_dictc                 F    t        | t        t        t        dddd|||      S )a  
    Copies all tensors in a given state dict into a different state_dict with the
    same structure. Additionally, a copied state dict with the same value references
    is returned. Editing the keys on this state dict will not affect the
    passed in copy_state_dict (but the value references are the same).

    .. warning::
        It is expected by this function that state_dict and copy_state_dict share
        the same structure and data types.

    .. warning::
        The current supported data types are
            torch.Tensor, DTensor, int, float, str, list, dict, None.

    Args:
        state_dict (Dict[str, Any]): the target state_dict.
        copy_state_dict (Dict[str, Any]):
            The state dict we are copying into. This state_dict must have exactly
             the same structure as the source `state_dict`.
        non_blocking: (bool): Whether copy ops should be performed asynchronously
        type_check (bool): check if the instance data type is a supported type
            that can be saved by DCP. The current supported data types are
            torch.Tensor, DTensor, int, float, str, list, dict, None.

    Returns:
        State Dict copy
    NFr   )r   r   rJ   rK   r   rL   rM   r   )rs   r   rM   rL   s       r   _copy_state_dictr   N  s3    D %! r!   
pin_memoryshare_memoryc                     dt         j                  dt        t        j                     dt        t         j
                     dt        dt         j                  f
fd}t        | t        t        |dddd	d
	      }|S )a  
    Given a state_dict, create another state_dict with the same structure and elements.
    However, all tensors in the returned state_dict are new tensors on CPU. These
    tensors can be placed on pin_memory or share_memory based on the provided arguments.

    .. warning::
        Setting both `pin_memory` and `share_memory` to True significantly increases the
        latency of this method because of the nuances which require us to register memory
        as pinned directly as opposed to relying on the pin_memory cache allocator. This
        option should only be used for long lived tensors which are required to be shared.
        This is not the case as long as at least one of `pin_memory` or `share_memory` is
         set to False.

    r   r   r   r   r   c                 *   t        | j                               dk(  r!t        j                  d| j                        S rt        j
                  t        | j                               d| j                  i}|j                         }rd }t        j                  |||       t        t        j                  j                         j                  |j                         |j                         |j!                         z  d            }|dk(  s
J d|        |S rDt        j
                  t        | j                               d| j                  ij#                         S t        j
                  t        | j                               d| j                  iS )Nr   )r$   r$   c                     t        t        j                  j                         j	                  | j                                     }|dk(  s
J d|        y )Nr   z0Unpinning shared memory failed with error-code: )rW   r5   cudacudartcudaHostUnregisterdata_ptr)tsuccs     r   unpin_memoryzA_create_cpu_state_dict.<locals>.tensor_func.<locals>.unpin_memory  sJ    uzz002EEajjlSTD	QI$PQ!r!      z.Pinning shared memory failed with error-code: )rd   r*   r5   r/   r$   r7   rc   share_memory_weakreffinalizerW   r   r   cudaHostRegisterr   r+   element_sizer   )	r   r   r   r   r   r   r   r   r   s	          r   rQ   z+_create_cpu_state_dict.<locals>.tensor_func  s9    sxxz?a<<33U388:.@cii@A!AQ   L!4JJ%%'88

	ANN$44 AIKCD6JKH;;chhj 1CCNNPP;;chhj 1CCCr!   NFr   rr   )	r5   rV   r   r'   ProcessGroupr   r   ra   r    )rs   r   r   rQ   rl   s    ``  r   _create_cpu_state_dictr     s    $#D\\#DT&&'#D &#D 	#D
 
#DJ 
C Jr!   compared_state_dictc                    dt         j                  dt        t        j                     dt        t         j
                     dt        dt         j                  f
d}	 t        | t        t        |dddd	|d

       y# t        $ r Y yw xY w)a6  
    Given two state_dicts, check if the structures are the same. And
    if a [key, tensor] pair exist in one state_dict there must be
    the a corresponding pait, [key, other_tensor], in the other state_dict,
    where tensor and other_tensor have the same size and dtype.

    Return the check result.
    r   r   r   r   r   c                     |j                   | j                   k7  s!|j                         | j                         k7  rt        | S r   )r$   r*   rE   r   s       r   rQ   z1_check_state_dict_similarity.<locals>.tensor_func  s7     #))+}/A/A/Csxxz/Q##
r!   NFr   )r   r   rJ   rK   r   rL   T)
r5   rV   r   r'   r   r   r   ra   r    rE   )rs   r   rQ   s      r   _check_state_dict_similarityr     s    \\T&&' & 	
 
-	
   s   A8 8	BBc                   J    e Zd ZU ej                  ed<   ej                  ed<   y)_TensorInfor*   r$   N)rF   rG   rH   r5   Size__annotations__r$   r   r!   r   r   r     s    
**;;r!   r   full_state_dictlocal_state_dictr_   c                    g }|D ]  }t        j                         dk(  rA| |   }t        |t        j                        sJ |j                         j                  |      }n1| |   }	t        j                  |	j                  ||	j                        }|j                  |       |j                  |d       }
|
t        |
t              r|
|f||<   |||<    |t         j                  j                         }t        |      dkD  rt        j                   ||dd       nt        j"                  |d   d|       t%        ||||       y )Nr   )r*   r   r$   r   i  srcr%   )r'   ri   rU   r5   rV   detachr2   r7   r*   r$   appendgetr   r   r&   rd   _broadcast_coalesced	broadcast_distribute_tensors)r   r   r_   r   r   tensorsrn   
full_statefull_tensortensor_infolocal_states              r   _broadcast_tensorsr     s6    G 0==?a(-Jj%,,777$++-008K)#.K++ %%!''K 	{#&**35W-%0+$>S!$/S!)0, 
z""557
7|a!!"gsA6wqzq3($;r!   c           
      *   |t         j                  j                         }|D ]  }| j                  |d       }|t	        j
                  |      r-|d   }|d   }t        |j                  |j                  |j                        \  }}	t        t        |            D 
cg c]  }
t        |	|
   ||
   |	|
   z          }}
||   }t        j                  ||j                  |j                  |j                  |j                               | |<    y c c}
w )Nr   r   )shapestride)r'   r   r&   r   r5   	is_tensorr   r   ry   rz   rangerd   slicer   
from_localr   )r   r_   r   r   rn   _local_stater   r   r   offsetislicesrA   s                r   r   r     s    
z""557 
'++C65??<#@"1o"1o={668N8N
v CHE
BSTQ%q	58fQi#78TT"6* !( 2 2##""##%%'!

 Us   Dstrictc                 j   i }t        j                         dk(  r| j                         D ]n  \  }}t        j                  |      s|||<   !|j                         dk(  r|j                         ||<   Ht        |j                         |j                        ||<   p |g}t        j                  |d|       |d   }g }	t        |j                               }
t               }|j                         D ]  \  }}|j                  |       t        |t              s
||v r|||<   1t        j                         dk(  r| |   ||<   |	j                  |       t!        |	      dk\  spt#        |||	||       |	j%                           |r|
|z
  x}r|D ]  }|j'                  |        |	rt#        |||	||       y y )Nr   r   r   )r'   ri   r`   r5   r   dimrS   r   r*   r$   broadcast_object_listr^   r_   addrU   r   rd   r   clearpop)r   r   r   r   r   rl   rn   ro   broadcast_listr_   local_state_dict_keysglobal_keysmissing_keyss                r   _broadcast_state_dictr   <  s    C}}!)//1 	BJC??5) C! 99;C&uzz|U[[AC	B UN~1B?

C D 0 5 5 78%Kiik 
U%-&&(- %==?a&s+CHCt9>s$4dFBGJJL  1K?@<@# * $$S)* 3 0$C r!   c                    | j                         D ]  \  }}|| vrt        j                  |      s|||<   &|j                         dk(  r|j	                         ||<   Mt        |t        j                        sJ |j                  |d       }|~t        |t              rBt        |j                         j                  |      |j                  |j                        ||<   |j                         j                  |      ||<    y )Nr   )r`   r5   r   r   rS   rU   rV   r   r   r   r   r2   ry   rz   )r   r   r   r   rn   ro   r   s          r   _distribute_state_dictr   q  s     &++- B
Uo%u%$)S!YY[A$)IIKS!eU\\222*..sD9K"K1(9LLN%%f-++**) % ).(9(9&(A %'Br!   visitorc                     dt         dt        ddffd| j                         D ]  \  }} t        |      f|        y)z
    Invoke ``visitor`` for each value recursively in ``state_dict``.
    Mapping, list, and tuple will be flattened and other value types are treated
    as the terminal values and will invoke ``visitor``.
    pathro   r   Nc                    t        |t              r/|j                         D ]  \  }} | t        |      fz   |        y t        |t        t
        f      r!t        |      D ]  \  }} | |fz   |        y  | |       y r   )rU   r   r`   rY   rb   rc   re   )r   ro   krq   r   _traverse_objr   s        r   r   z+_traverse_state_dict.<locals>._traverse_obj  s}    eW% 31dc!fY.23e}-!%( .1daTk1-. D% r!   )OBJ_PATHr   r`   rY   )rs   r   rn   ro   r   s    `  @r   _traverse_state_dictr     sK    !H !S !T ! !&&( *
Us3xk5)*r!   c                 X    i i dt         dt        ddffd}t        | |       fS )a  
    Flatten ``state_dict`` made of nested dicts and lists into a top level dictionary.

    Use ``unflatten_state_dict`` to revert this process.
    Returns:
        A tuple with the flatten state_dict and a mapping from original to new state_dict.
    N.B. The new keys are derived from the object paths, joined by dot.
        For example: ``{ 'a': {'b':...}}`` results in the key `a.b`.
    r   ro   r   Nc                 |    dj                  t        t        |             }|v rt        d|       ||<   | |<   y )N.zduplicated flatten key )joinmaprY   rh   )r   ro   new_fqn	flattenedmappingss      r   	flat_copyz&_flatten_state_dict.<locals>.flat_copy  sF    ((3sD>*i6wi@AA"	' r!   )r   r   r   )rs   r   r   r   s     @@r   _flatten_state_dictr     s@     "$I "H! ! ! ! Y/hr!   	root_dictr   ro   c                    t        t        |       }dt        t           dt        ddfd}t        dt        |            D ]n  }||dz
     }||   }t        |      t        k(  ri ng }t        |t              r!t        t        |j                  ||            }W |||       ||   |||<   ||   }p |d   }t        |      t        k(  r |t        t        t           |      |       |||<   y)z>Set ``value`` in ``root_dict`` along the ``path`` object path.lstrp   r   Nc                 b    t        |       |k  r!| j                  d        t        |       |k  r y y r   )rd   r   )r   rp   s     r   extend_listz!_set_element.<locals>.extend_list  s&    #h#oJJt #h#or!   r   )r   CONTAINER_TYPEr   r   rW   r   rd   r1   rY   rU   r   
setdefault)	r   r   ro   cur_containerr   r   prev_keyrn   def_vals	            r   _set_elementr     s    3Mc    1c$i  4A;1g:>s)s:JBPRmW-  8 87 KM x0X&.*1h')(3M4 r(CCyCDcM2C8M#r!   mappingc                 Z    i }| j                         D ]  \  }}t        |||   |        |S )zaRestore the original nested state_dict according to ``mapping`` and the flattened ``state_dict``.)r`   r   )rs   r   nestedrn   ro   s        r   _unflatten_state_dictr     s:     !F &&( 2
UVWS\512Mr!   )NN)FT)FFr   )NF)Crf   r[   r,   r   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r5   torch.distributeddistributedr'   torch.nn.functionalnn
functionalr3   )torch.distributed._functional_collectivesr   is_availabler   'torch.distributed._shard.sharded_tensorr   torch.distributed.tensorr   r   r   torch.distributed.tensor._utilsr   rV   r   r   r    rC   	ExceptionrE   boolrW   rY   ra   r   r   r   r   r   r   r   r   r   r   	PATH_ITEMr   FLATTEN_MAPPINGSTATE_DICT_TYPEr   r   r   r   r   r   r!   r   <module>r      s    	           K 4-2ENNU	""# U\\" 	
 \\ '+%)###""## U\\"# \\	#L	  '+%)"$vv!v v 	v 	""#v U\\"v v v c3hv v v 
#s(^vx '+%)"$US#XU 	""#U U\\"	U
 U c3hU U 
#s(^Uv #%	%S#X% c3h% 	%
 
#s(^%V 	.S#X.#s(^. . 	.
 
#s(^.d PUBS#XB,0BHLB	#s(^BJ'S#X'c3h' 
'T*  '+&<#s(^&<38n&< s)&< LL	&<
 	""#&< 
&<Z '+	
38n

s)
 LL
 	""#	

 

H '+2D#s(^2D38n2D LL2D 	""#	2D
 2D 
2Dr '+	B#s(^B38nB LLB 	""#	B
 
BD #s(O	C sH}%sCx.	3/**xot+,* 
*0
?O+,4O 8 C D <*9r!   