
import logging
import warnings
from copy import deepcopy
from typing import (
    Any,
    Callable,
    Collection,
    Dict,
    List,
    Mapping,
    Optional,
    overload,
    Union,
)

import torch
import torch.nn as nn
from torch import optim
from torch.distributed._shard.sharded_tensor import ShardedTensor
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP


__all__: List[str] = []

logger = logging.getLogger(__name__)


class _NamedOptimizer(optim.Optimizer):
    """
    ``_NamedOptimizer`` takes a dict of parameters and exposes ``state_dict`` by parameter key.

    We replace the original numeric keys in the optimizer state with each
    parameter's fully qualified name (FQN) string. Users can initialize the
    optimizer just as they would a regular PyTorch optimizer; the only
    difference is that they also need to pass in the FQN of each parameter.

    Args:
        named_parameters (Mapping[str, Union[torch.Tensor, ShardedTensor]]):
            Mapping from FQN to parameter.
        optimizer_class (optim.Optimizer):
            The class of optimizer to instantiate.
        param_groups (Collection[Mapping[str, Any]]):
            `param_groups` to pass to optimizer if specified.
            The key of the inner map needs to be FQNs.
            Default: None
        module (nn.Module): the module whose parameters are updated
            by the optimizer.
        args: arguments to pass to the optimizer constructor.
        kwargs: arguments to pass to the optimizer constructor.

    Example::
        >>> # xdoctest: +SKIP("distributed")
        >>> from torch import optim
        >>> from torch.distributed.optim import _NamedOptimizer
        >>>
        >>> # Define the named optimizer.
        >>> m = Model(...)
        >>> named_optim = _NamedOptimizer(m.named_parameters(), optim.SGD)
        >>> # Forward pass + backward pass.
        >>> named_optim.step()
        >>> ...
        >>> # Calling state_dict on the named optimizer returns a state_dict keyed by FQN.
        >>> named_optim.state_dict()
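        >>> # A hypothetical sketch of the result (the keys depend on the model's
        >>> # parameter names), e.g.:
        >>> # {"state": {"linear.bias": {...}, "linear.weight": {...}},
        >>> #  "param_groups": [{"params": ["linear.bias", "linear.weight"], ...}]}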

    Warning: This API is still in development and subject to change.

    TODO: Add tutorial for _NamedOptimizer.
    TODO: Add documentation in the docstring for the public attributes
          like self.param_groups and self.named_parameters.
    """

    def __init__(
        self,
        named_parameters: Mapping[str, Union[torch.Tensor, ShardedTensor]],
        optimizer_class: optim.Optimizer,
        param_groups: Optional[Collection[Mapping[str, Any]]] = None,
        module: Optional[nn.Module] = None,
        *args,
        **kwargs,
    ) -> None:
        torch._C._log_api_usage_once("torch.distributed.optim._NamedOptimizer")
        self.param_groups = param_groups
        self._param_groups_check()
        self.named_parameters = dict(named_parameters)
        params_for_optimizer = (
            self.named_parameters.values() if param_groups is None else param_groups
        )
        self._optimizer = optimizer_class(
            params_for_optimizer,
            *args,
            **kwargs,
        )
        self.module = module
        if param_groups is None:
            self.ordered_param_keys = list(self.named_parameters.keys())
        else:
            warnings.warn(
                "Since we pass in param_groups, we will use param_groups to "
                "initialize the optimizer, not all parameters of the module."
            )
            param_to_key = {param: key for key, param in self.named_parameters.items()}
            ordered_param_keys = []
            for group in param_groups:
                for param in group["params"]:
                    if param not in param_to_key:
                        raise ValueError(
                            f"Expect param name {param} found in param group but is missing."
                        )
                    ordered_param_keys.append(param_to_key[param])
            self.ordered_param_keys = ordered_param_keys
        # Update param_groups from the wrapped optimizer.
        self.param_groups = self._optimizer.param_groups

    def _param_groups_check(self):
        if self.param_groups is not None:
            for param_group in self.param_groups:
                assert isinstance(param_group, dict), "param group must be a dict"
                assert "params" in param_group, "param group must contain key params"
                params = param_group["params"]
                if isinstance(params, torch.Tensor):
                    params = [params]
                params = list(params)
                for param in params:
                    if not isinstance(param, torch.Tensor):
                        raise TypeError(
                            "optimizer can only optimize Tensors, "
                            "but one of the params is " + torch.typename(param)
                        )
                param_group["params"] = params

    def state_dict(self) -> Dict[str, Any]:
        """
        Return the ``state_dict`` of the optimizer.

        Instead of using numbers to index
        parameters, we use the module's fully qualified names (FQNs) as the keys.
        """
        state_dict = self._optimizer.state_dict()
        param_groups = state_dict["param_groups"]

        ret_state = {
            self.ordered_param_keys[st_key]: state_val
            for st_key, state_val in state_dict["state"].items()
        }

        ret_groups = []
        for group in param_groups:
            param_keys = []
            for param in group["params"]:
                param_keys.append(self.ordered_param_keys[param])
            ret_group = {"params": sorted(param_keys)}
            for k, v in group.items():
                if k != "params":
                    ret_group[k] = deepcopy(v)
            ret_groups.append(ret_group)

        return self._post_state_dict({"state": ret_state, "param_groups": ret_groups})

    @overload
    def step(self, closure: None = ...) -> None:
        ...

    @overload
    def step(self, closure: Callable[[], float]) -> float:
        ...

    def step(self, closure: Optional[Callable[[], float]] = None) -> Optional[float]:
        """
        Perform a single optimization step.

        This will call :meth:`torch.optim.Optimizer.step` on the wrapped
        optimizer.
        """
        return self._optimizer.step(closure=closure)

    @property
    def state(self) -> Mapping[torch.Tensor, Any]:
        return self._optimizer.state

    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
        """
        Define the default behavior to load a state_dict for ``_NamedOptimizer``.

        Sample Code
        ```
            my_model = MyModule()
            optimizer = _NamedOptimizer(my_model.named_parameters(), Adagrad)
            ...

            optim_state_dict = optimizer.state_dict()
            ...
            ...

            optimizer.load_state_dict(optim_state_dict)
            ...
        ```
        Args:
            state_dict (Dict[str, Any]): A ``state_dict`` to load into the optimizer.
                Note that this state dict update is performed in place.

        .. note:: PyTorch is using lazy init to initialize the optim states.
            So it is possible that there is no optim state when the user calls
            ``load_state_dict``. For ``_NamedOptimizer`` we are stricter and require
            that users only call ``load_state_dict`` after the state is initialized.
            By doing this, we can validate the optim ``state_dict`` to be loaded.
        """
        new_state_dict = self._optimizer.state_dict()
        state_dict = self._pre_load_state_dict(state_dict)
        state = state_dict["state"]
        new_state = new_state_dict["state"]
        if len(new_state) == 0:
            raise ValueError(
                "Expects the optim to be initialized before load but found not initialized."
            )

        for idx, param_key in enumerate(self.ordered_param_keys):
            # Not every parameter tracked by this optimizer is necessarily present
            # in the state_dict being loaded (e.g. a frozen layer), so skip those.
            if param_key not in state.keys():
                continue
            if len(state[param_key]) != len(new_state[idx]):
                raise ValueError(
                    f"Expects equal length as {len(new_state[idx])} for parameter "
                    f"{param_key} but found: {len(state[param_key])}"
                )
            # Iterate through all optimizer states for this parameter.
            for state_key, state_val in new_state[idx].items():
                if state_key not in state[param_key]:
                    raise ValueError(
                        f"Expects state {state_key} for parameter {param_key} but not found."
                    )

                src_state_val = state[param_key][state_key]
                if isinstance(state_val, ShardedTensor):
                    assert isinstance(src_state_val, ShardedTensor)
                    num_shards = len(state_val.local_shards())
                    num_new_shards = len(src_state_val.local_shards())
                    if num_shards != num_new_shards:
                        raise ValueError(
                            f"Expects equal number of shards as {num_new_shards} "
                            f"but found {num_shards} for {param_key}/{state_key}"
                        )
                    # Copy shard by shard into the existing local shards.
                    for shard, src_shard in zip(
                        state_val.local_shards(), src_state_val.local_shards()
                    ):
                        shard.tensor.detach().copy_(src_shard.tensor)
                elif isinstance(state_val, torch.Tensor):
                    assert isinstance(src_state_val, torch.Tensor)
                    state_val.detach().copy_(src_state_val)
                else:
                    new_state[idx][state_key] = deepcopy(src_state_val)

        # Load the param_groups of the state_dict.
        src_param_groups = state_dict["param_groups"]
        new_param_groups = new_state_dict["param_groups"]

        src_group_map = {}
        for group in src_param_groups:
            param_keys = list(group["params"])
            src_group_map[_gen_param_group_key(param_keys)] = group
        new_group_map = {}
        for new_group in new_param_groups:
            param_keys = []
            for param_key in new_group["params"]:
                # The wrapped optimizer indexes params positionally, so map each
                # index back to its FQN.
                param_keys.append(self.ordered_param_keys[param_key])
            new_group_map[_gen_param_group_key(param_keys)] = new_group
        for group_key, new_group in new_group_map.items():
            if group_key not in src_group_map:
                continue
            src_group = src_group_map[group_key]
            if len(src_group) != len(new_group):
                raise ValueError(
                    f"Expects equal param_group size as {len(new_group)} "
                    f"for group {group_key} but found {len(src_group)}."
                )
            for k in src_group:
                if k not in new_group:
                    raise ValueError(
                        f"Expects group key {k} to be in group {group_key} "
                        f"in `state_dict` but is missing."
                    )
                if k != "params":
                    new_group[k] = deepcopy(src_group[k])

        self._optimizer.load_state_dict(new_state_dict)

    def add_param_group(self, param_group: Dict[str, Any]) -> None:
        """
        Add a param group to the :class:`_NamedOptimizer`'s `param_groups`.

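        A hypothetical sketch (``model.head`` is an illustrative submodule whose
        parameters were passed in ``named_parameters`` but not yet placed in any
        param group); the group's ``params`` are the parameter tensors themselves::

            named_optim.add_param_group(
                {"params": list(model.head.parameters()), "lr": 0.01}
            )
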
        Warning: This API is still in development and subject to change.
        """
        assert isinstance(param_group, dict), "param group must be a dict"

        params = param_group["params"]
        if isinstance(params, torch.Tensor):
            param_group["params"] = [params]
        else:
            param_group["params"] = list(params)

        param_to_key = {param: key for key, param in self.named_parameters.items()}
        for param in param_group["params"]:
            if param not in param_to_key:
                raise ValueError("some parameters are not in the module")
            self.ordered_param_keys.append(param_to_key[param])

        self._optimizer.add_param_group(param_group)
        # Update param_groups from the wrapped optimizer.
        self.param_groups = self._optimizer.param_groups

    def init_state(self) -> None:
        """
        Run a dummy optimizer step, which allows initializing the optimizer state because we do lazy init for most optimizers.

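        A hypothetical usage sketch (``model`` and ``checkpoint`` are illustrative
        names, not part of this API)::

            named_optim = _NamedOptimizer(model.named_parameters(), optim.Adagrad)
            named_optim.init_state()
            named_optim.load_state_dict(checkpoint["optim"])
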
        This allows doing in-place loading of optimizer state from a checkpoint.
        """
        for param in self.named_parameters.values():
            if param.requires_grad:
                t = torch.zeros_like(param)
                param.grad = torch.autograd.Variable(t)
        # Calling step with zero gradients forces the wrapped optimizer to
        # materialize its lazily-created state.
        self.step(closure=None)

    def _pre_load_state_dict(self, state_dict: Dict[str, Any]) -> Dict[str, Any]:
        # If the module is FSDP-wrapped, let FSDP translate the FQN-keyed
        # state_dict into one the wrapped optimizer can load.
        if isinstance(self.module, FSDP):
            return FSDP.optim_state_dict_to_load(
                self.module, self._optimizer, state_dict, is_named_optimizer=True
            )
        return state_dict

    def _post_state_dict(self, state_dict: Dict[str, Any]) -> Dict[str, Any]:
        # If the module is FSDP-wrapped, let FSDP post-process the state_dict
        # produced by the wrapped optimizer.
        if isinstance(self.module, FSDP):
            FSDP.optim_state_dict(self.module, self._optimizer, state_dict)
        return state_dict


def _gen_param_group_key(param_keys: List[str]) -> str:
    """Concatenate all param keys as a unique identifier for one param group."""
    return "/".join(sorted(param_keys))