
from typing import Dict, List, Optional

import torch
import torch.optim._functional as F
from torch import Tensor

__all__: List[str] = []


# TorchScript-compatible functional Adagrad optimizer.
# Instead of reading `param.grad`, gradients are passed explicitly to `step`,
# which lets the distributed optimizer keep gradients and parameters separate
# and lets multithreaded trainers update parameters without racing on `.grad`.
# NOTE: this is an internal helper for the distributed optimizer and is not
# meant to be used directly.
@torch.jit.script
class _FunctionalAdagrad:
    def __init__(
        self,
        params: List[Tensor],
        lr: float = 1e-2,
        lr_decay: float = 0.0,
        weight_decay: float = 0.0,
        initial_accumulator_value: float = 0.0,
        warmup_lr_multiplier: float = 1.0,
        warmup_num_iters: float = 0.0,
        eps: float = 1e-10,
        coalesce_grad: bool = True,
        foreach: bool = False,
        fused: bool = False,
        maximize: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        self.defaults = {
            "lr": lr,
            "lr_decay": lr_decay,
            "eps": eps,
            "weight_decay": weight_decay,
            "initial_accumulator_value": initial_accumulator_value,
            "warmup_lr_multiplier": warmup_lr_multiplier,
            "warmup_num_iters": warmup_num_iters,
        }
        self.coalesce_grad = coalesce_grad
        self.foreach = foreach
        self.fused = fused
        self.maximize = maximize
        self.state = torch.jit.annotate(
            Dict[torch.Tensor, Dict[str, torch.Tensor]], {}
        )

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # There is only a single param_group; additional groups are not supported.
        self.param_group = {"params": params}

        # Per-parameter state: running sum of squared gradients and a step counter.
        for p in self.param_group["params"]:
            self.state[p] = {
                "sum": torch.full_like(p.data, initial_accumulator_value),
                "step": torch.tensor(0.0),
            }

    def step(self, gradients: List[Optional[Tensor]]):
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        state_sums = []
        state_steps: List[Tensor] = []

        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        # Collect the parameters that received a gradient together with their state.
        has_sparse_grad, has_complex = False, False
        for param, gradient in zip(self.param_group["params"], gradients):
            if gradient is not None:
                has_sparse_grad |= gradient.is_sparse
                has_complex |= torch.is_complex(param)
                params_with_grad.append(param)
                grads.append(gradient)
                state = self.state[param]
                state_sums.append(state["sum"])
                state_steps.append(state["step"])

        with torch.no_grad():
            F.adagrad(
                params,
                grads,
                state_sums,
                state_steps,
                lr=self.defaults["lr"],
                weight_decay=self.defaults["weight_decay"],
                lr_decay=self.defaults["lr_decay"],
                eps=self.defaults["eps"],
                has_sparse_grad=has_sparse_grad,
                foreach=self.foreach,
                maximize=self.maximize,
                has_complex=has_complex,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )