
    sg"                     P    d Z ddlZ G d d      Z G d de      Z G d de      Zy)	z'Stochastic optimization methods for MLP    Nc                   *    e Zd ZdZddZd Zd Zd Zy)BaseOptimizera9  Base (Stochastic) gradient descent optimizer

    Parameters
    ----------
    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    Attributes
    ----------
    learning_rate : float
        the current learning rate
    """

    def __init__(self, learning_rate_init=0.1):
        self.learning_rate_init = learning_rate_init
        self.learning_rate = float(learning_rate_init)

    def update_params(self, params, grads):
        """Update parameters with given gradients

        Parameters
        ----------
        params : list of length = len(coefs_) + len(intercepts_)
            The concatenated list containing coefs_ and intercepts_ in MLP
            model. Used for initializing velocities and updating params

        grads : list of length = len(params)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params
        """
        updates = self._get_updates(grads)
        for param, update in zip((p for p in params), updates):
            param += update

    def iteration_ends(self, time_step):
        """Perform update to learning rate and potentially other states at the
        end of an iteration
        """

    def trigger_stopping(self, msg, verbose):
        """Decides whether it is time to stop training

        Parameters
        ----------
        msg : str
            Message passed in for verbose output

        verbose : bool
            Print message to stdin if True

        Returns
        -------
        is_stopping : bool
            True if training needs to stop
        """
        if verbose:
            print(msg + " Stopping.")
        return True
__module____qualname____doc__r   r   r   r&   r   r   r   r   r   	   s    7"r   r   c                   @     e Zd ZdZ	 	 	 	 	 d fd	Zd Zd Zd Z xZS )SGDOptimizera  Stochastic gradient descent optimizer with momentum

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'
        Learning rate schedule for weight updates.

        -'constant', is a constant learning rate given by
         'learning_rate_init'.

        -'invscaling' gradually decreases the learning rate 'learning_rate_' at
          each time step 't' using an inverse scaling exponent of 'power_t'.
          learning_rate_ = learning_rate_init / pow(t, power_t)

        -'adaptive', keeps the learning rate constant to
         'learning_rate_init' as long as the training keeps decreasing.
         Each time 2 consecutive epochs fail to decrease the training loss by
         tol, or fail to increase validation score by tol if 'early_stopping'
         is on, the current learning rate is divided by 5.

    momentum : float, default=0.9
        Value of momentum used, must be larger than or equal to 0

    nesterov : bool, default=True
        Whether to use nesterov's momentum or not. Use nesterov's if True

    power_t : float, default=0.5
        Power of time step 't' in inverse scaling. See `lr_schedule` for
        more details.

    Attributes
    ----------
    learning_rate : float
        the current learning rate

    velocities : list, length = len(params)
        velocities that are used to update params
    """

    def __init__(
        self,
        params,
        learning_rate_init=0.1,
        lr_schedule="constant",
        momentum=0.9,
        nesterov=True,
        power_t=0.5,
    ):
        super().__init__(learning_rate_init)

        self.lr_schedule = lr_schedule
        self.momentum = momentum
        self.nesterov = nesterov
        self.power_t = power_t
        self.velocities = [np.zeros_like(param) for param in params]

    def iteration_ends(self, time_step):
        """Perform updates to learning rate and potential other states at the
        end of an iteration

        Parameters
        ----------
        time_step : int
            number of training samples trained on so far, used to update
            learning rate for 'invscaling'
        """
        if self.lr_schedule == "invscaling":
            # e.g. with learning_rate_init=0.1 and power_t=0.5, time_step=99
            # gives 0.1 / 100 ** 0.5 = 0.01
            self.learning_rate = (
                float(self.learning_rate_init) / (time_step + 1) ** self.power_t
            )

    def trigger_stopping(self, msg, verbose):
        if self.lr_schedule != "adaptive":
            if verbose:
                print(msg + " Stopping.")
            return True

        if self.learning_rate <= 1e-6:
            if verbose:
                print(msg + " Learning rate too small. Stopping.")
            return True

        # 'adaptive' schedule: instead of stopping, retry with a learning rate
        # divided by 5
        self.learning_rate /= 5.0
        if verbose:
            print(msg + " Setting learning rate to %f" % self.learning_rate)
        return False

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        updates = [
            self.momentum * velocity - self.learning_rate * grad
            for velocity, grad in zip(self.velocities, grads)
        ]
        self.velocities = updates

        if self.nesterov:
            # Nesterov look-ahead: recompute the step using the just-updated
            # velocities
            updates = [
                self.momentum * velocity - self.learning_rate * grad
                for velocity, grad in zip(self.velocities, grads)
            ]

        return updates


class AdamOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with Adam

    Note: All default values are from the original Adam paper

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.001
        The initial learning rate used. It controls the step-size in updating
        the weights

    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector, should be
        in [0, 1)

    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector, should be
        in [0, 1)

    epsilon : float, default=1e-8
        Value for numerical stability

    Attributes
    ----------
    learning_rate : float
        The current learning rate

    t : int
        Timestep

    ms : list, length = len(params)
        First moment vectors

    vs : list, length = len(params)
        Second moment vectors

    References
    ----------
    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014) "Adam: A method for
        stochastic optimization." <1412.6980>
    c                    t         |   |       || _        || _        || _        d| _        |D cg c]  }t        j                  |       c}| _        |D cg c]  }t        j                  |       c}| _	        y c c}w c c}w )Nr   )
r/   r   beta_1beta_2epsilontr4   r5   msvs)r
   r   r   rH   rI   rJ   r   r7   s          r   r   zAdamOptimizer.__init__   sm     	+,5;<E2=='<5;<E2=='< =<s   A>Bc                    | xj                   dz  c_         t        | j                  |      D cg c]'  \  }}| j                  |z  d| j                  z
  |z  z   ) c}}| _        t        | j                  |      D cg c]*  \  }}| j
                  |z  d| j
                  z
  |dz  z  z   , c}}| _        | j                  t        j                  d| j
                  | j                   z  z
        z  d| j                  | j                   z  z
  z  | _	        t        | j                  | j                        D cg c]8  \  }}| j                   |z  t        j                  |      | j                  z   z  : }}}|S c c}}w c c}}w c c}}w )r>   r:      )rK   r   rL   rH   rM   rI   r   r4   sqrtr	   rJ   )r
   r   mr@   vr   s         r   r   zAdamOptimizer._get_updates   s[    	! tww.
4 KK!Oq4;;$66
 tww.
4 KK!Oq4;;47;;

 ##gga$++tvv--./4;;&&( 	 DGGTWW-
1 !#rwwqzDLL'@A
 
 #


s   ,E8=/E>6=F)gMbP?rB   g+?g:0yE>)r(   r)   r*   r+   r   r   rC   rD   s   @r   rF   rF      s    +\ SW
= r   rF   )r+   numpyr4   r   r-   rF   r   r   r   <module>rT      s5    -
 < <~z= zzZM Zr   