"""Stochastic optimization methods for MLP"""

import numpy as np


class BaseOptimizer:
    """Base (Stochastic) gradient descent optimizer

    Parameters
    ----------
    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    Attributes
    ----------
    learning_rate : float
        the current learning rate
    """

    def __init__(self, learning_rate_init=0.1):
        self.learning_rate_init = learning_rate_init
        self.learning_rate = float(learning_rate_init)

    def update_params(self, params, grads):
        """Update parameters with given gradients

        Parameters
        ----------
        params : list of length = len(coefs_) + len(intercepts_)
            The concatenated list containing coefs_ and intercepts_ in MLP
            model. Used for initializing velocities and updating params

        grads : list of length = len(params)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params
        """
        updates = self._get_updates(grads)
        for param, update in zip((p for p in params), updates):
            param += update

    def iteration_ends(self, time_step):
        """Perform update to learning rate and potentially other states at the
        end of an iteration
        """
        pass

    def trigger_stopping(self, msg, verbose):
        """Decides whether it is time to stop training

        Parameters
        ----------
        msg : str
            Message passed in for verbose output

        verbose : bool
            Print message to stdin if True

        Returns
        -------
        is_stopping : bool
            True if training needs to stop
        """
        if verbose:
            print(msg + " Stopping.")
        return True
 Zdd Zdd Z  ZS )SGDOptimizera  Stochastic gradient descent optimizer with momentum

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'
        Learning rate schedule for weight updates.

        -'constant', is a constant learning rate given by
         'learning_rate_init'.

        -'invscaling' gradually decreases the learning rate 'learning_rate_' at
          each time step 't' using an inverse scaling exponent of 'power_t'.
          learning_rate_ = learning_rate_init / pow(t, power_t)

        -'adaptive', keeps the learning rate constant to
         'learning_rate_init' as long as the training keeps decreasing.
         Each time 2 consecutive epochs fail to decrease the training loss by
         tol, or fail to increase validation score by tol if 'early_stopping'
         is on, the current learning rate is divided by 5.

    momentum : float, default=0.9
        Value of momentum used, must be larger than or equal to 0

    nesterov : bool, default=True
        Whether to use nesterov's momentum or not. Use nesterov's if True

    power_t : float, default=0.5
        Power of time step 't' in inverse scaling. See `lr_schedule` for
        more details.

    Attributes
    ----------
    learning_rate : float
        the current learning rate

    velocities : list, length = len(params)
        velocities that are used to update params
    """

    def __init__(
        self,
        params,
        learning_rate_init=0.1,
        lr_schedule="constant",
        momentum=0.9,
        nesterov=True,
        power_t=0.5,
    ):
        super().__init__(learning_rate_init)

        self.lr_schedule = lr_schedule
        self.momentum = momentum
        self.nesterov = nesterov
        self.power_t = power_t
        self.velocities = [np.zeros_like(param) for param in params]

    def iteration_ends(self, time_step):
        """Perform updates to learning rate and potentially other states at the
        end of an iteration

        Parameters
        ----------
        time_step : int
            number of training samples trained on so far, used to update
            learning rate for 'invscaling'
        """
        if self.lr_schedule == "invscaling":
            self.learning_rate = (
                float(self.learning_rate_init) / (time_step + 1) ** self.power_t
            )

    def trigger_stopping(self, msg, verbose):
        if self.lr_schedule != "adaptive":
            if verbose:
                print(msg + " Stopping.")
            return True

        if self.learning_rate <= 1e-6:
            if verbose:
                print(msg + " Learning rate too small. Stopping.")
            return True

        self.learning_rate /= 5.0
        if verbose:
            print(msg + " Setting learning rate to %f" % self.learning_rate)
        return False

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        updates = [
            self.momentum * velocity - self.learning_rate * grad
            for velocity, grad in zip(self.velocities, grads)
        ]
        self.velocities = updates

        if self.nesterov:
            updates = [
                self.momentum * velocity - self.learning_rate * grad
                for velocity, grad in zip(self.velocities, grads)
            ]

        return updates


class AdamOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with Adam

    Note: All default values are from the original Adam paper

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.001
        The initial learning rate used. It controls the step-size in updating
        the weights

    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector, should be
        in [0, 1)

    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector, should be
        in [0, 1)

    epsilon : float, default=1e-8
        Value for numerical stability

    Attributes
    ----------
    learning_rate : float
        The current learning rate

    t : int
        Timestep

    ms : list, length = len(params)
        First moment vectors

    vs : list, length = len(params)
        Second moment vectors

    References
    ----------
    :arxiv:`Kingma, Diederik, and Jimmy Ba (2014) "Adam: A method for
        stochastic optimization." <1412.6980>`
    """

    def __init__(
        self, params, learning_rate_init=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8
    ):
        super().__init__(learning_rate_init)

        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.t = 0
        self.ms = [np.zeros_like(param) for param in params]
        self.vs = [np.zeros_like(param) for param in params]

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        self.t += 1
        self.ms = [
            self.beta_1 * m + (1 - self.beta_1) * grad
            for m, grad in zip(self.ms, grads)
        ]
        self.vs = [
            self.beta_2 * v + (1 - self.beta_2) * (grad**2)
            for v, grad in zip(self.vs, grads)
        ]
        self.learning_rate = (
            self.learning_rate_init
            * np.sqrt(1 - self.beta_2**self.t)
            / (1 - self.beta_1**self.t)
        )
        updates = [
            -self.learning_rate * m / (np.sqrt(v) + self.epsilon)
            for m, v in zip(self.ms, self.vs)
        ]
        return updates