
    sg@                     F   d Z ddlZddlmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ ddlmZ ddlmZ d	d
lmZmZ d	dlmZ d	dlmZ d	dlmZmZmZ d	dlmZmZ d	dlm Z  ddl!m"Z"  ejF                  ejH                        jJ                  Z&d Z'ddZ(d Z)d Z* G d dee"      Z+y)z<
A Theil-Sen Estimator for Multiple Linear Regression Model
    N)combinations)IntegralReal)effective_n_jobs)linalg)get_lapack_funcs)binom   )RegressorMixin_fit_context)ConvergenceWarning)check_random_state)HiddenInterval
StrOptions)Paralleldelayed)validate_data   )LinearModelc                 F   | |z
  }t        j                  t        j                  |dz  d            }|t        k\  }t	        |j                         | j
                  d   k        }||   }||   ddt         j                  f   }t        j                  t        j                  ||z  d            }|t        kD  r=t        j                  | |ddf   |z  d      t        j                  d|z  d      z  }nd}d}t        dd||z  z
        |z  t        d||z        |z  z   S )u	  Modified Weiszfeld step.

    This function defines one iteration step in order to approximate the
    spatial median (L1 median). It is a form of an iteratively re-weighted
    least squares method.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    x_old : ndarray of shape = (n_features,)
        Current start vector.

    Returns
    -------
    x_new : ndarray of shape (n_features,)
        New iteration step.

    References
    ----------
    - On Computation of Spatial Median for Robust Data Mining, 2005
      T. Kärkkäinen and S. Äyrämö
      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
    r
   r   axisr   Ng      ?        )npsqrtsum_EPSILONintshapenewaxisr   normmaxmin)Xx_olddiff	diff_normmaskis_x_old_in_Xquotient_normnew_directions           R/var/www/html/venv/lib/python3.12/site-packages/sklearn/linear_model/_theil_sen.py_modified_weiszfeld_stepr.      s   6 u9DtQwQ/0I D
QWWQZ/0M:D$2::.IKKti'7a @AMxqqzI5A>	MB
 
  	C}}445E
c==0
1E
9	:    c                    | j                   d   dk(  r'dt        j                  | j                         d      fS |dz  }t        j                  | d      }t        |      D ]3  }t        | |      }t        j                  ||z
  dz        |k  r ||fS |}5 t        j                  dj                  |      t               fS )	u	  Spatial median (L1 median).

    The spatial median is member of a class of so-called M-estimators which
    are defined by an optimization problem. Given a number of p points in an
    n-dimensional space, the point x minimizing the sum of all distances to the
    p other points is called spatial median.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    max_iter : int, default=300
        Maximum number of iterations.

    tol : float, default=1.e-3
        Stop the algorithm if spatial_median has converged.

    Returns
    -------
    spatial_median : ndarray of shape = (n_features,)
        Spatial median.

    n_iter : int
        Number of iterations needed.

    References
    ----------
    - On Computation of Spatial Median for Robust Data Mining, 2005
      T. Kärkkäinen and S. Äyrämö
      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
    r   T)keepdimsr
   r   r   zYMaximum number of iterations {max_iter} reached in spatial median for TheilSen regressor.)max_iter)r    r   medianravelmeanranger.   r   warningswarnformatr   )r%   r2   tolspatial_median_oldn_iterspatial_medians         r-   _spatial_medianr>   Q   s    D 	wwqzQ"))AGGI555AIC+/ 
1!5GH66%61<=C >!! "0
 	vxv(		
 >!!r/   c                 <    ddd|z  z  | |z
  dz   z  |z   dz
  | z  z
  S )a  Approximation of the breakdown point.

    Parameters
    ----------
    n_samples : int
        Number of samples.

    n_subsamples : int
        Number of subsamples to consider.

    Returns
    -------
    breakdown_point : float
        Approximation of breakdown point.
    r   g      ? )	n_samplesn_subsampless     r-   _breakdown_pointrC      sG    " 	
A$%\)AA)EF 	r/   c                    t        |      }| j                  d   |z   }|j                  d   }t        j                  |j                  d   |f      }t        j                  ||f      }t        j
                  t        ||            }t        d||f      \  }	t        |      D ]1  \  }
}| |ddf   |dd|df<   ||   |d|  |	||      d   d| ||
<   3 |S )a  Least Squares Estimator for TheilSenRegressor class.

    This function calculates the least squares method on a subset of rows of X
    and y defined by the indices array. Optionally, an intercept column is
    added if intercept is set to true.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Design matrix, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : ndarray of shape (n_samples,)
        Target vector, where `n_samples` is the number of samples.

    indices : ndarray of shape (n_subpopulation, n_subsamples)
        Indices of all subsamples with respect to the chosen subpopulation.

    fit_intercept : bool
        Fit intercept or not.

    Returns
    -------
    weights : ndarray of shape (n_subpopulation, n_features + intercept)
        Solution matrix of n_subpopulation solved least square problems.
    r   r   )gelssN)	r   r    r   emptyoneszerosr#   r   	enumerate)r%   yindicesfit_intercept
n_featuresrB   weightsX_subpopulationy_subpopulationlstsqindexsubsets               r-   _lstsqrT      s    6 &Mm+J==#Lhha(*56Ggg|Z89OhhL* =?O
_o,NOHU"7+ Qv-.vqy\=>)*)*6&@CKZPQ
 Nr/   c                       e Zd ZU dZdgd e edh            g eeddd      gdeg eeddd      g eed	dd      gd
gdegdgd	Z	e
ed<   dddddddddd	dZd Z ed      d        Zy)TheilSenRegressora  Theil-Sen Estimator: robust multivariate regression model.

    The algorithm calculates least square solutions on subsets with size
    n_subsamples of the samples in X. Any value of n_subsamples between the
    number of features and samples leads to an estimator with a compromise
    between robustness and efficiency. Since the number of least square
    solutions is "n_samples choose n_subsamples", it can be extremely large
    and can therefore be limited with max_subpopulation. If this limit is
    reached, the subsets are chosen randomly. In a final step, the spatial
    median (or L1 median) is calculated of all least square solutions.

    Read more in the :ref:`User Guide <theil_sen_regression>`.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations.

    copy_X : bool, default=True
        If True, X will be copied; else, it may be overwritten.

        .. deprecated:: 1.6
            `copy_X` was deprecated in 1.6 and will be removed in 1.8.
            It has no effect as a copy is always made.

    max_subpopulation : int, default=1e4
        Instead of computing with a set of cardinality 'n choose k', where n is
        the number of samples and k is the number of subsamples (at least
        number of features), consider only a stochastic subpopulation of a
        given maximal size if 'n choose k' is larger than max_subpopulation.
        For other than small problem sizes this parameter will determine
        memory usage and runtime if n_subsamples is not changed. Note that the
        data type should be int but floats such as 1e4 can be accepted too.

    n_subsamples : int, default=None
        Number of samples to calculate the parameters. This is at least the
        number of features (plus 1 if fit_intercept=True) and the number of
        samples as a maximum. A lower number leads to a higher breakdown
        point and a low efficiency while a high number leads to a low
        breakdown point and a high efficiency. If None, take the
        minimum number of subsamples leading to maximal robustness.
        If n_subsamples is set to n_samples, Theil-Sen is identical to least
        squares.

    max_iter : int, default=300
        Maximum number of iterations for the calculation of spatial median.

    tol : float, default=1e-3
        Tolerance when calculating spatial median.

    random_state : int, RandomState instance or None, default=None
        A random number generator instance to define the state of the random
        permutations generator. Pass an int for reproducible output across
        multiple function calls.
        See :term:`Glossary <random_state>`.

    n_jobs : int, default=None
        Number of CPUs to use during the cross validation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : bool, default=False
        Verbose mode when fitting the model.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Coefficients of the regression model (median of distribution).

    intercept_ : float
        Estimated intercept of regression model.

    breakdown_ : float
        Approximated breakdown point.

    n_iter_ : int
        Number of iterations needed for the spatial median.

    n_subpopulation_ : int
        Number of combinations taken into account from 'n choose k', where n is
        the number of samples and k is the number of subsamples.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    HuberRegressor : Linear regression model that is robust to outliers.
    RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.
    SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

    References
    ----------
    - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
      Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
      http://home.olemiss.edu/~xdang/papers/MTSE.pdf

    Examples
    --------
    >>> from sklearn.linear_model import TheilSenRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(
    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
    >>> reg = TheilSenRegressor(random_state=0).fit(X, y)
    >>> reg.score(X, y)
    0.9884...
    >>> reg.predict(X[:1,])
    array([-31.5871...])
    boolean
deprecatedr   Nleft)closedr   r   random_stateverbose	rL   copy_Xmax_subpopulationrB   r2   r:   r[   n_jobsr\   _parameter_constraintsTg     @,  MbP?Fc       	             || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y Nr]   )
selfrL   r^   r_   rB   r2   r:   r[   r`   r\   s
             r-   __init__zTheilSenRegressor.__init__V  sG     +!2( (r/   c           	         | j                   }| j                  r|dz   }n|}|v||kD  rt        dj                  ||            ||k\  r1||kD  rX| j                  rdnd}t        dj                  |||            ||k7  r't        dj                  ||            t	        ||      }t        dt        j                  t        ||                  }t        t	        | j                  |            }||fS )Nr   z=Invalid parameter since n_subsamples > n_samples ({0} > {1}).z+1 zAInvalid parameter since n_features{0} > n_subsamples ({1} > {2}).z\Invalid parameter since n_subsamples != n_samples ({0} != {1}) while n_samples < n_features.)rB   rL   
ValueErrorr9   r$   r#   r   rintr	   r   r_   )rf   rA   rM   rB   n_dimplus_1all_combinationsn_subpopulations           r-   _check_subparamsz"TheilSenRegressor._check_subparamsm  s   ((NEE#i' --3VL)-L  J&<'%)%7%7TRF$!6&%>   9,$((.|Y(G  ui0Lq"''%	<*H"IJc$"8"8:JKL_,,r/   )prefer_skip_nested_validationc                      j                   dk7  rt        j                  dt               t	         j
                        }t         d      \  j                  \  }} j                  ||      \  } _	        t        ||       _         j                  rt        dj                   j                               t        dj                  |             t         j                  |z        }t        dj                  |             t        dj                   j                               t!        j"                  t%        ||             j&                  k  rt)        t+        t-        |      |            }n4t-         j                        D 	cg c]  }	|j/                  ||d	
       }}	t1         j2                        }
t!        j4                  ||
       t7        |
 j                         fdt-        |
      D              }t!        j8                  |      }t;        | j<                   j>                        \   _         } jB                  r|d    _"        |dd  _#         S d _"        | _#         S c c}	w )aU  Fit linear model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.
        y : ndarray of shape (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
            Fitted `TheilSenRegressor` estimator.
        rX   z`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it has no effect internally. Simply leave this parameter to its default value to avoid this warning.T)	y_numericzBreakdown point: {0}zNumber of samples: {0}zTolerable outliers: {0}zNumber of subpopulations: {0}F)sizereplace)r`   r\   c              3   h   K   | ])  } t        t              |   j                         + y wre   )r   rT   rL   ).0jobr%   
index_listrf   rJ   s     r-   	<genexpr>z(TheilSenRegressor.fit.<locals>.<genexpr>  s6      @
 GFOAq*S/43E3EF@
s   /2)r2   r:   r   r   Nr   )$r^   r7   r8   FutureWarningr   r[   r   r    rp   n_subpopulation_rC   
breakdown_r\   printr9   r   r   rk   r	   r_   listr   r6   choicer   r`   array_splitr   vstackr>   r2   r:   n_iter_rL   
intercept_coef_)rf   r%   rJ   r[   rA   rM   rB   tol_outliersrK   _r`   rN   coefsry   s   ```          @r-   fitzTheilSenRegressor.fit  s,     ;;,&MM/ 	 *$*;*;<T1a481 !	:.2.C.Cz/
+d+ +9lC<<(//@A*11)<=t:;L+22<@A1889N9NOP 775L12d6L6LL<i(8,GHG t445 ##IL%#PG 
 "$++.^^GV4
?(&$,,? @
V}@
 
 ))G$-dmm
e #AhDOqrDJ
  "DODJ/s   I<)__name__
__module____qualname____doc__r   r   r   r   r   ra   dict__annotations__rg   rp   r   r   r@   r/   r-   rV   rV      s    vr $fZ%?@A&tQVDEx(h4?@sD89'(";$D   .#-J 5A 6Ar/   rV   )rb   rc   ),r   r7   	itertoolsr   numbersr   r   numpyr   joblibr   scipyr   scipy.linalg.lapackr   scipy.specialr	   baser   r   
exceptionsr   utilsr   utils._param_validationr   r   r   utils.parallelr   r   utils.validationr   _baser   finfodoubleepsr   r.   r>   rC   rT   rV   r@   r/   r-   <module>r      s}     " "  #  0  / + & B B . , 288BII""0f5"p6)XD Dr/   