o
    \i~                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z& d
d Z'dd Z(ej)*de&dd Z+ej)*dddgdd Z,ej)*de&dd Z-dd Z.dd Z/ej)*de0ddgddgddgdd gge0g d!e0d"d#gdfe0d$dgd%dgddgd&d gge0g d'e0d"d"gdfe0ddgddgddgdd gge0g d!e0ej1d#gdfe0d$dgd%dgddgd&d gge0g d'e0ej1ej1gdfgd(d) Z2ej)*d*e0ddgddgddgdd gge0g d!e0d"d+ge0d,d-gdfe0d$dgd%dgddgd&d gge0g d'e0d"d"ge0d,d,gdfe0ddgddgddgd%d gge0g d.e0e3ej4j5d/ge0d"d0gdfe0d%dgddgddgdd gge0g d.e0e3ej4j5d/ge0d"d0gdfe0ddgddgddgdd gge0g d!e0ej1d+ge0ej1d-gdfe0d$dgd%dgddgd&d gge0g d'e0ej1ej1ge0ej1ej1gdfe0ddgddgddgd%d gge0g d.e0ej6d/ge0d"d0gdfe0d%dgddgddgdd gge0g d.e0ej6d/ge0d"d0gdfgd1d2 Z7d3d4 Z8d5d6 Z9ej)*de&d7d8 Z:d9d: Z;d;d< Z<ej)*d=ej=ej4gd>d? Z>d@dA Z?dBdC Z@dDdE ZAdFdG ZBdHdI ZCdJdK ZDdLdM ZEej)*dNg dOej)*dPg dQdRdS ZFdTdU ZGdVdW ZHdXdY ZIdZd[ ZJd\d] ZKd^d_ ZLd`da ZMdbdc ZNddde ZOdfdg ZPdhdi ZQdjdk ZRdldm ZSej)*dned doedpdqedrd dsedtdpdsgdudv ZTdS )wz0
Todo: cross-check the F-value with stats model
    N)assert_allclose)sparsestats)	load_irismake_classificationmake_regression)GenericUnivariateSelect	SelectFdr	SelectFpr	SelectFweSelectKBestSelectPercentilechi2	f_classiff_onewayf_regressionmutual_info_classifmutual_info_regressionr_regression)	safe_mask)_convert_containerassert_almost_equalassert_array_almost_equalassert_array_equalignore_warnings)CSR_CONTAINERSc                  C   sj   t jd} | dd}d| dd }t||\}}t||\}}t ||s+J t ||s3J d S )Nr   
         )nprandomRandomStaterandnr   r   allclose)rngX1X2fpvf2pv2 r+   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/feature_selection/tests/test_feature_select.pytest_f_oneway_vs_scipy_stats+   s   r-   c                  C   sf   t jd} | jddd}t d}t||\}}t|t|\}}t||dd t||dd d S )Nr   r   )r   r   size   decimal)	r   r    r!   randintaranger   astypefloatr   )r$   Xyfintpintr'   pr+   r+   r,   test_f_oneway_ints6   s   
r<   csr_containerc                 C   s   t ddddddddd	d
dd\}}t||\}}t| ||\}}|dk s)J |dk s1J |dk  s9J |d d dk  sEJ |dd  dk sQJ t|| t|| d S N      r      r      r           r   F	n_samples
n_featuresn_informativen_redundant
n_repeated	n_classesn_clusters_per_classflip_y	class_sepshufflerandom_state   皙?-C6?)r   r   allr   r=   r7   r8   Fr(   F_sparse	pv_sparser+   r+   r,   test_f_classifD   s,   

rX   centerTFc           	      C   s   t dddddd\}}t||| d}d|k  sJ |d	k  s"J t|d
}t||| d}t|| t||d d tjf f}tj|dd}|d ddf }t	||dd d S )Ni  r@   rP   Fr   rE   rF   rG   rN   rO   rY   r   r   )rowvarr   r1   )
r   r   rS   r   r   r   hstacknewaxiscorrcoefr   )	rY   r7   r8   corr_coeffssparse_Xsparse_corr_coeffsZcorrelation_matrixnp_corr_coeffsr+   r+   r,   test_r_regressiona   s   



rg   c                 C   s   t dddddd\}}t||\}}|dk sJ |dk s"J |dk  s*J |d d dk  s6J |dd  d	k sBJ t||d
d\}}t| ||d
d\}}t|| t|| t||dd\}}t| ||dd\}}t|| t|| d S )Nr?   r@   rP   Fr   rZ   r   rQ   rR   Tr[   )r   r   rS   r   rT   r+   r+   r,   test_f_regressionw   s"   




rh   c                  C   sf   t jd} | dd}t dt}t||\}}t||t\}}t	||d t	||d d S )Nr   r   r@   rP   )
r   r    r!   randr4   r5   intr   r6   r   )r$   r7   r8   F1pv1F2r*   r+   r+   r,   test_f_regression_input_dtype   s   rn   c                  C   s   t dddd} | j}t |}|d d d  d9  < d|d< t| |d	d
\}}t| |dd
\}}t||d  |d  | t|d d d S )N   r\   r   rA   g      rC   r   Tr[   F      ?       @g@9w?)r   r4   reshaper/   onesr   r   r   )r7   rE   Yrk   _rm   r+   r+   r,   test_f_regression_center   s   
rw   z&X, y, expected_corr_coef, force_finiterA   r   r   r0   )r   r   r   r   rC   gI+?rP   r   rB   )r   r   r   r   c                 C   sT   t   t dt t| ||d}W d   n1 sw   Y  tj|| dS )zCheck the behaviour of `force_finite` for some corner cases with `r_regression`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    errorforce_finiteN)warningscatch_warningssimplefilterRuntimeWarningr   r   testingr   )r7   r8   expected_corr_coefrz   	corr_coefr+   r+   r,   test_r_regression_force_finite   s
   
'r   z;X, y, expected_f_statistic, expected_p_values, force_finiteg
[?rq   gSr.j?)r   r   rA   r   g?gajK?c                 C   sf   t   t dt t| ||d\}}W d   n1 sw   Y  tj|| tj|| dS )zCheck the behaviour of `force_finite` for some corner cases with `f_regression`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    rx   ry   N)r{   r|   r}   r~   r   r   r   r   )r7   r8   expected_f_statisticexpected_p_valuesrz   f_statisticp_valuesr+   r+   r,   test_f_regression_corner_case   s   
Mr   c                  C   s   t ddddddddd	d
dd\} }t| |\}}|dk s J |dk s(J |dk  s0J |d d dk  s<J |dd  dk sHJ d S r>   )r   r   rS   )r7   r8   rU   r(   r+   r+   r,   test_f_classif_multi_class2  s&   
r   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S Nr?   r@   r   rA   r   rB   r   rC   r   FrD      
percentiler   modeparamrP   )
r   r   r   fit	transformr   r   get_supportr   zerosr7   r8   univariate_filterX_rX_r2supportgtruthr+   r+   r,   test_select_percentile_classifK  0   


r   c           
      C   s  t ddddddddd	d
dd\}}| |}ttdd}||||}ttddd|||}t| |  | }t	
d}d|d d< t|| ||}t|s[J t||}	|j|jkshJ t|d d |	f  |  |j|jksJ d S r   )r   r   r   r   r   r   r   toarrayr   r   r   inverse_transformr   issparser   shapennz)
r=   r7   r8   r   r   r   r   r   X_r2invsupport_maskr+   r+   r,   %test_select_percentile_classif_sparsek  s>   




r   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr?   r@   r   rA   r   rB   r   rC   r   FrD   rP   kk_bestr   )
r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_select_kbest_classif  r   r   c                  C   sf   t ddddd\} }ttdd}|| || }t| | ttddd	| || }t|| d S )
Nr@   r   Fr   rE   rF   rN   rO   rS   r   r   r   )r   r   r   r   r   r   r   )r7   r8   r   r   r   r+   r+   r,   test_select_kbest_all  s   

r   dtype_inc                 C   s   t ddddd\}}|| }ttdd}||| | }tjdtd}t	|| t
jtdd	 ||}W d    n1 sBw   Y  |jd
ksNJ |j| ksUJ d S )Nr@   r   Fr   r   r   dtypeNo features were selectedmatch)r@   r   )r   r5   r   r   r   r   r   r   boolr   pytestwarnsUserWarningr   r   r   )r   r7   r8   r   r   r   
X_selectedr+   r+   r,   test_select_kbest_zero  s   


r   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }td}d|d d< dD ]}tt|dd| || }t|| |	 }t
|| q-d S )Nr?   r@   r   rA   r   rB   r   rC   r   FrD   {Gz?alpharP   fdrfprfwer   )r   r   r   r   r   r   r   r   r   r   r   r7   r8   r   r   r   r   r   r   r+   r+   r,   test_select_heuristics_classif  s4   


r   c                 C   s:   | j }|  }tt|| t||  d   d S N)scores_r   r   r   sortsum)score_filterscoresr   r+   r+   r,   assert_best_scores_kept  s   ,r   c                  C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| |  }d|d d t	|f< t||| t|t||t d S )Nr?   r@   rP   Fr   rZ   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   copylogical_notr   r5   r   )r7   r8   r   r   r   r   r   X_2r+   r+   r,   !test_select_percentile_regression	  s*   




r   c                  C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}t|| d S )Nr?   r@   rP   Fr   rZ   d   r   r   r   )r   r   r   r   r   r   r   r   r   r   rt   r   r+   r+   r,   &test_select_percentile_regression_full'  s   



r   c                  C   s   t ddddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| d S )Nr?   r@   rP   Fr   r   rE   rF   rG   rN   rO   noiser   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_select_kbest_regression<  s(   
	

r   c                  C   s   t ddddddd\} }ttdd	}|| || }td}d
|d d< dD ]6}tt|dd| || }t|| |	 }t|d d tj
dtd t|dd  d
kdk s^J q(d S )Nr?   r@   rP   Fr   r   r   r   r   r   r   r   rP   r   r   )r   r
   r   r   r   r   r   r   r   r   rt   r   r   r   r+   r+   r,   !test_select_heuristics_regressionX  s,   
	

 r   c                  C   sp  t ddgddgddgg} t dgdgdgg}t| |\}}t|t ddg t|t dd	g ttd
d}|| | | }t|t ddg ttdd}|| | | }t|t ddg t	tdd}|| | | }	t|	t ddg t
td
d}
|
| | |
 }t|t ddg ttd
d}|| | | }t|t ddg d S )Nr   r@      r   r   g      @ggm?gQaK?gX٬<y?皙?r   TFr   2   r   )r   arrayr   r   r	   r   r   r   r   r   r
   r   )r7   r8   r   pvalues
filter_fdrsupport_fdrfilter_kbestsupport_kbestfilter_percentilesupport_percentile
filter_fprsupport_fpr
filter_fwesupport_fwer+   r+   r,   test_boundary_case_ch2u  s2   r   r   )gMbP?r   r   rG   )r   rP   r   c                    sT   dd t  fddtdD } |ksJ |dkr&| d ks(J d S d S )Nc                 S   s   t dd|d|dd\}}tjdd% tt| d}||||}ttd	| d
|||}W d    n1 s:w   Y  t|| |	 }t
||d  dk}	t
|d | dk}
|	dkrddS |	|
|	  }|S )N   r@   Fr   r   T)recordr   r   r   r   r   rC   )r   r{   r|   r	   r   r   r   r   r   r   r   r   )r   rG   rO   r7   r8   r   r   r   r   num_false_positivesnum_true_positivesfalse_discovery_rater+   r+   r,   
single_fdr  s4   
	
z.test_select_fdr_regression.<locals>.single_fdrc                    s   g | ]} |qS r+   r+   ).0rO   r   rG   r   r+   r,   
<listcomp>  s    z.test_select_fdr_regression.<locals>.<listcomp>r   r   r   )r   meanrange)r   rG   r   r+   r   r,   test_select_fdr_regression  s   $r   c                  C   s   t dddddd\} }ttdd}|| || }ttd	dd
| || }t|| | }t	d}d|d d< t|d d tj
dtd t|dd  dkdk sYJ d S )Nr?   r@   rP   Fr   rZ   r   r   r   r   r   r   r   rA   )r   r   r   r   r   r   r   r   r   r   rt   r   r   r   r+   r+   r,   test_select_fwe_regression  s   



"r   c                  C   s   g dg dg dg dg} dg}dd }| D ]:}t |dd}t|j|g|}|jd dks1J t| t |d	d}t|j|g|}|jd d	ksMJ t| qd S )
Nr   r   r   r   r   r   r   r   r   r   r   r   r   c                 S      | d | d fS Nr   r+   r7   r8   r+   r+   r,   <lambda>      z.test_selectkbest_tiebreaking.<locals>.<lambda>r   rA   )r   r   fit_transformr   r   Xsr8   dummy_scorer7   selr%   r&   r+   r+   r,   test_selectkbest_tiebreaking  s   
r   c                  C   s   g dg dg dg dg} dg}dd }| D ]:}t |dd	}t|j|g|}|jd dks1J t| t |d
d	}t|j|g|}|jd dksMJ t| qd S )Nr   r   r   r   r   c                 S   r   r   r+   r   r+   r+   r,   r     r   z3test_selectpercentile_tiebreaking.<locals>.<lambda>"   r   C   rA   )r   r   r   r   r   r   r+   r+   r,   !test_selectpercentile_tiebreaking  s   
r   c                  C   s   t g dg dg} ddg}tdD ]8}| d d |f }ttdd||}|jdks/J d	|vs5J ttd
d||}|jdksFJ d	|vsLJ qd S )N)'  '  '  r   r   r   r   r   r   r   rA   rA   r   )rA   rA   r  r   r   )	r   r   	itertoolspermutationsr   r   r   r   r   )X0r8   permr7   Xtr+   r+   r,   test_tied_pvalues  s   r
  c                  C   s   t g dg dg dg} ddgddgddgg}ttdd| |}|jdks*J d|vs0J ttd	d
| |}|jdksAJ d|vsGJ d S )N)r   r  r   )r   r  r   )i  c   r   r   r   rA   r   )r   rA   r   r   )r   r   r   r   r   r   r   )r7   r8   r	  r+   r+   r,   test_scorefunc_multilabel  s   r  c                  C   sn   t g dg dg} ddg}dD ]#}tt|d| |}|g dg}t|d t d| d   qd S )	N)r   r   r   r  r   r   )r   rA   r   r   r  r   )r   r   r   r   r   r   r   r4   )X_trainy_trainrF   r   X_testr+   r+   r,   test_tied_scores%  s    r  c                  C   sl   g dg dg dg} g d}t tddttddfD ]}t|j| | t|jd	d
tddg qd S )Nr   r   r   r   r\   r\   r         ?r  r   r   r   rA   r   r   r   T)indicesr   )	r   r   r   r   r   r   r   r   r   )r7   r8   selectr+   r+   r,   	test_nans0  s   

r  c                  C   s   g dg dg dg} g d}d}t jt|d tdd| | W d    n1 s,w   Y  t jt|d td	dd
| | W d    d S 1 sNw   Y  d S )Nr  r  r  r  zDk=4 is greater than n_features=3. All the features will be returned.r   r0   r   r   r   )r   r   r   r   r   r   )r7   r8   msgr+   r+   r,   test_invalid_k?  s   "r  c                  C   sZ   t ddd\} }d| d d df< tt t| | W d    d S 1 s&w   Y  d S )Nr   rP   )rE   rF   rr   r   )r   r   r   r   r   r   r+   r+   r,   test_f_classif_constant_featureJ  s
   "r  c               	   C   s   t jd} | dd}| jdddd}tdd||tdd||tdd||t	dd||t
dd	||g}|D ]/}t| t d tjtd
d ||}W d    n1 scw   Y  |jdksoJ q@d S )Nr   (   r   r0   r.   r   r   r   r   r   r   )r  r   )r   r    r!   ri   r3   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   )r$   r7   r8   strict_selectorsselectorr   r+   r+   r,   test_no_feature_selectedS  s    r  c                  C   s   t dddddddddddd	\} }ttdd
}|| || }ttddd| || }t|| | }t	d}d|d d< t|| t
tdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr   rP   r   r   rA   rC   r   FrD   r   r   r   r  r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_mutual_info_classifh  sF   





r   c                  C   s   t ddddddd\} }ttdd}|| || }t| ttddd	| || }t|| | }t	
d}d
|d d< t|| ttdd}|| || }ttddd	| || }t|| | }t	
d}d
|d d< t|| d S )Nr   r   rA   Fr   r   r   r   r   r   r@   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r+   r,   test_mutual_info_regression  s>   






r!  c                     s   t d} tddd\}}|tjtjd}| j|d dd|d< |j  fd	d
}t	|ddj
dd}|||}t|jg d |j D ]\}}||j| ksUJ qHdS )zmCheck that the output datafarme dtypes are the same as the input.

    Non-regression test for gh-24860.
    pandasT)
return_X_yas_frame)petal length (cm)petal width (cm)r&  r   )binspetal_width_binnedc                    s(   dddddd t  fddD S )	Nr   rA   r   r0   rP   )zsepal length (cm)zsepal width (cm)r%  r&  r(  c                    s   g | ]} | qS r+   r+   )r   namerankingr+   r,   r     s    zBtest_dataframe_output_dtypes.<locals>.selector.<locals>.<listcomp>)r   asarrayr   column_orderr*  r,   r    s   z.test_dataframe_output_dtypes.<locals>.selectorr   r   )r   )r%  r&  r(  N)r   importorskipr   r5   r   float32float64cutcolumnsr   
set_outputr   r   dtypesitems)pdr7   r8   r  r   outputr)  r   r+   r-  r,   test_dataframe_output_dtypes  s$   


r9  r  r   P   r   r   r   r   c                 C   s   t jd}|dd}d	dd}| j|d | | | |}t||ddddf  | |}t||ddddf  dS )
zeCheck support for unsupervised feature selection for the filter that could
    require only `X`.
    r   r   rP   Nc                 S   s   t g dS )N)r   r   r   r   r   )r   r   r   r+   r+   r,   
score_func  s   z,test_unsupervised_filter.<locals>.score_func)r;  r0   r   )	r   r    r!   r"   
set_paramsr   r   r   r   )r  r$   r7   r;  X_transr+   r+   r,   test_unsupervised_filter  s   



r>  )U__doc__r  r{   numpyr   r   numpy.testingr   scipyr   r   sklearn.datasetsr   r   r   sklearn.feature_selectionr   r	   r
   r   r   r   r   r   r   r   r   r   r   sklearn.utilsr   sklearn.utils._testingr   r   r   r   r   sklearn.utils.fixesr   r-   r<   markparametrizerX   rg   rh   rn   rw   r   nanr   finfor1  maxinfr   r   r   r   r   r   r0  r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r  r  r  r  r  r   r!  r9  r>  r+   r+   r+   r,   <module>   s   <


  	 	 	
!  
 
 
 
 
 
 

E 
, 
%"1	,(&

	