o
    \iM                     @   sV  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( ej)*dddgej)*dddgej)*dddgdd Z+ej)*dddgej)*dddgdd Z,ej)*dddgej)*dddgd>ddZ-dd  Z.d!d" Z/d#d$ Z0ej)*dd%dgd&d' Z1ej)*dg d(ej)*dddgd)d* Z2ej)*d+d,d-gd.d/ Z3d0d1 Z4d2d3 Z5ej)*d4d5d6gd5d6gfd5d6ged5ed6d7fd5d6gd8d9 fgd:d; Z6d<d= Z7dS )?    N)parallel_backend)assert_allclose)ColumnTransformer)load_diabetes	load_irismake_classificationmake_regression)DummyClassifier)RandomForestClassifierRandomForestRegressor)SimpleImputer)permutation_importance)LinearRegressionLogisticRegression)
get_scorermean_squared_errorr2_score)train_test_split)make_pipeline)KBinsDiscretizerOneHotEncoderStandardScalerscale)_convert_containern_jobs      max_samples      ?      ?sample_weightonesc              
   C   s   t jd}d}tdd\}}||jd|jd d dd	}t ||g}|d
kr/t |n|}t	ddd}	|	
|| t|	|||||| |d}
|
jj|jd	 |fksVJ t |
jd |
jd d ksgJ d S )N*      T)
return_X_yMbP?r   r   sizer   r!   
   n_estimatorsrandom_state)r    	n_repeatsr,   r   r   )nprandomRandomStater   normalshapereshapehstack	ones_liker   fitr   importancesallimportances_mean)r   r   r    rngr-   Xyy_with_little_noiseweightsclfresult rA   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/inspection/tests/test_permutation_importance.py9test_permutation_importance_correlated_feature_regression   s(    &rC   c              	   C   s   t d}tjd}d}t }|j|j}}||jd|j	d d 
dd}|j||jd	}||d
< tddd}	|	|| t|	||||| |d}
|
jj	|j	d |fksWJ t|
jd |
jd d kshJ d S )Npandasr"   r#   r%   r   r&   r(   r   )columnscorrelated_featurer)   r*   r-   r,   r   r   )pytestimportorskipr.   r/   r0   r   datatargetr1   r2   r3   	DataFramefeature_namesr
   r6   r   r7   r8   r9   )r   r   pdr:   r-   datasetr;   r<   r=   r?   r@   rA   rA   rB   @test_permutation_importance_correlated_feature_regression_pandasC   s*   
 
&rP   r"   c              	      s  t j|}d}d}d}d}d}|| }	t |}
|j|
|d t  fdd|
d | D }|t j}||k s<J t j||	||gdd}|j
||	fksRJ t| d	|d
\}}}}td|d}||| |j}|d | }||d  }| | k sJ t|||||| |d}|jj
|j
d |fksJ |jd | }|j|d  }tt |dksJ | dk sJ | dksJ d S )Nr#     r   r   )r'   c                    s   g | ]
} |k d dqS )r(   r   )r3   ).0cr<   rA   rB   
<listcomp>~   s    zEtest_robustness_to_high_cardinality_noisy_feature.<locals>.<listcomp>)axisr   )	test_sizer,   r*   rG   gHz>g?g333333?)r.   r/   r0   arangechoicer4   astypefloat32concatenaterandnr2   r   r
   r6   feature_importances_maxminr   r7   r9   abs)r   r   seedr:   r-   	n_samples	n_classesn_informative_featuresn_noise_features
n_featuresclassesr;   X_trainX_testy_trainy_testr?   tree_importancesinformative_tree_importancesnoisy_tree_importancesrinformative_importancesnoisy_importancesrA   rT   rB   1test_robustness_to_high_cardinality_noisy_featurek   sL   
 
rs   c                  C   s  t jd} d}t dddt jgg dgj}t g d}tt tdd	}|	|| t
||||| d
}|jj|jd |fksCJ t |jd |jd d ksTJ t jd} t
||||| d
}|jj|jd |fkspJ t |j|jrzJ t |jd |jd d ksJ d S )Nr"      r          @      @)r   r   r   r   r   r   r   r   lbfgssolverr-   r,   r   r(   r   )r.   r/   r0   arraynanTr   r   r   r6   r   r7   r2   r8   r9   allclose)r:   r-   r;   r<   r?   r@   result2rA   rA   rB   'test_permutation_importance_mixed_types   s   "&r   c            	      C   s   t d} tjd}d}| dddtjgg dd}tg d	}tt	 t
 }td
|dgfdt dgfg}t|tdd}||| t|||||d}|jj|jd |fks]J t|jd |jd d ksnJ d S )NrD   r"   r#   r   ru   rv   )abr   r   )col1col2rw   numr   catr   rx   ry   r{   r   r(   )rH   rI   r.   r/   r0   rL   r}   r|   r   r   r   r   r   r   r6   r   r7   r2   r8   r9   )	rN   r:   r-   r;   r<   num_preprocess
preprocessr?   r@   rA   rA   rB   .test_permutation_importance_mixed_types_pandas   s   
&r   c                  C   sf   t dddd\} }t| } t|}t | |}d|jd  }t|| |ddd}t||jd	d
d d S )N  r)   r   rc   rg   r,   r   2   neg_mean_squared_error)r-   scoringg?gư>)rtolatol)r   r   r   r6   coef_r   r   r9   )r;   r<   lrexpected_importancesresultsrA   rA   rB   .test_permutation_importance_linear_regresssion   s   


r   r   c           	   	   C   s   t dddd\}}t ||}t|||ddd| d}|d  }|d  }|| d	ks/J t|||ddd
d}t|d |d  td t|||ddd
d}W d    n1 s[w   Y  t|d |d  d S )Nr   r)   r   r   r#   r   rG   r7   333333?r   )r-   r,   r   	threading)r   r   r6   r   r`   r_   r   r   )	r   r;   r<   r   importance_sequentialimp_minimp_maximportance_processesimportance_threadingrA   rA   rB   ;test_permutation_importance_equivalence_sequential_parallel  s,   
r   )Nr   r   c              	   C   s6  t d}tdddd\}}||}tdddd	}||d
d}t||g}|j	j
dks2J || }t|j}|||< || j	|j	ksLJ tt|t|_tdddd}	|	|| d}
t|	|||
d| |d}|d  }|d  }|| dksJ t|	|||
d| |d}t|d |d  d S )NrD   d   r#   r   r      ordinalaveraged_inverted_cdf)n_binsencodequantile_methodr(   r   f)r+   	max_depthr,   rG   r7   r   )rH   rI   r   rL   r   fit_transformr3   r.   r4   dtypekindCategoricalravellenrE   rX   rZ   strindexr   r6   r   r`   r_   r   )r   r   rN   r;   r<   X_dfbinner
cat_columnnew_col_idxrfr-   importance_arrayr   r   importance_dataframerA   rA   rB   7test_permutation_importance_equivalence_array_dataframe-  sV   


	r   
input_typer|   	dataframec           	      C   s~   t dd}}t||dd\}}|jdksJ t|| }tdd||}d}t||||d	d
}t||f}t	||j
 d S )Ng     j@rt   r   r   g    .Aprior)strategyr#   r   )r-   r   )intr   nbytesr   r	   r6   r   r.   zerosr   r7   )	r   rc   rg   r;   r<   r?   r-   rp   r   rA   rA   rB   /test_permutation_importance_large_memmaped_datas  s   

r   c               	   C   s  t jd} d}d}|d }| dd||f}t |}d|d |df  |d |df  |d |< ||d df d||d df   ||d < tdd}||| t|||dd	d
d}|jd |jd  }|t	
ddksqJ t |}	t|||dd	d
|	d}|jd |jd  }
|
t	
|dksJ t t d|t d|g}	||||	 t|||dd	d
|	d}|jd |jd  }|| t	
ddksJ d S )Nr   rQ   r   g        r%   r   F)fit_interceptneg_mean_absolute_error   r,   r   r-   g{Gz?r,   r   r-   r    g    _Br   )r.   r/   r0   r1   r   r   r6   r   r9   rH   approxr!   r4   repeat)r:   rc   rg   n_half_samplesxr<   r   pix1_x2_imp_ratio_w_nonewx1_x2_imp_ratio_w_onesx1_x2_imp_ratio_wrA   rA   rB   )test_permutation_importance_sample_weight  sP   
,,

		r   c               
   C   s   dd } t ddgddgg}t ddg}t ddg}t }||| zt|||d| dd W n ty@   td Y nw tt t|||d| d|d	 W d    d S 1 s]w   Y  d S )
Nc                 S   s   dS )Nr   rA   	estimatorr;   r<   rA   rA   rB   	my_scorer  s   zJtest_permutation_importance_no_weights_scoring_function.<locals>.my_scorerr   r   r   rt   r   zpermutation_test raised an error when using a scorer function that does not accept sample_weight even though sample_weight was Noner   )	r.   r|   r   r6   r   	TypeErrorrH   failraises)r   r   r<   r   r   rA   rA   rB   7test_permutation_importance_no_weights_scoring_function  s$   	"r   z list_single_scorer, multi_scorerr2r   r   r   c                 C   s$   t || |t|| | dS )Nr   )r   predictr   r   rA   rA   rB   <lambda>  s   r   c           	   	   C   s   t dddd\}}t ||}t|||d|dd}t| t| ks&J | D ]}|| }t|||d|dd}t|j|j q(d S )Nr   r)   r   r   r   r   r   )r   r   r6   r   setkeysr   r7   )	list_single_scorermulti_scorerr   r<   r   multi_importancescorermulti_resultsingle_resultrA   rA   rB   (test_permutation_importance_multi_metric  s   r   c                  C   sv   t dgj} t g d}t }|| | d}tjt|d t|| |dd W d   dS 1 s4w   Y  dS )zjCheck that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    )r   ru   rv   g      @rw   z max_samples must be <= n_samples)matchr#   )r   N)	r.   r|   r~   r   r6   rH   r   
ValueErrorr   )r;   r<   r?   err_msgrA   rA   rB   -test_permutation_importance_max_samples_error  s   "r   )r"   )8numpyr.   rH   joblibr   numpy.testingr   sklearn.composer   sklearn.datasetsr   r   r   r   sklearn.dummyr	   sklearn.ensembler
   r   sklearn.imputer   sklearn.inspectionr   sklearn.linear_modelr   r   sklearn.metricsr   r   r   sklearn.model_selectionr   sklearn.pipeliner   sklearn.preprocessingr   r   r   r   sklearn.utils._testingr   markparametrizerC   rP   rs   r   r   r   r   r   r   r   r   r   r   rA   rA   rA   rB   <module>   sj    #&O
(D
=
