o
    \iU                     @   s  d Z ddlZddlZddlZddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.g dZ/g dZ0ddgddgddggZ1g dZ2g dZ3e4 Z5e-6e5j7j8Z9ee5j:e5j7e-d\e5_:e5_7e; Z<ee<j:e<j7e-d\e<_:e<_7dd Z=dd Z>dd Z?d d! Z@d"d# ZAejBCd$g d%d&d' ZDd(d) ZEd*d+ ZFd,d- ZGd.d/ ZHd0d1 ZId2d3 ZJd4d5 ZKejBCd6eLg e'e(e*e&e)e'd7e(  d8d9 ZMejBCd6eLg e'e(e*e&e)e'd7e(  d:d; ZNd<d= ZOd>d? ZPd@dA ZQdBdC ZRdDdE ZSejBCdFe e5j:e5j7fe e<j:e<j7fgdGdH ZTdIdJ ZUdKdL ZVdMdN ZWdS )Oz6Testing for the boost module (sklearn.ensemble.boost).    N)datasets)BaseEstimatorclone)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressor)_samme_proba)LinearRegression)GridSearchCVtrain_test_split)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)assert_allcloseassert_array_almost_equalassert_array_equal)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSDOK_CONTAINERSLIL_CONTAINERS      )foor   r   r   r   r   )r   r   r   r   r   r      )r   r   r   )r   r   r   random_statec                     s   t g dg dg dg dg  t  jddd d t jf   G  fddd} |  }t|d	t  }t|j j t 	|
 sHJ tt j|ddg d
 tt j|ddg d d S )N)r   ư>r   )gRQ?g333333?皙?)igRQ?g      ?)r#   r   g&.>r   axisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXprobs /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s   z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r0   r.   r,   r.   r/   MockEstimatorB   s    r4   r    )r   r   r   r   )r   r   r   r   )nparrayabssumnewaxisr	   	ones_liker   r(   isfiniteallargminargmax)r4   mocksamme_probar.   r,   r/   test_samme_proba7   s   $rA   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r5   oneslenr+   r   fitr   r0   )y_tclfr.   r.   r/   test_oneclass_adaboost_probaT   s   "rG   c                  C   sx   t dd} | tt t| tt tt	t
t| j | tjttdfks-J | tjttfks:J d S )Nr   r!   r   )r   rD   r+   y_classr   predictT	y_t_classr5   uniqueasarrayclasses_r0   r(   rC   decision_functionrF   r.   r.   r/   test_classification_toy]   s   
rQ   c                  C   s*   t dd} | tt t| tt d S Nr   r!   )r   rD   r+   y_regrr   rI   rJ   y_t_regrrP   r.   r.   r/   test_regression_toyg   s   
rU   c                  C   s   t tj} t }|tjtj t| |j |	tj}|j
d t| ks(J |tjj
d t| ks7J |tjtj}|dksJJ d|t|jdksSJ ttdd |jD t|jksfJ d S )Nr   g?zFailed with score = c                 s       | ]}|j V  qd S r'   r!   .0estr.   r.   r/   	<genexpr>       ztest_iris.<locals>.<genexpr>)r5   rL   iristargetr   rD   datar   rN   r0   r(   rC   rO   scoreestimators_set)classesrF   probar_   r.   r.   r/   	test_irisn   s   *rd   loss)linearsquareexponentialc                 C   st   t | dd}|tjtj |tjtj}|dksJ t|jdks%J ttdd |jD t|jks8J d S )Nr   )re   r"   g?r   c                 s   rV   r'   r!   rW   r.   r.   r/   rZ      r[   z test_diabetes.<locals>.<genexpr>)	r   rD   diabetesr^   r]   r_   rC   r`   ra   )re   regr_   r.   r.   r/   test_diabetes   s   *rk   c            
      C   s  t jd} | jdtjjd}| jdtjjd}tdd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}d	d |jtj
tj|dD }	t|dksjJ t||d
  t|dksyJ t||d
  t|	dksJ t||	d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}dd |jtj
tj|dD }	t|dksJ t||d
  t|	dksJ t||	d
  d S )Nr   
   sizen_estimatorssample_weightc                 S      g | ]}|qS r.   r.   rX   pr.   r.   r/   
<listcomp>       z'test_staged_predict.<locals>.<listcomp>c                 S   rs   r.   r.   rt   r.   r.   r/   rv      rw   c                 S   rs   r.   r.   rX   sr.   r.   r/   rv      s    r   rp   r"   c                 S   rs   r.   r.   rt   r.   r.   r/   rv      rw   c                 S   rs   r.   r.   rx   r.   r.   r/   rv      s    )r5   randomRandomStaterandintr\   r]   r(   ri   r   rD   r^   rI   staged_predictr0   staged_predict_probar_   staged_scorerC   r   r   )
rngiris_weightsdiabetes_weightsrF   predictionsstaged_predictionsrc   staged_probasr_   staged_scoresr.   r.   r/   test_staged_predict   sB   

r   c                  C   sf   t t d} ddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N	estimator)r   r   )rp   estimator__max_depthr   r   r"   )
r   r   r   rD   r\   r^   r]   r   r   ri   )boost
parametersrF   r.   r.   r/   test_gridsearch   s   


r   c                  C   s   dd l } t }|tjtj |tjtj}| |}| |}t	||j
ks*J |tjtj}||ks8J tdd}|tjtj |tjtj}| |}| |}t	||j
ks`J |tjtj}||ksnJ d S rR   )pickler   rD   r\   r^   r]   r_   dumpsloadstype	__class__r   ri   )r   objr_   ry   obj2score2r.   r.   r/   test_pickle   s"   




r   c               	   C   sp   t jdddddddd\} }t }|| | |j}|jd dks#J |d dtjf |dd  k s6J d S )Ni  rl   r    r   Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeatedr   r"   )	r   make_classificationr   rD   feature_importances_r(   r5   r9   r<   )r+   yrF   importancesr.   r.   r/   test_importances   s   

*r   c                  C   s\   t  } td}tjt|d | jttt	
dgd W d    d S 1 s'w   Y  d S )Nz*sample_weight.shape == (1,), expected (6,)matchr   rq   )r   reescapepytestraises
ValueErrorrD   r+   rH   r5   rM   )rF   msgr.   r.   r/   ,test_adaboost_classifier_sample_weight_error  s
   
"r   c                  C   s   ddl m}  t|  }|tt tt }|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}g d}tt }tjtdd ||| W d    d S 1 slw   Y  d S )	Nr   )RandomForestClassifier)RandomForestRegressorr!   r   )r   barr   r   zworse than randomr   )sklearn.ensembler   r   rD   r+   rS   r   rH   r   r   r   r   r   r   )r   rF   r   X_faily_failr.   r.   r/   test_estimator	  s    


"r   c                  C   sT   d} t ddd}tjt| d |tjtj W d    d S 1 s#w   Y  d S )Nz+Sample weights have reached infinite values   g      7@)rp   learning_rater   )r   r   warnsUserWarningrD   r\   r^   r]   )r   rF   r.   r.   r/   test_sample_weights_infinite%  s
   "r   z(sparse_container, expected_internal_type   c                    s   G dd dt }tjddddd\}}t|}t||dd	\}}}}| |}	| |}
t|d
ddd|	|}t|d
ddd||}||
}||}t	|| |
|
}|
|}t|| ||
}||}t|| ||
}||}t|| ||
|}|||}t|| ||
}||}t||D ]	\}}t|| q||
}||}t||D ]	\}}t	|| q||
}||}t||D ]	\}}t|| q||
|}|||}t||D ]	\}}t	|| qdd |jD }t fdd|D sJ d S )Nc                       "   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                        t  j|||d t|| _| S z<Modification on fit caries data type for later verification.rq   superrD   r   
data_type_r*   r+   r   rr   r   r.   r/   rD   ?     
z1test_sparse_classification.<locals>.CustomSVC.fitr'   r1   r2   r3   __doc__rD   __classcell__r.   r.   r   r/   	CustomSVC<      r   r         *   )	n_classesr   r   r"   r   r!   T)probabilityr   c                 S      g | ]}|j qS r.   r   rX   ir.   r.   r/   rv         z.test_sparse_classification.<locals>.<listcomp>c                       g | ]}| kqS r.   r.   rX   texpected_internal_typer.   r/   rv         )r   r   make_multilabel_classificationr5   ravelr   r   rD   rI   r   rO   r   predict_log_probar0   r_   staged_decision_functionzipr~   r   r   r`   r<   )sparse_containerr   r   r+   r   X_trainX_testy_trainy_testX_train_sparseX_test_sparsesparse_classifierdense_classifiersparse_clf_resultsdense_clf_resultssparse_clf_resdense_clf_restypesr.   r   r/   test_sparse_classification,  sj   	




















 r   c                    s   G dd dt }tjddddd\}}t||dd	\}}}}| |}	| |}
t| dd
|	|}t| dd
||}||
}||}t|| ||
}||}t	||D ]	\}}t|| qZdd |j
D }t fdd|D syJ d S )Nc                       r   )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    r   r   r   r   r   r.   r/   rD     r   z-test_sparse_regression.<locals>.CustomSVR.fitr'   r   r.   r.   r   r/   	CustomSVR  r   r   r   2   r   r   )r   r   	n_targetsr"   r   r!   r   c                 S   r   r.   r   r   r.   r.   r/   rv     r   z*test_sparse_regression.<locals>.<listcomp>c                    r   r.   r.   r   r   r.   r/   rv     r   )r   r   make_regressionr   r   rD   rI   r   r~   r   r`   r<   )r   r   r   r+   r   r   r   r   r   r   r   sparse_regressordense_regressorsparse_regr_resultsdense_regr_resultssparse_regr_resdense_regr_resr   r.   r   r/   test_sparse_regression  s,   	





r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jks!J dS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r'   r.   )r*   r+   r   r.   r.   r/   rD     s   zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r5   zerosr(   r)   r.   r.   r/   rI     s   zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r1   r2   r3   rD   rI   r.   r.   r.   r/   DummyEstimator  s    r   r    ro   N)r   r   rD   r+   rS   rC   estimator_weights_estimator_errors_)r   r   r.   r.   r/   %test_sample_weight_adaboost_regressor  s   r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   3   r    r   most_frequent)strategyN)r5   r{   r|   randnchoicer   r   rD   rI   r0   r   r   )r   r+   ycyrr   r.   r.   r/   test_multidimensional_X  s   



r  c                  C   sp   t jt j} }tt }t|d}d|jj}t	j
t|d || | W d    d S 1 s1w   Y  d S )Nr   z {} doesn't support sample_weightr   )r\   r^   r]   r   r   r   formatr   r1   r   r   r   rD   )r+   r   r   rF   err_msgr.   r.   r/   -test_adaboostclassifier_without_sample_weight  s   

"r  c            
      C   sL  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k sJ ||	k sJ |t|	ksJ d S )Nr   r   d     )numg?r$   g-C6?r   r   rl   i'  r   rp   r"   rq   )r5   r{   r|   linspacerandr(   reshaper   r
   r   rD   r:   r_   r   approx)
r   r+   r   regr_no_outlierregr_with_weightregr_with_outlierrr   score_with_outlierscore_no_outlierscore_with_weightr.   r.   r/   $test_adaboostregressor_sample_weight   s,    
r  c                  C   sX   t tjddddi\} }}}tdd}|| | ttj||dd|	| d S )NT)
return_X_yr"   r   r!   r   r%   )
r   r   load_digitsr   rD   r   r5   r>   r0   rI   )r   r   r   r   modelr.   r.   r/    test_adaboost_consistent_predict&  s   

r  zmodel, X, yc                 C   sZ   t |}d|d< d}tjt|d | j|||d W d    d S 1 s&w   Y  d S )Nir   z1Negative values in data passed to `sample_weight`r   rq   )r5   r:   r   r   r   rD   )r  r+   r   rr   r  r.   r.   r/   #test_adaboost_negative_weight_error5  s   
"r  c                  C   s~   t jd} | jdd}| jddgdd}t |d }tdd	d
}t|dd	d}|j|||d t 	|j
 dks=J dS )zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r  rl   rm   r   r   r  gtDS 'T	rl      )	max_depthr"      r	  rq   N)r5   r{   r|   normalr   r:   r   r   rD   isnanr   r8   )r   r+   r   rr   tree	ada_modelr.   r.   r/   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsE  s   r!  c                 C   s  d}t j|d| d\}}td| d||}||}t|jddddd tt	|dd	|d  hks7J |
|D ]}t|jddddd tt	|dd	|d  hksZJ q<|jd
d|| ||}t|jddddd |
|D ]}t|jddddd qzdS )zCheck that the decision function respects the symmetric constraint for weak
    learners.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/26520
    r    r   )r   n_clusters_per_classr"   rz   r%   r   g:0yE>)atolr   r   ro   N)r   r   r   rD   rO   r   r8   ra   r5   rL   r   
set_params)global_random_seedr   r+   r   rF   y_scorer.   r.   r/   test_adaboost_decision_functionV  s"   

"$
r'  c                  C   sL   t ddd} tjtdd | tt W d    d S 1 sw   Y  d S )Nr   SAMME)rp   	algorithmz'The parameter 'algorithm' is deprecatedr   )r   r   r   FutureWarningrD   r+   rH   )adaboost_clfr.   r.   r/   test_deprecated_algorithm|  s   "r,  )Xr   r   numpyr5   r   sklearnr   sklearn.baser   r   sklearn.dummyr   r   r   r   r   !sklearn.ensemble._weight_boostingr	   sklearn.linear_modelr
   sklearn.model_selectionr   r   sklearn.svmr   r   sklearn.treer   r   sklearn.utilsr   sklearn.utils._mockingr   sklearn.utils._testingr   r   r   sklearn.utils.fixesr   r   r   r   r   r{   r|   r   r+   rH   rS   rJ   rK   rT   	load_irisr\   permutationr]   rn   permr^   load_diabetesri   rA   rG   rQ   rU   rd   markparametrizerk   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r!  r'  r,  r.   r.   r.   r/   <module>   s    	(
	

-

Z

/	&
	&