o
    Ó\iã4  ã                   @   s$  d Z ddlZddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlmZm Z  eƒ Z!eƒ Z"dd„ Z#ej$ %dee  ¡dd„ ƒZ&dd„ Z'dd„ Z(dd„ Z)dd„ Z*dd„ Z+ej$ %dddg¡dd „ ƒZ,d!d"„ Z-d#d$„ Z.d%d&„ Z/d'd(„ Z0d)d*„ Z1ed+edEi d,d-i¤Žd.ej$ %d/d0d1g¡d2d3„ ƒƒZ2ed+edEi d,d4i¤Žd.ej$ %d/d0d1g¡d5d6„ ƒƒZ3d7d8„ Z4ej$ %d9e¡d:d;„ ƒZ5d<d=„ Z6ej$ %dee  ¡d>d?„ ƒZ7ej$ %d@dAdBg¡ej$ %dddg¡dCdD„ ƒƒZ8dS )FzD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
é    N)ÚMockÚpatch)Úparallel_backend)Úload_diabetesÚ	load_irisÚmake_classification)ÚIsolationForest)Ú_average_path_length)Úroc_auc_score)ÚParameterGridÚtrain_test_split)Úcheck_random_state)Úassert_allcloseÚassert_array_almost_equalÚassert_array_equalÚignore_warnings)ÚCSC_CONTAINERSÚCSR_CONTAINERSc                 C   sš   t  ddgddgg¡}t  ddgddgg¡}tdgg d¢ddgdœƒ}tƒ  |D ]}tdd	| i|¤Ž |¡ |¡ q)W d
  ƒ d
S 1 sFw   Y  d
S )z6Check Isolation Forest for various parameter settings.r   é   é   é   )ç      à?ç      ð?r   TF)Ún_estimatorsÚmax_samplesÚ	bootstrapÚrandom_stateN© )ÚnpÚarrayr   r   r   ÚfitÚpredict)Úglobal_random_seedÚX_trainÚX_testÚgridÚparamsr   r   ú„/var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_iforest.pyÚtest_iforest"   s   ÿÿ
ý"ÿr(   Úsparse_containerc                 C   s¨   t | ƒ}ttjdd… |d\}}tddgddgdœƒ}||ƒ}||ƒ}|D ]+}tdd	| d
œ|¤Ž |¡}	|	 |¡}
tdd	| d
œ|¤Ž |¡}| |¡}t|
|ƒ q&dS )z=Check IForest for various parameter settings on sparse input.Né2   ©r   r   r   TF)r   r   é
   )r   r   r   )	r   r   ÚdiabetesÚdatar   r   r    r!   r   )r"   r)   Úrngr#   r$   r%   ÚX_train_sparseÚX_test_sparser&   Úsparse_classifierÚsparse_resultsÚdense_classifierÚdense_resultsr   r   r'   Útest_iforest_sparse2   s0   ÿÿþ
ÿÿþ
ór6   c                  C   s(  t j} d}tjt|d tdd | ¡ W d  ƒ n1 sw   Y  t ¡  t 	dt¡ tdd | ¡ W d  ƒ n1 sAw   Y  t ¡  t 	dt¡ tt
 d¡d | ¡ W d  ƒ n1 sfw   Y  t t¡ tƒ  | ¡ | dd…d	d…f ¡ W d  ƒ dS 1 sw   Y  dS )
z7Test that it gives proper exception on deficient input.ú3max_samples will be set to n_samples for estimation©Úmatchéè  ©r   NÚerrorÚautor   r   )Úirisr.   ÚpytestÚwarnsÚUserWarningr   r    ÚwarningsÚcatch_warningsÚsimplefilterr   Úint64ÚraisesÚ
ValueErrorr!   )ÚXÚwarn_msgr   r   r'   Útest_iforest_errorL   s    ÿ
þ
þ$"ÿrJ   c               	   C   sF   t j} tƒ  | ¡}|jD ]}|jtt t 	| j
d ¡¡ƒks J ‚qdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r>   r.   r   r    Úestimators_Ú	max_depthÚintr   ÚceilÚlog2Úshape)rH   ÚclfÚestr   r   r'   Útest_recalculate_max_depthb   s
   
&ÿrS   c                  C   s¬   t j} tƒ  | ¡}|j| jd ksJ ‚tdd}d}tjt|d | | ¡ W d   ƒ n1 s1w   Y  |j| jd ks@J ‚tdd | ¡}|jd| jd  ksTJ ‚d S )Nr   iô  r;   r7   r8   gš™™™™™Ù?)	r>   r.   r   r    Úmax_samples_rP   r?   r@   rA   )rH   rQ   rI   r   r   r'   Útest_max_samples_attributej   s   
ÿrU   c                 C   sŒ   t | ƒ}ttj|d\}}td| d |¡}|jdd | |¡}|jdd | |¡}t||ƒ td| d |¡}| |¡}t||ƒ dS )zCheck parallel regression.r+   r   )Ún_jobsr   r   ©rV   r   N)	r   r   r-   r.   r   r    Ú
set_paramsr!   r   )r"   r/   r#   r$   ÚensembleÚy1Úy2Úy3r   r   r'   Ú test_iforest_parallel_regressiony   s   



r]   c           	      C   s´   t | ƒ}d| dd¡ }| t |d |d f¡¡}|dd… }|jdddd	}t |dd… |f¡}t d
gd dgd  ¡}td|d |¡}| 	|¡ }t
||ƒdksXJ ‚dS )z#Test Isolation Forest performs wellg333333Ó?iX  r   Nr:   éÿÿÿÿr   )éÈ   r   )ÚlowÚhighÚsizer   r_   éd   )r   r   g\Âõ(\ï?)r   ÚrandnÚpermutationr   ÚvstackÚuniformr   r   r    Údecision_functionr
   )	r"   r/   rH   r#   Ú
X_outliersr$   Úy_testrQ   Úy_predr   r   r'   Útest_iforest_performance   s   rl   Úcontaminationç      Ð?r=   c              	   C   s¦   ddgddgddgddgddgddgddgddgg}t || d	}| |¡ | |¡ }| |¡}t |dd … ¡t |d d… ¡ksDJ ‚t|d
dg ddg  ƒ d S )Néþÿÿÿr^   r   r   é   é   éûÿÿÿé	   ©r   rm   é   )r   r    rh   r!   r   ÚminÚmaxr   )rm   r"   rH   rQ   Údecision_funcÚpredr   r   r'   Útest_iforest_works¥   s   4

(rz   c                  C   s&   t j} tƒ  | ¡}|j|jksJ ‚d S ©N)r>   r.   r   r    rT   Ú_max_samples)rH   rQ   r   r   r'   Útest_max_samples_consistency´   s   r}   c                  C   sV   t dƒ} ttjd d… tjd d… | d\}}}}tdd}| ||¡ | |¡ d S )Nr   r*   r+   gš™™™™™é?)Úmax_features)r   r   r-   r.   Útargetr   r    r!   )r/   r#   r$   Úy_trainrj   rQ   r   r   r'   Ú test_iforest_subsampled_features»   s   ÿ
r   c                  C   sÌ   dt  d¡t j  d } dt  d¡t j  d }ttdgƒdgƒ ttdgƒdgƒ ttd	gƒd
gƒ ttdgƒ| gƒ ttdgƒ|gƒ ttt  g d¢¡ƒdd
| |gƒ tt  d¡ƒ}t|t  |¡ƒ d S )Nç       @g      @gš™™™™™ù?g     0@g}ÿ­¿Ì÷ÿ?r   g        r   r   r   é   éç  )r   r   rƒ   r„   )	r   ÚlogÚeuler_gammar   r	   r   Úaranger   Úsort)Ú
result_oneÚ
result_twoÚavg_path_lengthr   r   r'   Ú test_iforest_average_path_lengthÆ   s   
þrŒ   c                  C   s¨   ddgddgddgg} t dd | ¡}t ƒ  | ¡}t| ddgg¡| ddgg¡|j ƒ t| ddgg¡| ddgg¡|j ƒ t| ddgg¡| ddgg¡ƒ d S )Nr   r   çš™™™™™¹?)rm   r‚   )r   r    r   Úscore_samplesrh   Úoffset_)r#   Úclf1Úclf2r   r   r'   Útest_score_samplesÚ   s   þþÿr’   c                  C   sv   t dƒ} |  dd¡}tdd| dd}| |¡ |jd }|jdd | |¡ t|jƒdks0J ‚|jd |u s9J ‚dS )	z/Test iterative addition of iTrees to an iForestr   é   r   r,   T)r   r   r   Ú
warm_start)r   N)r   rd   r   r    rK   rX   Úlen)r/   rH   rQ   Útree_1r   r   r'   Útest_iforest_warm_startë   s   ÿ


r—   z*sklearn.ensemble._iforest.get_chunk_n_rowsÚreturn_valuer   )Úside_effectzcontamination, n_predict_calls)rn   r   )r=   r   c                 C   ó   t ||ƒ | j|ksJ ‚d S r{   ©rz   Ú
call_count©Úmocked_get_chunkrm   Ún_predict_callsr"   r   r   r'   Útest_iforest_chunks_works1  ó   
r    r,   c                 C   rš   r{   r›   r   r   r   r'   Útest_iforest_chunks_works2  r¡   r¢   c                  C   st  t  d¡} tƒ }| | ¡ t j d¡}t| | ¡dkƒsJ ‚t| | dd¡¡dkƒs-J ‚t| | d ¡dkƒs:J ‚t| | d ¡dkƒsGJ ‚t  	| dd¡dd¡} tƒ }| | ¡ t| | ¡dkƒseJ ‚t| | dd¡¡dkƒstJ ‚t| t  d¡¡dkƒs‚J ‚| dd¡} tƒ }| | ¡ t| | ¡dkƒs›J ‚t| | dd¡¡dkƒsªJ ‚t| t  d¡¡dkƒs¸J ‚dS )z=Test whether iforest predicts inliers when using uniform data)rc   r,   r   r   rc   r,   N)
r   Úonesr   r    ÚrandomÚRandomStateÚallr!   rd   Úrepeat)rH   Úiforestr/   r   r   r'   Útest_iforest_with_uniform_data  s(   



 r©   Úcsc_containerc                 C   s2   t dddd\}}| |ƒ}tdddd |¡ d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rc   r   ©Ú	n_samplesÚ
n_featuresr   r,   é   r   )r   r   rV   N)r   r   r    )rª   rH   Ú_r   r   r'   Ú*test_iforest_with_n_jobs_does_not_segfault=  s   r°   c                  C   s|   t  d¡} tj d¡}| j| d¡dgd}tddd}t 	¡  t 
dt¡ | |¡ W d	  ƒ d	S 1 s7w   Y  d	S )
z¾Check that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    Úpandasr   rq   Úa)r.   Úcolumnsgš™™™™™©?rt   r<   N)r?   Úimportorskipr   r¤   r¥   Ú	DataFramerd   r   rB   rC   rD   rA   r    )Úpdr/   rH   Úmodelr   r   r'   Ú#test_iforest_preserve_feature_namesH  s   

"þr¸   c                 C   sl   t dddd\}}| |ƒ}| ¡  d}td|dd |¡}| |¡}|dk  ¡ |jd  t |¡ks4J ‚dS )	zÀCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r*   rq   r   r«   r   rƒ   )r   rm   r   N)	r   Úsort_indicesr   r    rh   ÚsumrP   r?   Úapprox)r)   rH   r¯   rm   r¨   Ú
X_decisionr   r   r'   Ú-test_iforest_sparse_input_float_contaminationZ  s   ÿþ
(r½   rV   r   r   c           	   	   C   s   ddgddgddgddgddgddgddgddgg}t | |d	d
}| |¡ | |¡ }| |¡}t |dd	… ¡t |d	d… ¡ksEJ ‚t|ddg ddg  ƒ t | |dd
}| |¡ td|d | |¡}W d	  ƒ n1 stw   Y  t||ƒ d	S )z5Check that `IsolationForest.predict` is parallelized.ro   r^   r   r   rp   rq   rr   rs   N)r   rm   rV   ru   Ú	threadingrW   )	r   r    rh   r!   r   rv   rw   r   r   )	r"   rm   rV   rH   rQ   rx   ry   Úclf_parallelÚpred_paralellr   r   r'   Útest_iforest_predict_paralleln  s"   4ÿ

(ÿ
ÿrÁ   r   )9Ú__doc__rB   Úunittest.mockr   r   Únumpyr   r?   Újoblibr   Úsklearn.datasetsr   r   r   Úsklearn.ensembler   Úsklearn.ensemble._iforestr	   Úsklearn.metricsr
   Úsklearn.model_selectionr   r   Úsklearn.utilsr   Úsklearn.utils._testingr   r   r   r   Úsklearn.utils.fixesr   r   r>   r-   r(   ÚmarkÚparametrizer6   rJ   rS   rU   r]   rl   rz   r}   r   rŒ   r’   r—   r    r¢   r©   r°   r¸   r½   rÁ   r   r   r   r'   Ú<module>   sh    

þþ"


