o
    \ir                    @   s  U d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	m
Z
 ddlZddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z; ddl4m<Z= ddl>m?Z? ddl@mAZAmBZBmCZCmDZDmEZEmFZF ddlGmHZHmIZImJZJmKZK ddlLmMZM dZNdZOe)e+dZPe*e,dZQeR ZSeReTd< eSUeP eSUeQ g dZVeWg dg dg dg dg d g d!g d"g d#g d$g d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2gZXg d3ZYg d4ZZd5d6gd6d6gd6d5gd7d7gd7d8gd8d7ggZ[g d9Z\d6d6gd8d8gd:d8ggZ]g d;Z^e_ Z`ejabd7Zcecde`jejfZge`jheg e`_he`jeeg e`_eei ZjecdejjejfZgejjheg ej_hejjeeg ej_eek ZlecdeljejfZgeljheg el_heljeeg el_eeMdZmejndd<d=d>\ZoZpemjqd?d@ZrdAererdBk< emjsddCdDd@Zte'dEd=dFddGu Zve`jhe`jedHejjhejjedHeljheljedHe[e\dHeXeYdHeXeZdHeoepdHeretdHer etdHevetdHewdIetdHdJZxdKdL ZydMdN ZzdOdP Z{ej|}dQeQ~ ej|}dReOdSdT ZdUdV ZdWdX Zej|}dYeQ ej|}dReOdZd[ ZeFej|}dYeQ ej|}d\d]d^e d_fd`dEe d_fdad^e d_fdbd^ed<fgdcdd Zdedf Zdgdh Zdidj Zdkdl Zdmdn Zdodp Zdqdr Zdsdt Zdudv Zdwdx Zdydz ZdKd{d|Zej|}d}eSd~d Zej|}d}eVej|}deJdd Z	dKddZej|}d}eSdd Zej|}d}eVej|}deJdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zej|}d}ePdd Zej|}d}ePdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdKddZej|}deVej|}dddd Zej|}deeeVeQej|}dddgdd Zej|}deVej|}dg dej|}deJdd Zej|}dee
dd eVD eOee
dd eVD eN ej|}dg dej|}deJdd Zej|}deVej|}deeJeKddń ZddǄ Zej|}d}eSddɄ Zej|}d}eSej|}ddgeJ dd̄ Zej|}d}eSdd΄ Zej|}d}eVej|}deKddф Zddӄ Zej|}d}eSddՄ Zej|}d}eSej|}deKddׄ Zddل Zddۄ Zej|}ddgeJ dd݄ Zej|}deeex ddh ej|}de)e+gdd Zej|}dex ej|}de*e,gdd Zdd Zdd Zdd Zej|}d}eSej|}dddgej|}ddgeJ eK dd Zej|}dRg dej|}dQeQ~ dd Zej|}ded:dd Zdd Zej|}dQe)e+gej|}dd8dCgdd Zdd Zdd Zdd Zdd  Zΐdd Zϐdd ZАdd Zѐdd ZҐd	d
 ZӐdd Zej|}dee0~ e1~ dd ZՐdd Zej|}dQeS~ dd Zej|}dRd]dagdd Zej|}ded:ej|}dRd]dagdd Zej|}dRddgdd Zej|}dRddgdd Zej|}dRddgdd Zej|}dRddgd d! Zej|}ddgeK ej|}d"e*d`d#e,d`d#gd$d% Zej|}dQeQ~ d&d' Zߐd(d) Zej|}d*eje*dfeje,d+fee)d,fee+d-fgej|}d.dd/gd0d1 Zej|}d2eeP~ d3d4gd5d6 Zej|}d7eje*feje)fgd8d9 Zd:d; Zej|}dQe*e,gej|}d<eWejd8ejdCd=d>geWejejd:dCd=d>geWd7d8d:dCejejgeWd7d8d:ejd>ejggej|}dRd]dagd?d@ ZdAdB ZdCdD ZdEdF ZdGdH ZdIdJ ZdS (L  z-
Testing for the tree module (sklearn.tree).
    N)chainpairwiseproduct)NumpyPickler)assert_allclose)clonedatasetstree)DummyRegressor)NotFittedError)SimpleImputer)accuracy_scoremean_poisson_deviancemean_squared_error)cross_val_scoretrain_test_split)make_pipeline)_sparse_random_matrix)DecisionTreeClassifierDecisionTreeRegressorExtraTreeClassifierExtraTreeRegressor)CRITERIA_CLFCRITERIA_REGDENSE_SPLITTERSSPARSE_SPLITTERS)_py_sort)
NODE_DTYPE	TREE_LEAFTREE_UNDEFINED_build_pruned_tree_py_check_n_classes_check_node_ndarray_check_value_ndarray)Tree)compute_sample_weight)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warningsskip_if_32bit)	_IS_32BITCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS)check_random_state)ginilog_loss)squared_errorabsolute_errorfriedman_msepoisson)r   r   )r   r   	ALL_TREES)r   r      r   r   r      ir   r   r   r   r   )r   r         r   r:   r   r   r9   皙?r   r8   r9   )r?   r   r         r   r    @r9   r   r   r@   r   r9   )r?   r?   r   g333333r   r   r   r   r   r   r>   r   r   r9   )r?   r?   r   r   r   r   r   r<   r   r   r   r   r   r9   )r?   r   r8   
   r8   r   皙	r   r8   r<   r:   r9   )zG @r         r      r   r   rE            ?r   rC   r9   )rF   r   rG   rH   r   rI   r   r   rE   rJ   r   r   rB   r9   )rF      rG   rH   r   rI   r   r   rE   rJ   r   r   rB   r9   )rF   rL   rG   rH   r   rI   r   r   rE   rJ   rK   r   r?   r   )   rL   r;   r9   rK   r:   rD   r   r9   r=   r<   r   rM   r   )rM   r   r9   r9   r9   r?   r9   r   r   rB   r<   r   r9   r   )rM   r   r9   rM   r<   r?   rD   rM   r   r?   r9   rM   rM   r   )r9   r9   r   rM   rM   r?   r9   rM   r   r=   r9   rM   r<   r   )r<   r9   r   r<   r   r:   rD   r   r9   r=   r<   r   r<   r9   )rF   rL   rG   rH   r   r9   r   r   rE   rJ   rK   r   rC   r9   )rF   rL   rG   rH   r   r9   r   r   rE   rJ         ?r9   r?   r?   )rF   rL   rG   rH   r   rD   r   r   rE   rJ   rK   r   r?   r?   )rM   r   r;   r9   rK   rB   rD   r   r9   r=   r<   r9   r   r?   )rM   r   r9   r9   r9   rB   r9   r   r   rB   r   r   r   r9   )rM   r9   r9   r9   rM   r?   rD   rM   r   r?   r   rM   r9   r9   )r9   r9   r   r   r9   rC   r9   rM   r   r=   r9   rM   r9   r9   )r<   r9   r   r9   r   r:   r9   r   r9   rB   r   r   r9   r   )r9   r9   r   r   r   r   r9   r9   r9   r9   r9   r9   r   r   r   r9   r   r   r9   r   r   r   r   )      ?rA   333333?皙?rD   g333333@@g)\(?{Gz?gףp=
@rR   g?        rP   rM   rI   r   r         @g|?5^?g(\??r   rB   r?   r9   rM   )r?   r?   r?   r9   r9   r9   r<   )r?   r9   r9      rD   )random_state	n_samples
n_features)   r;   sizerT   g?r8   )r[   r[   g      ?)densityrX   Xy)r[   r<   )irisdiabetesdigitstoy	clf_small	reg_small
multilabel
sparse-pos
sparse-neg
sparse-mixzerosc                 C   s   |j | j ksJ d||j | j t| j|j|d  t| j|j|d  | jtk}t|}t| j| |j| |d  t| j	| |j	| |d  t| j
 |j
 |d  t| j
|j
|d  t| j|j|d d	 t| j| |j| |d
 d	 d S )Nz({0}: inequal number of node ({1} != {2})z: inequal children_rightz: inequal children_leftz: inequal featuresz: inequal thresholdz: inequal sum(n_node_samples)z: inequal n_node_samplesz: inequal impurityerr_msgz: inequal value)
node_countformatr(   children_rightchildren_leftr   nplogical_notfeature	thresholdn_node_samplessumr&   impurityr'   value)dsmessageexternalinternal r   }/var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/tree/tests/test_tree.pyassert_tree_equal   s>   



r   c                  C   st   t  D ]3\} }|dd}|tt t|ttd	|  |ddd}|tt t|ttd	|  qd S )Nr   rX   Failed with {0}r9   )max_featuresrX   )
	CLF_TREESitemsfitr`   ra   r(   predictTtrue_resultrp   namer$   clfr   r   r   test_classification_toy   s   
r   c                  C   s   t  D ]<\} }|dd}|jtttttd t|	t
td|  |jtttttdd t|	t
td|  qd S )Nr   r   sample_weightr   rK   )r   r   r   r`   ra   rs   oneslenr(   r   r   r   rp   fullr   r   r   r    test_weighted_classification_toy   s   
r   r$   	criterionc                 C   s   |dkrt t td }t t| }t t| }nt}t}| |dd}|t| t|	t
| | |ddd}|t| t|	t
| d S )Nr6   r9   r   rX   r   r   rX   )rs   absminra   arrayr   r   r`   r   r   r   )r$   r   ay_trainy_testregr   r   r   r   test_regression_toy  s   r   c                  C   s   t d} d| d dd df< d| dd dd f< t | j\}}t | | gj}|  } t D ]9\}}|dd}|	||  |
|| dksSJ d||ddd}|	||  |
|| dksnJ d|q5d S )	N)rD   rD   r9   r;   r   r   rO   r   rX   r   )rs   rl   indicesshapevstackravelr   r   r   r   scorerp   )ra   gridxgridyr`   r   r$   r   r   r   r   test_xor  s   

 r   c                  C   s   t t tD ]Q\\} }}||dd}|tjtj t|	tjtj}|dks2J d
| ||||ddd}|tjtj t|	tjtj}|dksXJ d
| ||qd S )Nr   r   rV   z0Failed with {0}, criterion = {1} and score = {2}rM   r   rK   )r   r   r   CLF_CRITERIONSr   rb   datatargetr   r   rp   )r   r$   r   r   r   r   r   r   	test_iris3  s   r   z
name, Treec                 C   s\   ||dd}| tjtj ttj|tj}|tdks,J d|  d| d| d S )Nr   r   zFailed with z, criterion = z and score = )r   rc   r   r   r   r   pytestapprox)r   r$   r   r   r   r   r   r   test_diabetes_overfitE  s   r   z&criterion, max_depth, metric, max_lossr3      <   r4   r5   r6   c                 C   sT   |||ddd}| tjtj |tj|tj}d|  k r%|k s(J  J d S )NrJ   r   )r   	max_depthr   rX   )r   rc   r   r   r   )r   r$   r   r   metricmax_lossr   lossr   r   r   test_diabetes_underfitR  s    r   c                  C   s   t  D ]V\} }|dddd}|tjtj |tj}tt	|dt
tjjd d| d tt|d|tjd| d t|tjt|tjdd| d qd S )Nr9   *   r   r   rX   r   r   rm   rL   )r   r   r   rb   r   r   predict_probar'   rs   rx   r   r   rp   r(   argmaxr   r&   exppredict_log_proba)r   r$   r   prob_predictr   r   r   test_probabilityg  s*   



r   c                  C   sP   t dd d t jf } t d}t D ]\}}|d dd}|| | qd S )Ni'  r   r   rX   )rs   arangenewaxis	REG_TREESr   r   r`   ra   r   r$   r   r   r   r   test_arrayrepr  s   
r   c                  C   s   ddgddgddgddgddgddgg} g d}t  D ]\}}|dd}|| | t|| |d|d	 qt D ]\}}|dd}|| | t|| |d|d	 q=d S )
NrB   r?   r9   rM   )r9   r9   r9   r9   r9   r9   r   r   r   rm   )r   r   r   r(   r   rp   r   r&   )r`   ra   r   TreeClassifierr   TreeRegressorr   r   r   r   test_pure_set  s   (

r   c               
   C   s   t g dg dg dg dg dg dg dg} t g d}t jd	d
3 t D ]%\}}|dd}|| | || |  ||  | ||  |  q,W d    d S 1 s]w   Y  d S )N)gs_c@d	a@籛 `8`@?c@)g_9a@g 8`@g-Vu]@g    @Xd@)gSW j_@r   r   r   )g ً`@4Ta@	lKa@{c@)g|@Y@g~G`a@gwI?lKa@g/"c@)g_@r   r   r   )g:^@r   r   r   )rO   gAw?gtQ?5??rT   g7G?gۺ?gb'?raise)allr   r   )rs   r   errstater   r   r   r   r   r   r   test_numerical_stability  s(   
"r   c               	   C   s   t jdddddddd\} }t D ]2\}}|dd}|| | |j}t|dk}|jd dks9J d		||dksDJ d		|qt
dd}|tjtj t
dttjd
}|tjtj t|j|j d S )N  rD   r<   r   FrY   rZ   n_informativen_redundant
n_repeatedshufflerX   r   皙?r   rX   max_leaf_nodes)r   make_classificationr   r   r   feature_importances_rs   rx   r   rp   r   rb   r   r   r   r(   )r`   ra   r   r$   r   importancesn_importantclf2r   r   r   test_importances  s*   



r   c                  C   s@   t  } tt t| d W d    d S 1 sw   Y  d S )Nr   )r   r   raises
ValueErrorgetattrr   r   r   r   test_importances_raises  s   "r   c               	   C   s   t jdddddddd\} }tdddd	| |}td
ddd	| |}t|j|j t|jj	|jj	 t|jj
|jj
 t|jj|jj t|jj|jj d S )Ni  rD   r<   r   Fr   r1   r;   )r   r   rX   r3   )r   r   r   r   r   r&   r   r(   tree_ru   rr   rq   rw   )r`   ra   r   r   r   r   r   )test_importances_gini_equal_squared_error  s,   
r   c                  C   s  t  D ]\} }|dd}|tjtj |jtt	tjj
d ks%J |dd}|tjtj |jtttjj
d ksBJ |dd}|tjtj |jdksVJ |dd}|tjtj |jdksjJ |dd}|tjtj |jdks~J |dd}|tjtj |jtdtjj
d  ksJ |dd}|tjtj |jtjj
d ksJ |d d}|tjtj |jtjj
d ksJ qd S )	Nsqrt)r   r9   log2r<   rS   rK   rO   )r7   r   r   rb   r   r   max_features_intrs   r   r   r   )r   TreeEstimatorestr   r   r   test_max_features  s4   
 
 





r   c            	   	   C   s  t  D ]o\} }| }tt |t W d    n1 s!w   Y  |tt g dg}tt	 || W d    n1 sFw   Y  | }td d }tt	 |t| W d    n1 sjw   Y  t
t}| }||t t|tt | }tt |t W d    n1 sw   Y  |tt t
t}tt	 ||d d dd f  W d    n1 sw   Y  t
tj}| }|t
t|t tt	 |t W d    n1 sw   Y  tt	 |t W d    n	1 sw   Y  | }|tt tt	 || W d    n	1 s6w   Y  tt	 || W d    n	1 sQw   Y  | }tt |t W d    n	1 sow   Y  qtdd}tjt	dd |g dgg d	 W d    n	1 sw   Y  tjt	d
d |g dgg d W d    d S 1 sw   Y  d S )N)rB   r?   r9   r?   r9   r6   r   zy is not positive.*Poissonmatchr   r9   rM   )r   r   r   zSome.*y are negative.*Poisson)r;   grM   )r   r   r   r   r   r   r`   r   ra   r   rs   asfortranarrayr&   r   r   r   asarrayr   dotapplyr   )	r   r   r   X2y2XftXtr   r   r   r   
test_error  sr   



$r   c                  C   s   t jtjtjjd} tj}tdt	
 D ]R\}}t	| }|d|dd}|| | |jj|jjdk }t |dksAJ d||d	|dd}|| | |jj|jjdk }t |dksfJ d|qd
S )z Test min_samples_split parameterdtypeN  rD   r   )min_samples_splitr   rX   r?   	   r   r>   N)rs   r   rb   r   r	   _treeDTYPEr   r   r7   keysr   r   rw   rr   r   rp   )r`   ra   r   r   r   r   node_samplesr   r   r   test_min_samples_split_  s"   r  c            	      C   s   t jtjtjjd} tj}tdt	
 D ]`\}}t	| }|d|dd}|| | |j| }t |}||dk }t |dksHJ d||d|dd}|| | |j| }t |}||dk }t |dkstJ d|qd S )	Nr   r   r;   r   )min_samples_leafr   rX   r8   r   r   )rs   r   rb   r   r	   r  r  r   r   r7   r  r   r   r   bincountr   rp   )	r`   ra   r   r   r   r   outnode_counts
leaf_countr   r   r   test_min_samples_leaf~  s*   

r  c                 C   s  t | d tj}|dur||}t | d }t|jd }t|}t|  }t	dt
dddD ]F\}}	||	|dd}
|
j|||d	 |durS|
j| }n|
j|}tj||d
}||dk }t|||
j ksyJ d| |
jq3|jd }t	dt
dddD ]B\}}	||	|dd}
|
|| |dur|
j| }n|
j|}t|}||dk }t|||
j ksJ d| |
jqdS )zPTest if leaves contain at least min_weight_fraction_leaf of the
    training setr`   Nra   r   r   rK   rJ   )min_weight_fraction_leafr   rX   r   )weightsz,Failed with {0} min_weight_fraction_leaf={1})DATASETSastypers   float32rngrandr   rx   r7   r   linspacer   r   r   tocsrr
  r   r  rp   )r   r   sparse_containerr`   ra   r  total_weightr   r   fracr   r  node_weightsleaf_weightsr   r   r   check_min_weight_fraction_leaf  sN   


r  r   c                 C      t | d d S Nrb   r  r   r   r   r   ,test_min_weight_fraction_leaf_on_dense_input     r"  csc_containerc                 C      t | d|d d S Nrh   )r  r   r   r$  r   r   r   -test_min_weight_fraction_leaf_on_sparse_input  s   r(  c                 C   s  t | d tj}|dur||}t | d }|jd }t|  }tdtdddD ]H\}}|||ddd	}	|	|| |durJ|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j dkssJ d
| |	j|	jq+tdtdddD ]K\}}|||ddd	}	|	|| |dur|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j ||	j ksJ d
| |	j|	jq~dS )zzTest the interaction between min_weight_fraction_leaf and
    min_samples_leaf when sample_weights is not provided in fit.r`   Nra   r   r   rK   r<   r;   )r  r   r	  rX   zBFailed with {0} min_weight_fraction_leaf={1}, min_samples_leaf={2}r   )r  r  rs   r  r   r7   r   r  r   r   r   r  r
  r   maxr  rp   r	  )r   r   r  r`   ra   r  r   r   r  r   r  r  r  r   r   r   4check_min_weight_fraction_leaf_with_min_samples_leaf  s`   







r*  c                 C   r  r  r*  r!  r   r   r   Btest_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input"  r#  r,  c                 C   r%  r&  r+  r'  r   r   r   Ctest_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input'  s   
r-  c                 C   s  t jd| d\}}tdt D ]\}}t| }||dd}||ddd}||ddd}||d	dd}	|d
f|df|df|	d	ffD ]\}
}|
j|ksSJ d|
j||
|| t|
j	j
D ]g}|
j	j| tkr|
j	j| }|
j	j| }|
j	j| }|
j	j| }|
j	j| }|| }|
j	j| }|
j	j| }|
j	j| }|| }|| }|| }|
j	j| |jd  }|||  }||ksJ d||q_qAqd S )Nd   rY   rX   r   r   r   rX   rQ   )r   min_impurity_decreaserX   g-C6?r   gHz>z)Failed, min_impurity_decrease = {0} > {1}z2Failed with {0} expected min_impurity_decrease={1})r   r   r   r7   r  r1  rp   r   ranger   ro   rr   r   ry   weighted_n_node_samplesrq   r   )global_random_seedr`   ra   r   r   r   est1est2est3est4r   expected_decreasenode
imp_parent
wtd_n_nodeleft
wtd_n_leftimp_leftwtd_imp_leftrightwtd_n_right	imp_rightwtd_imp_rightwtd_avg_left_right_impfractional_node_weightactual_decreaser   r   r   test_min_impurity_decrease1  sh   
rH  c               	      s   t  D ]q\} }d| v rtjtj}}ntjtj}}|dd  ||  ||}g d} fdd|D }t	 }t
|}t| jksLJ |||}	||	ks]J d| |D ]}
tt|j|
||
 d|
 d	|  d
 q_qdS )z8Test pickling preserves Tree properties and performance.
Classifierr   r   )r   ro   capacity	n_classesrr   rq   n_leavesru   rv   ry   rw   r3  rz   c                    s   i | ]	}|t  j|qS r   )r   r   ).0	attributer   r   r   
<dictcomp>      ztest_pickle.<locals>.<dictcomp>z6Failed to generate same score  after pickling with {0}z"Failed to generate same attribute z after pickling with rm   N)r7   r   rb   r   r   rc   r   r   pickledumpsloadstype	__class__rp   r(   r   r   )r   r   r`   ra   r   
attributesfitted_attributeserialized_objectr6  score2rN  r   rO  r   test_pickley  s8   





r[  c                  C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg} ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t  D ]\\}}|dd}|| ||}t|| |jdksJ ||}t|dksJ |d jdksJ |d jd	ksJ ||}	t|	dksJ |	d jdksJ |	d jd	ksJ qlt	 D ]\}}
|
dd}|| ||}t
|| |jdksJ qd S )
NrB   r?   r9   rM   r   r<   r   r8   rM   )r8   r8   )r   r   r   r   r(   r   r   r   r   r   r&   )r`   ra   r   y_truer   r   r   y_hatproba	log_probar   r   r   r   r   test_multioutput  s^   





ra  c                  C   s   t  D ]\\} }|dd}|tt |jdksJ t|jddg t	tt
td fj}|dd}|t| t|jdksCJ t|jdksLJ t|jddg t|jddgddgg qd S )Nr   r   rM   r?   r9   rB   )r   r   r   r`   ra   
n_classes_r(   classes_rs   r   r   r   r   )r   r   r   _yr   r   r   test_classes_shape  s   

re  c                  C   sf   t jd d } t jd d }td|}t D ]\}}|dd}|j| ||d t|| | qd S )N}   balancedr   r   r   )	rb   r   r   r%   r   r   r   r&   r   )unbalanced_Xunbalanced_yr   r   r   r   r   r   r   test_unbalanced_iris  s   

rj  c                  C   s  t t tjtjgD ]\\} }}|dd}tjtj|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtj|d}tj	}t
||||| tD ]}|tj|d}tj	}t
||||| qvtD ]}|tj|d}tj	}t
||||| qtjtjd d d |d}tj	d d d }t
||||| qd S )Nr   r   r   C)orderr   Fr<   )r   r7   r   rs   float64r  r   rb   r   r   r(   r   r   ascontiguousarrayr/   r.   )r   r   r   r   r`   ra   csr_containerr$  r   r   r   test_memory_layout  s8   
rq  c                  C   s  t dd d t jf } t d}d|d d< t d}d||dk< tdd}|j| ||d t|| t d t dd d t jf } t d}d|dd< d	|dd< d| dddf< t d}d
||d	k< tddd}|j| ||d |j	j
d dksJ d||d	k< tddd}|j| ||d |j	j
d dksJ tj} tj}td| jd d}tdd}|| | ||  t j|| jd d}tdd}|j| ||d |j	jtjjk}t|j	j
| |j	j
|  d S )Nr.  rT   2   r   r   r      r9   rM   gRQ?r   g     b@rK   g     H@)	minlength)rs   r   r   r   r   r   r(   r   rl   r   rv   rb   r   r   r  randintr   r
  rr   r	   r  r   r'   )r`   ra   r   r   
duplicatesr   r   r   r   r   test_sample_weight5  sD   






rw  c                  C   s   t dd d t jf } t d}d|d d< tdd}t jdd}tt	 |j
| ||d W d    n1 s;w   Y  t d}td}tjt|d	 |j
| ||d W d    d S 1 sew   Y  d S )
Nr.  rT   rr  r   r   r9   r   zgInput should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(0.)` instead.r   )rs   r   r   r   r   randomr  r   r   r   r   r   reescape	TypeError)r`   ra   r   r   expected_errr   r   r   test_sample_weight_invalidi  s   


"r}  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   rg  class_weightrX   g       @rO   r   r9   r.  g      Y@rM   )r   r   rb   r   r   r&   r   rs   r   r   r   r   )	r   r   clf1r   
iris_multiclf3clf4r   r  r   r   r   test_class_weights  s@   





r  c                 C   sz   t |  }ttttd fj}|dddgdd}d}tjt|d |	t
| W d    d S 1 s6w   Y  d S )	NrM   rK   rO   r?   r9   r   r~  zBnumber of elements in class_weight should match number of outputs.r   )r   rs   r   ra   r   r   r   r   r   r   r`   )r   r   rd  r   rn   r   r   r   test_class_weight_errors  s   "r  c                  C   sX   t jddd\} }d}t D ]\}}|d |d d| |}| |d ks)J qd S Nr.  r9   r/  r8   )r   r   )r   make_hastie_10_2r7   r   r   get_n_leavesr`   ra   kr   r   r   r   r   r   test_max_leaf_nodes  s   r  c                  C   sP   t jddd\} }d}t D ]\}}|d|d| |}| dks%J qd S r  )r   r  r7   r   r   	get_depthr  r   r   r   test_max_leaf_nodes_max_depth  s   r  c                  C   sZ   dD ](} t t dgdggddgj| }d|jd   kr%dk s*J d J dqd S )N)rK  rz   rr   rq   rv   ry   ru   rw   r   r9   rC   r<   z Array points to arbitrary memory)r   r   r   r   flat)attrrz   r   r   r   test_arrays_persist  s   "
,r  c                  C   s\   t d} td}| ddd}t D ]\}}|dd}||| |jjdks+J qd S )Nr   )rD   r[   rM   )rD   r   )	r0   rs   rl   ru  r7   r   r   r   r   )rX   r`   ra   r   r   r   r   r   r   test_only_constant_features  s   

r  c                  C   s~   t t g dgt df} g d}t D ]$\}}d|vr<|ddd}|| | |jjdks4J |jj	d	ks<J qd S )
N)r   r   r   r   r   r9   rM   r8   r;   rJ      )r8   rI   )r   r   r   r9   r9   rM   rM   rM   r<   r<   r<   	ExtraTreer   r9   r   rM   r;   )
rs   	transposer   rl   r7   r   r   r   r   ro   r`   ra   r   r   r   r   r   r   ,test_behaviour_constant_feature_after_splits  s   r  c                  C   s   t t dgdgdgdggt dg} t g d}t D ]$\}}|ddd}|| | |jjdks8J t	|
| t dd	 q t D ]$\}}|ddd}|| | |jjdksaJ t	|| t d
d	 qId S )NrO   rT   )r8   r  )rT   rO   rT   rO   r   r9   r   r\  rK   )r8   )rs   hstackr   rl   r   r   r   r   r   r(   r   r   r   r   r  r   r   r   (test_with_only_one_non_constant_features  s   *r  c                  C   sf   t ddt jdd} t }tjtdd |	| g d W d    d S 1 s,w   Y  d S )Ng\)c=Hr8   r?   r9   r  r   )r   r9   r   r9   )
rs   repeatr  rn  reshaper   r   r   r   r   )r`   r   r   r   r   test_big_input  s
   "r  c                  C   sB   ddl m}  tt |   W d    d S 1 sw   Y  d S )Nr   _realloc_test)sklearn.tree._utilsr  r   r   MemoryErrorr  r   r   r   test_realloc  s   "r  c                  C   s   dt d } tjdd}tjddd}d| d  }td|d}tt	 |
|| W d    n1 s8w   Y  d| d  d }td|d}tt |
|| W d    d S 1 sbw   Y  d S )	NrL   PrD   rM   r   r9   best)splitterr   )structcalcsizers   rx  randnru  r   r   r   	Exceptionr   r  )n_bitsr`   ra   huger   r   r   r   test_huge_allocations  s   "r  c                 C   s(  t |  }t| d }t| d }|dv r'|jd d }|d | }|d | }tt t D ]d}||}|d|d||}	|d|d||}
t|	j|
jd	|  |	
|}| tv re|	|}|	|}tt t D ]%}||tjd}t|

|| | tv rt|
|| t|
|| qkq-d S )	Nr`   ra   )rd   rc   r   r;   rX   r   5{0} with dense and sparse format gave different treesr   )r7   r  r   r-   r.   r/   r   r   r   rp   r   r   r   r   rs   r  r'   )r	   datasetr   r   r`   ra   rY   r  X_sparser{   r|   y_predy_probay_log_probasparse_container_testX_sparse_testr   r   r   check_sparse_input/  s>   



r  	tree_typer  )rf   re   rd   rh   ri   rj   rk   rl   c                 C   s    |dkrdnd }t | || d S )Nrd   r<   r  )r  r  r   r   r   r   test_sparse_inputX  s   r  rc   rg   c                 C   s   t | |d d S )NrM   r  )r  r  r   r   r   test_sparse_input_reg_treesk  s   r  )ri   rj   rk   rl   c           	      C   s  t |  }t| d }||}t| d }|dddd||}|dddd||}t|j|jd|  t|||| |dddd	||}|dddd	||}t|j|jd|  t|||| |d|jd d d
||}|d|jd d d
||}t|j|jd|  t|||| |ddd||}|ddd||}t|j|jd|  t|||| d S )Nr`   ra   r   r9   rM   )rX   r   r   r  rD   )rX   r   r  )rX   r	  r<   r   )	r7   r  r   r   r   rp   r'   r   r   )	r  r  r$  r   r`   r  ra   r{   r|   r   r   r   test_sparse_parameterss  sP   r  ztree_type, criterionc                 C      g | ]}|t v r|qS r   )r   rM  r	   r   r   r   
<listcomp>      r  c                 C   r  r   )r   r  r   r   r   r    r  c           
      C   s   t |  }t| d }||}t| d }|dd|d||}|dd|d||}	t|j|	jd|  t|	||| d S )Nr`   ra   r   r<   rX   r   r   r  )r7   r  r   r   r   rp   r'   r   )
r  r  r$  r   r   r`   r  ra   r{   r|   r   r   r   test_sparse_criteria  s   
r  zcsc_container,csr_containerc                 C   s  t |  }d}d}|}t|}td}g }	g }
d}|g}t|D ]/}||d}||d | }|	| |jdd|fdd }|
| ||7 }|| q t|		tj
}	tj|tj
d}tjt|
tjd}
||
|	|f||fd}| }||
|	|f||fd}| }|jdd|fd}| }|jd	k dksJ |jd	k dksJ |d|d
||}|d|d
||}t|j|jdt ||f}t||D ]s\}}t|j||j| t|||| t|||j| t|j| |j|  t|| ||  t|| |j|  t|||| ttv rHt|||| qd S )Nr<   rD   r   rK   r\   r9   r   r   rT   r  r  )r7   rs   r   r0   r2  binomialpermutationappendconcatenater  int32r   r  toarrayru  copyr   rx   r   r   r   rp   r	   r   r'   r   decision_pathr   r   r   )r  r$  rp  r   r   rZ   rY   samplesrX   r   r   offsetindptrin_nonzero_i	indices_idata_ir  r`   r  X_testra   r{   r|   XsX1r   r   r   r   test_explicit_sparse_zeros  sr   



r  c                 C   s   t |  }tjd d df  }tjd d df d}tj}tt |dd	|| W d    n1 s7w   Y  |dd}|	|| tt |
|g W d    d S 1 s^w   Y  d S )Nr   r  r   )r7   rb   r   r   r  r   r   r   r   r   r   )r   r   r`   X_2dra   r   r   r   r   check_raise_error_on_1d_input  s   
"r  c                 C   s4   t   t|  W d    d S 1 sw   Y  d S N)r*   r  r!  r   r   r   test_1d_input"  s   
"r  r  c                 C   s   t |  }tdgdgdgdgdgg}g d}g d}|d ur#||}|dd}|j|||d |jjdks8J |ddd}|j|||d |jjdksNJ d S )	Nr   r9   )r   r   r   r   r9   )r>   r>   r>   r>   r>   r   r   g?)rX   r  )r7   rs   r   r   r   r   )r   r  r   r`   ra   r   r   r   r   r    test_min_weight_leaf_split_level(  s   
r  c                 C   sD   t jtjjdd}t|   }|t t t|	t |j
	| d S NFr  X_smallr  r	   r  r  r7   r   y_smallr(   r   r   )r   	X_small32r   r   r   r   test_public_apply_all_trees<  s   
r  rp  c                 C   sH   |t jtjjdd}t|   }|t t t|	t |j
	| d S r  r  )r   rp  r  r   r   r   r   test_public_apply_sparse_treesE  s   
r  c                  C   sP   t j} t j}tddd| |}|| d d  }t|g dg dg d S )Nr   r9   r  rM   )r9   r9   r   r9   r   r9   )rb   r   r   r   r   r  r  r(   )r`   ra   r   node_indicatorr   r   r   test_decision_path_hardcodedO  s
   r  c                    s   t j}t j}|jd }t|  }|ddd}||| ||}|   j||jj	fks/J |
|} fddt|D }t|tj|d |jjtk}	tt |	tj|d  jdd }
|jj|
kskJ d S )	Nr   rM   r  c                    s   g | ]
\}} ||f qS r   r   )rM  r  jr  r   r   r  g  s    z&test_decision_path.<locals>.<listcomp>r  r9   axis)rb   r   r   r   r7   r   r  r  r   ro   r   	enumerater'   rs   r   rr   r   r   rx   r)  r   )r   r`   ra   rY   r   r   node_indicator_csrleavesleave_indicator
all_leavesr   r   r  r   test_decision_pathW  s$   


r  c                 C   sX   t |t}}t|  }tt |dd|| W d    d S 1 s%w   Y  d S Nr   r   )X_multilabely_multilabelr7   r   r   r{  r   )r   rp  r`   ra   r   r   r   r   test_no_sparse_y_supportu  s
   "r  c                  C   s  t dddd} | jdgdgdgdgdggg dg d	d
 t| jjg d t| jjjg d | jdgdgdgdgdggg dt	dd
 t| jjg d t| jjjg d | jdgdgdgdgdggg dd t| jjg d t| jjjg d dS )aQ	  Check MAE criterion produces correct results on small toy dataset:

    ------------------
    | X | y | weight |
    ------------------
    | 3 | 3 |  0.1   |
    | 5 | 3 |  0.3   |
    | 8 | 4 |  1.0   |
    | 3 | 6 |  0.6   |
    | 5 | 7 |  0.3   |
    ------------------
    |sum wt:|  2.3   |
    ------------------

    Because we are dealing with sample weights, we cannot find the median by
    simply choosing/averaging the centre value(s), instead we consider the
    median where 50% of the cumulative weight is found (in a y sorted data set)
    . Therefore with regards to this test data, the cumulative weight is >= 50%
    when y = 4.  Therefore:
    Median = 4

    For all the samples, we can get the total error by summing:
    Absolute(Median - y) * weight

    I.e., total error = (Absolute(4 - 3) * 0.1)
                      + (Absolute(4 - 3) * 0.3)
                      + (Absolute(4 - 4) * 1.0)
                      + (Absolute(4 - 6) * 0.6)
                      + (Absolute(4 - 7) * 0.3)
                      = 2.5

    Impurity = Total error / total weight
             = 2.5 / 2.3
             = 1.08695652173913
             ------------------

    From this root node, the next best split is between X values of 3 and 5.
    Thus, we have left and right child nodes:

    LEFT                    RIGHT
    ------------------      ------------------
    | X | y | weight |      | X | y | weight |
    ------------------      ------------------
    | 3 | 3 |  0.1   |      | 5 | 3 |  0.3   |
    | 3 | 6 |  0.6   |      | 8 | 4 |  1.0   |
    ------------------      | 5 | 7 |  0.3   |
    |sum wt:|  0.7   |      ------------------
    ------------------      |sum wt:|  1.6   |
                            ------------------

    Impurity is found in the same way:
    Left node Median = 6
    Total error = (Absolute(6 - 3) * 0.1)
                + (Absolute(6 - 6) * 0.6)
                = 0.3

    Left Impurity = Total error / total weight
            = 0.3 / 0.7
            = 0.428571428571429
            -------------------

    Likewise for Right node:
    Right node Median = 4
    Total error = (Absolute(4 - 3) * 0.3)
                + (Absolute(4 - 4) * 1.0)
                + (Absolute(4 - 7) * 0.3)
                = 1.2

    Right Impurity = Total error / total weight
            = 1.2 / 1.6
            = 0.75
            ------
    r   r4   rM   )rX   r   r   r<   r;   rL   )rJ   r  r<   r8   r<   )333333?333333?r   rO   r  )r`   ra   r   )g,d?gܶm۶m?g?)      @g      @r  )ffffff?rN   gUUUUUU?)r8   rU   r  r_   N)
r   r   r   r   ry   r(   rz   r  rs   r   )dt_maer   r   r   test_mae  s    J.&r  c                  C   s   d} t jdt jd}d}dd }tjtj|fD ][}t D ]'\}}|| |}|| }|\}	\}
}}||	ks9J | |
ks?J t|| qt	 D ](\}}|| |}|| }|\}	\}
}}||	kseJ | |
kskJ ||ksqJ qIqd S )Nr<   r   r.  c                 S   s   t t | S r  )rR  rT  rS  )objr   r   r   _pickle_copy  s   z)test_criterion_copy.<locals>._pickle_copy)
rs   r   intpr  deepcopyr   r   
__reduce__r(   r   )	n_outputsrK  rY   r  	copy_func_typenamecriteriaresult	typename_
n_outputs_rb  
n_samples_r   r   r   test_criterion_copy  s*   

r  c           	      C   s   t jdddd }t |d}|d d d df }| d ur&| |}|d d df }tdd||}||}t	t 
|jjtkd }||}t 
t |jj d }t|dkscJ t|dkskJ d S )Nr   r.  rI   g*Gr  r?   r   )rs   rx  RandomStater  
nan_to_numr  r   r   r   setwherer   rr   r   
differenceisfiniterv   r   )	r  r   r`   ra   r	   terminal_regions	left_leaf
empty_leafinfinite_thresholdr   r   r   "test_empty_leaf_infinite_threshold  s   

r  tree_clsc                 C      t |  } | d | d }}|ddd}|||}|j}|j}tt|dks+J tt|dks7J t|||| d S Nr`   ra   r[   r   r0  r  cost_complexity_pruning_path
ccp_alphas
impuritiesrs   r   diffassert_pruning_creates_subtreer  r  r`   ra   r   infopruning_pathr  r   r   r   'test_prune_tree_classifier_are_subtrees  s   r  c                 C   r  r  r  r  r   r   r   'test_prune_tree_regression_are_subtrees$  s   r   c                  C   sX   t dd} | dgdggddg t ddd}|dgdggddg t| j|j d S )Nr   r   r9   rD   )rX   	ccp_alpha)r   r   assert_is_subtreer   )r  r   r   r   r   test_prune_single_node_tree5  s
   
r#  c           	      C   sR   g }|D ]}| d|dd ||}|| qt|D ]\}}t|j|j qd S )Nr[   r   )r   r!  rX   )r   r  r   r"  r   )	estimator_clsr`   ra   r  
estimatorsr!  r   prev_estnext_estr   r   r   r  A  s   r  c           	      C   s  | j |j ksJ | j|jksJ | j}| j}|j}|j}dg}|r| \}}t| j| |j|  t| j| |j|  t| j	| |j	|  t| j
| |j
|  || || krdtt|j|  n!t| j| |j|  ||| || f ||| || f |s!d S d S )N)r   r   )ro   r   rr   rq   popr'   rz   r&   ry   rw   r3  r   rv   r  )	r	   subtreetree_c_lefttree_c_rightsubtree_c_leftsubtree_c_rightstacktree_node_idxsubtree_node_idxr   r   r   r"  P  s@   r"  r  r  rx  c                 C   s   t d }|d jtjjdd}|d u rt|}n!||d }tj|jtjjd|_t|j|j	|j
f\|_|_	|_
ttjttjjd}t|  |d}||| t|||| t|| ||  d S )Nrf   r`   Fr  r   )r  )r  r  r	   r  r  r)   rs   r   r   r   r  r  r7   r   r(   r   r  todense)r   r  r  r  r  
X_readonly
y_readonlyr   r   r   r   "test_apply_path_readonly_all_treesx  s(   
r4  )r3   r5   r6   c                 C   sL   t jt j}}|| d}||| t||tt|ks$J d S )Nr   )	rc   r   r   r   rs   rx   r   r   r   )r   r$   r`   ra   r   r   r   r   test_balance_property  s   
(r5  seedc              	   C   s  ddgddgddgddgddgddgddgddgg}g d}t d| d}||| t||dks6J t d| d}||| t||dksNJ d	}tj|d d d
d||d d | d\}}d|d|k |dk @ < t|}t d| d}||| t||dksJ d S )Nr   r9   rM   r<   )r   r   r   r   r9   rM   r<   r8   r3   r   r6   rD   r  r  )effective_ranktail_strengthrY   rZ   r   rX   r?   )	r   r   rs   aminr   r   r   make_regressionr   )r6  r`   ra   r   rZ   r   r   r   test_poisson_zero_nodes  s,   4


	
r;  c                  C   sB  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
dd| d}t
dd| d}|||	 |||	 tdd||	}||	df||
dffD ]6\}}}t|||}t|t ||dd }t|||}|dkr|d| k sJ |d| k sJ qhd S )Nr   )  r<  rD   rY   rZ   rX   rB   rM   )lowhighr]   r   r  )lam)	test_sizerX   r6   rD   )r   r  rX   r3   mean)strategytraintestgV瞯<rK   g      ?)rs   rx  r  r   make_low_rank_matrixuniformr)  r6   r   r   r   r   r
   r   r   clip)r  n_trainn_testrZ   r`   coefra   X_trainr  r   r   tree_poitree_msedummyval
metric_poi
metric_msemetric_dummyr   r   r   test_poisson_vs_mse  s6   

rT  rK  c                 C   sz   d\}}t j||||ddd\}}| ddd||}| ddd||}t|j|j| d	 t|||| d
S )z3Test that criterion=entropy gives same as log_loss.)rr  r;   r   r   )rK  rY   rZ   r   r   rX   r2   +   r   entropyz> with criterion 'entropy' and 'log_loss' gave different trees.N)r   r   r   r   r   r   r   )r$   rK  rY   rZ   r`   ra   tree_log_losstree_entropyr   r   r   'test_criterion_entropy_same_as_log_loss  s"   
rY  c                     sv   t jdd\} }tddd  | |  | |}dd  fdd}t| }|| |}t||s9J d S )	Nr   r   r<   r  c                 S   s   |   | j  S r  )byteswapviewr   newbyteorderr  )arrr   r   r   reduce_ndarray  s   z8test_different_endianness_pickle.<locals>.reduce_ndarrayc                     sB   t  } t| }tj |_|jtj< |	  | 
d | S Nr   )ioBytesIOrR  Picklercopyregdispatch_tabler  rs   ndarraydumpseekfpr   r^  r   r    get_pickle_non_native_endianness  s   


zJtest_different_endianness_pickle.<locals>.get_pickle_non_native_endianness)	r   r   r   r   r   rR  loadrs   isclose)r`   ra   r   rl  new_clf	new_scorer   rk  r    test_different_endianness_pickle  s   
rq  c                     s~   t jdd\} }tddd| | | |}G dd dt  fdd}t| }|| |}t	||s=J d S )	Nr   r   r<   r  c                       s   e Zd Z fddZ  ZS )zPtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPicklerc                    s0   t |tjr| |j }t | d S r  )	
isinstancers   re  rZ  r[  r   r\  supersave)selfr  rV  r   r   rt  (  s   zUtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPickler.save)__name__
__module____qualname__rt  __classcell__r   r   rv  r   NonNativeEndiannessNumpyPickler'  s    r{  c                     s(   t  }  | }| | d | S r_  )r`  ra  rf  rg  rh  r{  r   r   r   'get_joblib_pickle_non_native_endianness-  s
   

zXtest_different_endianness_joblib_pickle.<locals>.get_joblib_pickle_non_native_endianness)
r   r   r   r   r   r   joblibrm  rs   rn  )r`   ra   r   r}  ro  rp  r   r|  r   'test_different_endianness_joblib_pickle   s   r  c                 C   sn   t rtjntj}g d}dd | jj D }|D ]}|||< qtt| t|	 d}| j
|ddS )N)
left_childright_childru   rw   c                 S      i | ]	\}\}}||qS r   r   rM  r   r   r   r   r   r   rP  @  rQ  z6get_different_bitness_node_ndarray.<locals>.<dictcomp>namesformats	same_kindcasting)r,   rs   int64r  r   fieldsr   listr  valuesr  )node_ndarraynew_dtype_for_indexing_fieldsindexing_field_namesnew_dtype_dictr   	new_dtyper   r   r   "get_different_bitness_node_ndarray:  s   

r  c                 C   sj   dd | j j D }dd | j j D }dd |D }t t| t| |d}| j|ddS )	Nc                 S   r  r   r   r  r   r   r   rP  M  rQ  z8get_different_alignment_node_ndarray.<locals>.<dictcomp>c                 S   s   g | ]\}}|qS r   r   )rM  r   r  r   r   r   r  P      z8get_different_alignment_node_ndarray.<locals>.<listcomp>c                 S   s   g | ]}d | qS )rL   r   )rM  r  r   r   r   r  Q  r  )r  r  offsetsr  r  )r   r  r   r  rs   r  r  r  )r  r  r  shifted_offsetsr  r   r   r   $get_different_alignment_node_ndarrayL  s   


r  c           	      C   sZ   t rtjntj}|  \}\}}}}|j|dd}| }t|d |d< ||||f|fS )Nr  r  nodes)r,   rs   r  r  r  r  r  r  )	r	   r  r  rZ   rK  r  statenew_n_classes	new_stater   r   r   "reduce_tree_with_different_bitness]  s   r  c                     n   t jdd\} }tddd  | |  | |} fdd}t| }|| |}|t|ks5J d S )Nr   r   r<   r  c                     s@   t  } t| }tj |_t|jt< |	  | 
d | S r_  )r`  ra  rR  rb  rc  rd  r  r  
CythonTreerf  rg  rh  r   r   r   "pickle_dump_with_different_bitnesso  s   



zItest_different_bitness_pickle.<locals>.pickle_dump_with_different_bitness)	r   r   r   r   r   rR  rm  r   r   )r`   ra   r   r  ro  rp  r   r   r   test_different_bitness_pickleh  s   
r  c                     r  )Nr   r   r<   r  c                     s>   t  } t| }tj |_t|jt< |  | 	d | S r_  )
r`  ra  r   rc  rd  r  r  r  rf  rg  rh  r   r   r   "joblib_dump_with_different_bitness  s   


zPtest_different_bitness_joblib_pickle.<locals>.joblib_dump_with_different_bitness)	r   r   r   r   r   r~  rm  r   r   )r`   ra   r   r  ro  rp  r   r   r   $test_different_bitness_joblib_pickle~  s   
r  c                  C   s  t rttjnttj} ttjttjg}|dd |D 7 }tjddg| d}|D ]
}t|||  q.tj	t
dd tjddgg| d}t||  W d    n1 sZw   Y  tj	t
dd |tj}t||  W d    d S 1 s}w   Y  d S )	Nc                 S   s   g | ]}|  qS r   )r\  )rM  dtr   r   r   r    r  z(test_check_n_classes.<locals>.<listcomp>r   r9   r   zWrong dimensions.+n_classesr   zn_classes.+incompatible dtype)r,   rs   r   r  r  r   r!   r  r   r   r   rn  )expected_dtypeallowed_dtypesrK  r  wrong_dim_n_classeswrong_dtype_n_classesr   r   r   test_check_n_classes  s   "r  c               	   C   s0  t t j} d}t j|| d}| |  g}|D ]	}t|||d qtjtdd t|| dd W d    n1 s:w   Y  |d d d d d df t 	|fD ]!}tjtdd t|| |j
d W d    n1 smw   Y  qQtjtd	d t|t j| |d W d    d S 1 sw   Y  d S )
N)r;   r9   rM   r   )r  expected_shapezWrong shape.+value arrayr   )r9   rM   r9   zvalue array.+C-contiguouszvalue array.+incompatible dtype)rs   r   rn  rl   r\  r#   r   r   r   r   r   r  r  )r  r  value_ndarrayr  r  problematic_arrr   r   r   test_check_value_ndarray  s:   (
"r  c                  C   s  t } tjd| d}|t|t|g}|dd |D 7 }|D ]}t|| d qtjtdd tjd| d}t|| d W d    n1 sEw   Y  tjtd	d |d d d
 }t|| d W d    n1 siw   Y  dd |j	j
 D }| }tj|d< t	t| t| d}||}tjtdd t|| d W d    n1 sw   Y  | }tj|d< t	t| t| d}||}tjtdd t|| d W d    d S 1 sw   Y  d S )N)r;   r   c                 S   s   g | ]
}| |j qS r   )r  r   r\  )rM  r]  r   r   r   r    s    z+test_check_node_ndarray.<locals>.<listcomp>)r  zWrong dimensions.+node arrayr   )r;   rM   znode array.+C-contiguousrM   c                 S   r  r   r   r  r   r   r   rP    s    z+test_check_node_ndarray.<locals>.<dictcomp>rv   r  znode array.+incompatible dtyper  )r   rs   rl   r  r  r"   r   r   r   r   r  r   r  r  r  r  r  r  rn  )r  r  valid_node_ndarraysr]  problematic_node_ndarray
dtype_dictr  r  r   r   r   test_check_node_ndarray  sL   



"r  Splitterc           	      C   s   t jd}d}dt jddgt jd}}td ||}| ||dd|d	d
}t|}t|}|j	|ks7J t
|| s>J d	S )z&Check that splitters are serializable.r   rD   rM   r<   r   r1   r;   rK   N)monotonic_cst)rs   rx  r  r   r  r   rR  rS  rT  r   rr  )	r  r  r   r  rK  r   r  splitter_serializesplitter_backr   r   r   test_splitter_serializable 	  s   

r  c                 C   sR   t | d}tdd}|tt t|| tj|dd}t	|j
|j
d dS )zhCheck that Trees can be deserialized with read only buffers.

    Non-regression test for gh-25584.
    z
clf.joblibr   r   r)	mmap_modez?The trees of the original and loaded classifiers are not equal.N)strjoinr   r   r  r  r~  rf  rm  r   r   )tmpdirpickle_pathr   
loaded_clfr   r   r   /test_tree_deserialization_from_read_only_buffer	  s   
r  c                 C   s   t ddgddgg}t ddg}| dd|| | dd}d}tjt|d ||| W d   dS 1 s;w   Y  dS )zhCheck that an error is raised when min_sample_split=1.

    non-regression test for issue gh-25481.
    r   r9   rO   )r  zb'min_samples_split' .* must be an int in the range \[2, inf\) or a float in the range \(0.0, 1.0\]r   N)rs   r   r   r   r   r   )r$   r`   ra   r	   msgr   r   r   test_min_sample_split_1_error%	  s   
"r  c                 C   s   t g dgj}t g d}tdd| d}||| |t jgg}t|t |dd g |dd }|dd }tdd| d}||| |t jgg}t|t |d	d g dS )
z=Check missing values goes to correct node during predictions.	r   r9   rM   r<   rL   r  rI      r   	r   r>   r  r>   r  r  rN   g?g@r   r9   r  r=   Nr?   r:   )	rs   r   r   r   r   r   nanr   rB  )r   r`   ra   dtcr  X_equaly_equalr   r   r   ;test_missing_values_best_splitter_on_equal_nodes_no_missing;	  s   r  c                 C   s   t g dgj}t g d}t|d| d}||| |jjd }|jjd }|jj| }|jj| }||k}	|jj	| d }
|jj	| d }|
t jgg}|	rZt|
| dS t|| dS )zCheck missing values go to the correct node during predictions for ExtraTree.

    Since ETC use random splits, we use different seeds to verify that the
    left/right node is chosen correctly when the splits occur.
    r  r  r9   r  r   N)rs   r   r   r   r   r   rr   rq   r3  rz   r   r  r   )r   r6  r`   ra   etrr  r  left_samplesright_samples	went_lefty_pred_lefty_pred_rightr  r   r   r   =test_missing_values_random_splitter_on_equal_nodes_no_missingU	  s   r  rV  r1   c                 C   s   d}t t jgd g d gj}t |gd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t||ddg d
S )zITest when missing values are uniquely present in a class among 3 classes.r   r8   )r   r9   rM   r<   rL   r  rI   r  r9   rM   r   r  r<   r  Nrs   r   r  r   r   r   r   r(   )r   missing_values_classr`   ra   r  r  
y_nan_predr   r   r   /test_missing_values_best_splitter_three_classesx	  s   $
r  c                 C   s   t t jgd g d gj}t dgd dgd  }tdd| d}||| t t jd	t jggj}||}t|g d
 dS )zMissing values spanning only one class at fit-time must make missing
    values at predict-time be classified has belonging to this class.r8   r   r9   rM   r<   r8   r;   r   r9   rJ   r   rM   r  r;   )r   r9   r   Nr  r   r`   ra   r  r  r  r   r   r   )test_missing_values_best_splitter_to_left	  s   
r  c                 C   s   t t jgd g d gj}t dgd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t|g d
 dS )zMissing values and non-missing values sharing one class at fit-time
    must make missing values at predict-time be classified has belonging
    to this class.r8   r  r9   r   rM   r   r  rP   g333333@r  Nr  r  r   r   r   *test_missing_values_best_splitter_to_right	  s   $
r  c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }td
d| d}||| t t jddggj}||}t|g d dS )zNCheck behavior of missing value when there is one missing value in each class.r9   rM   r<   r;   rD   r[   rW   r   r   r   r  gffffff@gA@r  Nr  r  r   r   r   >test_missing_values_best_splitter_missing_both_classes_has_nan	  s   &
r  r	   r   c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }| d
ur(| |}tjtdd ||| W d
   d
S 1 sAw   Y  d
S )z4Check unsupported configurations for missing values.r9   rM   r<   r;   rD   r[   rW   r   r   NzInput X contains NaNr   )rs   r   r  r   r   r   r   r   )r  r	   r`   ra   r   r   r   test_missing_value_errors	  s   &"r  c                 C   sp   t j t j}}tj|ddddf< tj|ddddf< | ddd}||| ||}|d	k s6J dS )
z5Smoke test for poisson regression and missing values.Nr;   r   rJ   r?   r6   r   r   rT   )	rc   r   r  r   rs   r  r   r   r   )r$   r`   ra   r   r  r   r   r   test_missing_values_poisson	  s   
r  c                  O   s$   t j| i |\}}|dk}||fS )N   )r   make_friedman1)argskwargsr`   ra   r   r   r   make_friedman1_classification	  s   r  zmake_data, Tree, tolerancegQ?gQ?gQ?sample_weight_trainr   c                 C   s  d\}}| ||d|d\}}|  }	tj|}
tj|	|
jddg|jddgd< t|	||d	\}}}}|d
kr@t|jd }nd}d}|||d}|j	|||d |
||}tt |||d}|	|| |
||}|| |ksJ d|d| d| dS )zFCheck that trees can deal with missing values have decent performance.)r   rD   rO   )rY   rZ   noiserX   FTrV   r   r]   rj  r   r   r   NrD   r   r   zscore_native_tree=z + z! should be strictly greater than )r  rs   rx  r  r  choicer   r   r   r   r   r   r   )	make_datar$   r  r4  	tolerancerY   rZ   r`   ra   	X_missingr  X_missing_trainX_missing_testr   r   r   r   native_treescore_native_treetree_with_imputerscore_tree_with_imputerr   r   r   !test_missing_values_is_resilience	  s<   
 r  zTree, expected_scoreg333333?g(\?c                 C   s   t jd}d}|j|dfd}t t |d t |d g}|jddg|dd	gd
}| 	t
}||  ||< |j|d}	t j|	|< |	|dddf< | |d}
t|
||dd }||kskJ d| d| dS )z@Check the tree learns when only the missing value is predictive.r   r<  r[   r\   rM   FTgffffff?rQ   r  Nr;   r   )cvzExpected CV score: z	 but got )rs   rx  r  standard_normalr  rl   r   r  r  r  boolr  r   rB  )r$   expected_scorer4  r  rY   r`   ra   X_random_masky_maskX_predictiver	   tree_cv_scorer   r   r    test_missing_value_is_predictive 
  s   "


r  zmake_data, Treec           
      C   s   t jd}d\}}| |||d\}}t j||jddg|jddgd< t |jd }d	|d
d
d< |dd}|j|||d |dd}	|	|dd
dd
d
f |dd
d  t|		||	| d
S )z=Check sample weight is correctly handled with missing values.r   )r  rD   r=  FTrV   r   r  rT   NrM   r   r   r9   )
rs   rx  r  r  r  r   r   r   r   r   )
r  r$   r  rY   rZ   r`   ra   r   tree_with_swtree_samples_removedr   r   r   test_sample_weight_non_uniform>
  s   	 

(r  c                  C   sP   t ddtjtj} t ddtjtj}t| }t|}||ks&J d S r  )r   r   rb   r   r   rR  rS  )tree1tree2pickle1pickle2r   r   r   test_deterministic_pickle[
  s
   

r  r`   r;   rJ   c                 C   s   | dd}td}| |dd||}t|| dd|}|jj}t|dks1J | t	|jjdd |jjdd  t
|jjdk|jjdk@ }t	|jj| d dS )	a'  Check that we properly handle missing values in regression trees using a toy
    dataset.

    The regression targeted by this test was that we were not reinitializing the
    criterion when it comes to the number of missing values. Therefore, the value
    of the critetion (i.e. MSE) was completely wrong.

    This test check that the MSE is null when there is a single sample in the leaf.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    https://github.com/scikit-learn/scikit-learn/issues/28316
    r?   r9   rJ   r   r   NrM   rT   )r  rs   r   r   r   r   ry   r   r   r   flatnonzerorr   rw   )r$   r`   r   ra   r	   tree_refry   
leaves_idxr   r   r   'test_regression_tree_missing_values_toyh
  s   
"r  c                 C   s   t j| }d}t j|t jddd}t j|dd d d f< || t |}t| dd	||}|j
j}t|dksAJ |d S )	Nr.  r   r?   r9   ir;   r  r   )rs   rx  r  r   rn  r  r  r   r   r   r   ry   r   )r4  r  rY   r`   ra   r	   ry   r   r   r   -test_regression_extra_tree_missing_values_toy
  s   

r  c                  C   s   t jdd\} }tjd}|  }|jtjdtjd| dddgf d d		t
}tj||< t||d
d\}}}}tjg dtjd}tdddd}	|	|| ||  t|	jjdks`J t|	jjdk|	jjdk@ }
t|	jj|
 d dS )a  Check that we properly handle missing values in classification trees using a toy
    dataset.

    The test is more involved because we use a case where we detected a regression
    in a random forest. We therefore define the seed and bootstrap indices to detect
    one of the non-frequent regression.

    Here, we check that the impurity is null or positive in the leaves.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    T)
return_X_yr   )r9   r8   )r   r   NrM   rL   )nrj     r   )prM   Q   '   a   [   &   .      e   r  Y   R   r.  r   E      r     I   J   3   /   k      K   n   r[   r   h   9      r   r  O   #   M   Z   r  r  r  ^   r	     rL   ]   r$  r  r   r  r  r  m   r     rD   r#  r  r  \   4   r[   r%  rL   rL      r  r  r  r  r  rU  r   rW   r  N   r  r&  i   rr  r   r  r  f   r+  r  r  r9   r  rI       r  r!  j   r"  r   8   r  r  >   U   r  r  P   r  ?   rJ   r2  T   r<   r<   L   r*  r   r<   r   iHnr   r   r?   r9   rT   )r   	load_irisrs   rx  r  r  r  r   r  r  r  r  r   r   r   r   r   r   ry   r  rr   rw   r   )r`   ra   r  r  maskrL  r   r   r   r	   r   r   r   r   +test_classification_tree_missing_values_toy
  s,   "


r8  c                  C   sH  t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< t|| j| | jjdks9J |jdks@J tt t| jj|j W d   n1 sXw   Y  t| jjd |jd  t| j	|| j
}tj| jjtjd}d|dd< t|| j| | jjdksJ |jdksJ |jt| jj|j dS )zHTest pruning a tree with the Python caller of the Cythonized prune tree.r   r9   r  r   r<   N)r   r   rb   r   r   rs   
atleast_1drb  r  n_features_in_r  rl   r   ro   uint8r    r   r   AssertionErrorr(   rz   r	   rK  pruned_treeleave_in_subtreer   r   r   test_build_pruned_tree_py
  s(   r@  c                  C   s   t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< tjtdd t|| j| W d   dS 1 sDw   Y  dS )z8Test pruning a tree does not result in an infinite loop.r   r9   r  r   z,Node has reached a leaf in the original treer   N)r   r   rb   r   r   rs   r9  rb  r  r:  r  rl   r   ro   r;  r   r   r   r    r=  r   r   r   $test_build_pruned_tree_infinite_loop
  s   "rA  c                  C   sf   t jd} | jddddt j}t |gd }t jdt jd}t	||d g d	}t
|| d
S )zNon-regression test for gh-30554.

    Using log2 and log in sort correctly sorts feature_values, but the tie breaking is
    different which can results in placing samples in a different order.
    r  rT   g      $@rD   )locscaler]   r;   rr  r   )2r   (   rW   r[   rD      r     1   r  -   r   r  r;      rI   r  )   r9         r  rM   r   r-  r#  r  rU  r<   !   rJ   $   r  r  r  r8   r  r&  "   ,   r  r  r  %   r  rL   r
  0   r)     N)rs   rx  default_rngnormalr  r  r  r   r  r   r(   )r  somefeature_valuesr  expected_samplesr   r   r   test_sort_log2_build  s   rY  r  )__doc__r  rc  r`  rR  ry  r  	itertoolsr   r   r   r~  numpyrs   r   joblib.numpy_pickler   numpy.testingr   sklearnr   r   r	   sklearn.dummyr
   sklearn.exceptionsr   sklearn.imputer   sklearn.metricsr   r   r   sklearn.model_selectionr   r   sklearn.pipeliner   sklearn.random_projectionr   sklearn.treer   r   r   r   sklearn.tree._classesr   r   r   r   sklearn.tree._partitionerr   sklearn.tree._treer   r   r   r    r!   r"   r#   r$   r  sklearn.utilsr%   sklearn.utils._testingr&   r'   r(   r)   r*   r+   sklearn.utils.fixesr,   r-   r.   r/   sklearn.utils.validationr0   r   REG_CRITERIONSr   r   dictr7   __annotations__updateSPARSE_TREESr   r  r  y_small_regr`   ra   r   r   r6  rb   rx  r  r  r  r   r]   permr   load_diabetesrc   load_digitsrd   rX   make_multilabel_classificationr  r  rG  X_sparse_posru  y_randomr  X_sparse_mixrl   r  r   r   r   markparametrizer  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r"  r(  r*  r,  r-  rH  r[  ra  re  rj  rq  rw  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  sortedr
  intersectionr  r  r  r  zipr  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r#  r  r"  r4  r5  r2  r;  rT  rY  rq  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r:  r   r  r  r  r  r  r8  r@  rA  rY  r   r   r   r   <module>   s   $	 

(

'



	
$B!;
=
H3<-4
/
		
)3
K


d
(
!*
!4!




 -!/