o
    \ir                     @   sX	  d Z ddlZddlZddlmZ ddlZddlZddlm	Z
 ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 e2g dg dg dgZ3dZ4e3j5\Z6Z7ee4e3ddd\Z8Z9dd e/D Z:ej2ge/ Z;e<e:dkrg dnddgZ=ej>j?de;e=d ej>?d!d"d#gej>?d$ej@ejAgd%d& ZBej>j?de;e=d ej>?d!d"d#gd'd( ZCej>j?de;e=d d)d* ZDej>?d+d,d-gej>j?de;e=d ej>?d.g d/d0d1 ZEej>?d2d"d#gd3d4 ZFej>?d5e:d6d7 ZGd8d9 ZHej>j?d:e8ge: e=d ej>j?d;d<d=e3d>d? gg d@d ej>?dAeegdBdC ZIej>j?d;d<d=e3dDd? gg d@d dEdF ZJej>?dGdHdIdJd? dKfdLgej>?dAeegdMdN ZKej>?dAeegdOdP ZLdQdR ZMej>?d2d"d#gej>?d.dSdgdTdU ZNdVdW ZOej>?dAeegdXdY ZPdZd[ ZQej>j?d:e8ge: e=d d\d] ZRd^d_ ZSd`da ZTej>?dbdcddgdedf ZUdgdh ZVdidj ZWej>?dAeegdkdl ZXej>j?de;e=d ej>?dmed"fed#fedfgej>?dnddgdodp ZYej>?d5e:ej>?dAeegdqdr ZZej>?d5e:ej>j?d;d<d=e3gg dsd ej>?dAeegdtdu Z[ej>j?de;e=d ej>?d$ej\ej]gej>?d;d=dvgej>?dAeegdwdx Z^ej>?dAeegdydz Z_ej>?dAeegd{d| Z`d}d~ Zadd Zbej>j?d:e8ge: e=d ej>?dAeegdd Zcej>?d$ej\ej]ej@ejAgej>?dAeegdd Zdej>j?d:e8ge: e=d dd Zedd Zfdd Zgdd Zhej>j?d:e8ge: e=d ej>?dAeegdd Ziej>j?d:e8ge: e=d ej>?dAeegdd Zjdd Zkej>j?de;e=d dd Zlej>?dAeegdd Zmdd Znej>j?de;e=d ej>?d!d"d#gdd Zoej>?d$ej@ejAgej>?dddgdd Zpej>?d$ej@ejAgdd Zqej>?dedfedfgdd Zrej>?dAeegdd Zsej>?dAeegej>?dde4d idfd;e8dd idfd;dd? idfd;e8ddddf idfd;dd? idfgdd Ztej>?dde8dd idfgdd Zuej>?d:e8ge: ej>?d$ejAej@gdd Zvej>?de-e8dddgdd Zwdd Zxdd Zyej>?dd;ezd=iddgddggddfddĄ Z{ej>?dedfedfedfgddɄ Z|ej>?de/dg dd̄ Z}ej>?d;d=d<gdd΄ Z~ej>?d;d=d<gddЄ Zej>j?de;e=d ej>?d2d"d#gdd҄ ZdS )zTesting for K-means    N)StringIO)sparse)clone)KMeansMiniBatchKMeansk_meanskmeans_plusplus)_euclidean_dense_dense_wrapper_euclidean_sparse_dense_wrapper_inertia_dense_inertia_sparse_is_same_clustering_relocate_empty_clusters_dense_relocate_empty_clusters_sparse)_labels_inertia_mini_batch_step)
make_blobs)ConvergenceWarning)pairwise_distancespairwise_distances_argmin)v_measure_score)euclidean_distances)assert_allcloseassert_array_equalcreate_memmap_backed_data)	row_norms)CSR_CONTAINERS)_get_threadpool_controller)              @r   r   r   )      ?r    g      @r   r   )r    r   r   r   r    d   r    *   )	n_samplescenterscluster_stdrandom_statec                 C   s   g | ]}|t qS  )X).0	containerr'   r'   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/cluster/tests/test_k_means.py
<listcomp>2       r,      )densesparse_matrixsparse_arrayr/   r0   array_constr)idsalgolloydelkandtypec                 C   s   | ddgddgddgddgg|d}g d}t jddgddgg|d}g d}d}t jddgd	dgg|d}d
}	td
d||d}
|
j||d t|
j| t|
j| t|
j| |
j	|	ks`J d S )Nr         ?   r7   )   r9   r9   r;   r   r   r9   r9   g      ?g      ?g      ?r.   
n_clustersn_initinit	algorithmsample_weight)
nparrayr   fitr   labels_r   inertia_cluster_centers_n_iter_)r2   r4   r7   r(   rC   init_centersexpected_labelsexpected_inertiaexpected_centersexpected_n_iterkmeansr'   r'   r+   test_kmeans_results;   s   $rQ   c           	      C   s   | ddgddgddgddgg}t ddgddgg}tdd||d}|| d}d}t|j| |j|ks9J zg d}ddgd	dgg}t|j| t|j	| W d S  t
yv   g d
}d	dgddgg}t|j| t|j	| Y d S w )Nr   r8   r9   r;   r.   r=   g      ?r<   g      ?)r9   r9   r   r   r    r   )rD   rE   r   rF   r   rH   rJ   r   rG   rI   AssertionError)	r2   r4   r(   rK   rP   rO   rM   rL   rN   r'   r'   r+   test_kmeans_relocated_clustersS   s&    
rS   c              	   C   s   t g ddd}| |}t d}t g ddd}t g ddd}t g d}t jdt jd}| t ju rHt|||||| nt|j|j	|j
||||| t|g d	 t|d
gdgdgg d S )N)
      $g      #ig      !ir9   	         #@
   rU   r9   rX   )rT   rY   )g     0rY   rY   )g      $@r   r   r:   )   r9   r9   irW   )rD   rE   reshapeoneszerosint32r   r   dataindicesindptrr   r   )r2   r(   rC   centers_oldcenters_newweight_in_clusterslabelsr'   r'   r+   test_relocate_empty_clustersr   s.   

rf   distributionnormalblobstol){Gz?g:0yE>g0.++r   c           	      C   s   t j|}| dkr|jdd}nt|d\}}d||dk < ||}td|d|d}td	d|d|d
}|| || t|j|j t	|j
|j
 |j|jksSJ |jtj|jddks`J d S )Nrh   i  rX   sizer&   r      r9   )r>   r&   r?   rj   r6   )rA   r>   r&   r?   rj   ư>)rel)rD   randomRandomStaterh   r   r   rF   r   rI   r   rG   rJ   rH   pytestapprox)	rg   r2   rj   global_random_seedrndr(   _km_lloydkm_elkanr'   r'   r+   test_kmeans_elkan_results   s(   

r|   rA   c                 C   sH   t j|}|jdd}d}t| d|dd|d|}|j|k s"J d S )Nrl   rm   i,  rp   r9   r   )rA   r>   r&   r?   rj   max_iter)rD   rs   rt   rh   r   rF   rJ   )rA   rw   rx   r(   r}   kmr'   r'   r+   test_kmeans_convergence   s   	r   X_csrc              	   C   sp  t j|}t|jtjd }| }t |}t |}t j|jd t	j
d}t j|jd t	j
d}t jt	jd t	j
d}	t	d d }
| d d }|	d d }t|
||||t j|dd}|dkseJ t|
||\}}|dkssJ ||k syJ t|||||t j|dd}|dksJ t|||\}}|dksJ ||k sJ t|| t|| t|| t|| d S )Nrm   r   r:   rX   F)random_reassignr   )rD   rs   rt   r$   rh   shapecopy
zeros_liker]   r(   r7   r\   r   r   r   r   )r   rw   rngrb   centers_old_csrrc   centers_new_csrweight_sumsweight_sums_csrrC   X_mbX_mb_csrsample_weight_mbold_inertiare   new_inertiaold_inertia_csr
labels_csrnew_inertia_csrr'   r'   r+   !test_minibatch_update_consistency   sV   


	
	


r   c                 C   sX   | j }|jttfksJ | j}t|jd tksJ ttt	|d | j
dks*J d S )Nr   r    r   )rI   r   r>   
n_featuresrG   rD   uniquer   r   true_labelsrH   )r~   r$   re   r'   r'   r+   _check_fitted_model  s   r   
input_datar@   rs   	k-means++c                 C      t S Nr$   r(   kr&   r'   r'   r+   <lambda>"      r   )rs   r   ndarraycallable	Estimatorc                 C   s4   t |trdnd}| |td|d|}t| d S )NrX   r9   r"   r@   r>   r&   r?   )
isinstancestrr>   rF   r   )r   r   r@   r?   r~   r'   r'   r+   test_all_init  s   r   c                 C   r   r   r   r   r'   r'   r+   r   1  r   c                 C   sF   t | trdnd}t| td|d}tdD ]}|t qt| d S )NrX   r9   r   r   r!   )r   r   r   r>   rangepartial_fitr(   r   )r@   r?   r~   ir'   r'   r+   &test_minibatch_kmeans_partial_fit_init/  s   r   zinit, expected_n_init)r   r9   )rs   defaultc                 C   s   |j || jd fdS )Nr9   rm   )uniformr   )r(   r>   r&   r'   r'   r+   r   F  s    r   )
array-liker9   c                 C   sl   d\}}}t j||}|dkrt j||}|dkr#| tu r!dnd}| ||dd|}|j|ks4J dS )	zCheck that `n_init="auto"` chooses the right number of initializations.
    Non-regression test for #26657:
    https://github.com/scikit-learn/scikit-learn/pull/26657
    )r!   rX   rp   r   r   r;   rX   autor>   r@   r?   N)rD   rs   randnr   rF   _n_init)r   r@   expected_n_initn_sampler   r>   r(   rP   r'   r'   r+   ,test_kmeans_init_auto_with_initial_centroids@  s   
r   c                 C   s`   t t}t t}| ttd|dt}| t|d|d|}t|j|j t|j	|j	 d S )Nr9   r>   r@   r?   r&   )
rD   asfortranarrayr(   r$   r>   rF   r   rI   r   rG   )r   rw   	X_fortrancenters_fortrankm_ckm_fr'   r'   r+   test_fortran_aligned_data_  s"   

r   c                  C   s<   t tddd} tj}t t_z| t W |t_d S |t_w )Nr"   r9   )r>   r&   verbose)r   r>   sysstdoutr   rF   r(   )r~   
old_stdoutr'   r'   r+   test_minibatch_kmeans_verboser  s   r   rk   c              	   C   s   t jdjdd}t| tddd|dd| | }t	d|j
s%J t	d	|j
s.J |dkr=t	d
|j
s;J d S t	d|j
sFJ d S )Nr   rl   rm   r"   rs   r9   )rA   r>   r&   r@   r?   rj   r   zInitialization completezIteration [0-9]+, inertiazstrict convergencez center shift .* within tolerance)rD   rs   rt   rh   r   r>   rF   
readouterrresearchout)rA   rj   capsysr(   capturedr'   r'   r+   test_kmeans_verbose}  s"   r   c                   C   F   t jtdd tdddt W d    d S 1 sw   Y  d S )Nz,init_size.* should be larger than n_clustersmatchrX      )	init_sizer>   )ru   warnsRuntimeWarningr   rF   r(   r'   r'   r'   r+   'test_minibatch_kmeans_warning_init_size  s
   "r   c                 C   sH   t jtdd | ttddt W d    d S 1 sw   Y  d S )NzAExplicit initial center position passed: performing only one initr   rX   r@   r>   r?   )ru   r   r   r$   r>   rF   r(   )r   r'   r'   r+   'test_warning_n_init_precomputed_centers  s   "r   c                 C   s   t dd| d\}}d|d d dd d f< tdd| dd	|}|jjd
d }|dks4J d|dtdd| dd	|}|jjd
d }|dksTJ d|dtd| dd}tdD ]}|| q_|jjd
d }|dks|J d|dd S )Nr!   rp   )r#   r$   r&   r   r.   r   rX   rs   )r>   
batch_sizer&   r@   r9   axisrV   znum_non_zero_clusters=z is too small   )r>   r&   r@   )r   r   rF   rI   anysumr   r   )rw   zeroed_Xr   r~   num_non_zero_clustersr   r'   r'   r+    test_minibatch_sensible_reassign  s.   
r   c              
   C   s   t ttf}ttD ]}tt|k jdd||< qt t	}t 
|}t| ||dd  }t| |||t tt j|ddd t| ||dd  }||ksQJ t| |||t tt j|ddd t|| d S )Nr   r   r9   T)r   reassignment_ratiogV瞯<)rD   emptyr>   r   r   r(   r   meanr\   r#   
empty_liker   r   r]   rs   rt   r   )r   rw   perfect_centersr   rC   rc   score_beforescore_afterr'   r'   r+   test_minibatch_reassign  s:   



r   c                   C   s   t ddtdddt d S )Nr!   rX   r"   T)r>   r   r   r&   r   )r   r#   rF   r(   r'   r'   r'   r+   &test_minibatch_with_many_reassignments  s   r   c                  C   sp   t ddddt} | jdksJ t ddddt} | jdks"J t dddtd dt} | jtks6J d S )NrX   rp   r9   )r>   r   r?         )r>   r   r?   r   )r   rF   r(   
_init_sizer#   r~   r'   r'   r+   test_minibatch_kmeans_init_size  s   r   ztol, max_no_improvement)-C6?N)r   rX   c                 C   s   t dddd\}}}td|d|dddd|d	}|| d|j  k r(dk s+J  J |  }|d u r:d	|jv s:J |dkrEd
|jv sGJ d S d S )Nr;   r   T)r$   r&   return_centersr   rX   r9   )	r>   r@   r   rj   r&   r}   r?   r   max_no_improvementz Converged (small centers change)z*Converged (lack of improvement in inertia))r   r   rF   rJ   r   r   )r   rj   r   r(   ry   r$   r~   r   r'   r'   r+   #test_minibatch_declared_convergence  s*   
r   c                  C   s   d} t jd }td| ddt }|jt|j|  | ks J t|jt	s(J td| ddd ddt }|jdks<J |jd| |  ksGJ t|jt	sOJ d S )Nr   r   r;   )r>   r   r&   rX   )r>   r   r&   rj   r   r}   )
r(   r   r   rF   rJ   rD   ceiln_steps_r   int)r   r#   r~   r'   r'   r+   test_minibatch_iter_steps;  s$   
	r   c                  C   s6   t  } tdtdd}||  t| t| t  d S )NFr"   )copy_xr>   r&   )r(   r   r   r>   rF   r   r   )my_Xr~   r'   r'   r+   test_kmeans_copyxT  s
   
r   c                 C   s`   t j|dd}| d|dd}|||}| d|dd}|||}||ks.J d S )Nr!   rX   r9   )r?   r&   r}   )rD   rs   rt   r   rF   score)r   rw   r(   km1s1km2s2r'   r'   r+   test_score_max_iter_  s   r   zEstimator, algorithmr}   c                 C   s   t ddd|d\}}|||d}| ddd||d}|d ur#|j|d || |j}	||}
t|
|	 ||}
t|
|	 ||j}
t|
t	d d S )Nr   rX   r#   r   r$   r&   r:   rs   )r>   r@   r?   r}   r&   )rA   )
r   
set_paramsrF   rG   predictr   fit_predictrI   rD   arange)r   rA   r2   r}   global_dtyperw   r(   ry   r~   re   predr'   r'   r+   test_kmeans_predictl  s*   





r  c                 C   sl   t j|tf}| t|dd}|jt|d | t|dd}|j||d t|j	|j	 t
|j|j d S Nr9   r>   r&   r?   rB   )rD   rs   rt   random_sampler#   r>   rF   r(   r   rG   r   rI   )r   r   rw   rC   km_dense	km_sparser'   r'   r+   test_dense_sparse  s   r  )rs   r   r   c                 C   s^   t |trdnd}| t||dd}|| t|t|j |t t|||j d S )NrX   r9   r   r   )r   r   r>   rF   r   r   r(   rG   )r   r@   r   r?   r~   r'   r'   r+   test_predict_dense_sparse  s   

r  r   c           
   	   C   s   t ddgddgddgddgddgddgg}|||d	}|d
kr#dnd}|d
kr/|d d n|}| d|||d}| tu rC|jdd || |jjt jksQJ g d}	tt	|j
|	d | tu rrt||}|jjt jkstJ d S d S )Nr   rX      rV   rU   r9   r.   rZ   r:   r   r   )r   )r   r9   r9   r   r   r9   r    )rD   rE   r   r   rF   rI   r7   float64r   r   rG   r   r   )
r   r2   r7   r@   rw   X_denser(   r?   r~   rL   r'   r'   r+   test_integer_input  s$   .
r  c                 C   sb   | t |dt}||j}t|t|j t| t	
t  |t}t|tt|j d S )Nr>   r&   )r>   rF   r(   	transformrI   r   r   r   diagonalrD   r]   )r   rw   r~   Xtr'   r'   r+   test_transform  s   
r  c                 C   s8   | |dd tt}| |ddt}t|| d S )Nr9   )r&   r?   )rF   r(   r  fit_transformr   )r   rw   X1X2r'   r'   r+   test_fit_transform  s   r  c                 C   s:   t j}dD ]}ttd|| ddt}|j|ksJ qd S )N)r9   rp   rX   rs   r9   )r>   r@   r?   r&   r}   )rD   infr   r>   rF   r(   rH   )rw   previous_inertiar?   r~   r'   r'   r+   test_n_init  s   r  c                 C   s`   t ttd | d\}}}|jttfksJ t|jd tks J ttt	|d |dks.J d S )N)r>   rC   r&   r   r    r   )
r   r(   r>   r   r   rD   r   r   r   r   )rw   cluster_centersre   inertiar'   r'   r+   test_k_means_function  s   r  c           
      C   s0  | d|d}i }i }i }i }t jt jfD ]A}|j|dd}	||	 |j||< ||	||< |j||< |j||< |jj	|ks@J | t
u rU||	dd  |jj	|ksUJ qt|t j |t j dd t|t j |t j |t j  d d	 t|t j |t j |t j  d d	 t|t j |t j  d S )
Nr9   )r?   r&   Fr   r   r;   r   rtol)atol)rD   r
  float32astyperF   rH   r  rI   rG   r7   r   r   r   maxr   )
r   r   rw   r~   r  r  r$   re   r7   r(   r'   r'   r+   test_float_precision  s.   



( r#  c                 C   sJ   t j|dd}tj|dd}| |tdd}|| t|j|r#J d S )NFr  r9   r   )r(   r!  r$   r>   rF   rD   may_share_memoryrI   )r   r7   
X_new_typecenters_new_typer~   r'   r'   r+   test_centers_not_mutated=  s
   
r'  c                 C   s8   t td| }t t|jdd| }t|j|j d S )N)r>   r9   r   )r   r>   rF   rI   r   )r   r   r   r'   r'   r+   test_kmeans_init_fitted_centersL  s
   r(  c                 C   s   t ddgddgddgddgg}td| d}d}tjt|d || t|jtt	dks3J W d    d S 1 s>w   Y  d S )Nr   r9      r  zmNumber of distinct clusters \(3\) found smaller than n_clusters \(4\). Possibly due to duplicate points in X.r   r;   )
rD   asarrayr   ru   r   r   rF   setrG   r   )rw   r(   r~   msgr'   r'   r+   1test_kmeans_warns_less_centers_than_unique_points\  s   "
"r-  c                 C   s   t j| ddS Nr   r   )rD   sortr   r'   r'   r+   _sort_centersm  s   r0  c                 C   s   t j| jddtd}t jt|dd}ttdt	| d}t
|jt|d}t |j|}t
||}t|j| t|j|j tt|jt|j d S )Nr9   rp   rm   r   r   )r@   r?   r>   r&   rB   )rD   rs   rt   randintr#   repeatr(   r   r$   r>   r   rF   rG   r   r   rH   r0  rI   )rw   rC   X_repeatr~   km_weightedrepeated_labelskm_repeatedr'   r'   r+   test_weighted_vs_repeatedq  s    r7  c                 C   s\   t t}| t|dd}t|j|d d}t|j||d}t|j|j t|j	|j	 d S r  )
rD   r\   r#   r>   r   rF   r   rG   r   rI   )r   r   rw   rC   r~   km_nonekm_onesr'   r'   r+   test_unit_weights_vs_no_weights  s   
	r:  c                 C   sj   t j|jtd}| t|dd}t|j||d}t|j|d| d}t|j	|j	 t
|j|j d S )Nrm   r9   r  rB   r8   )rD   rs   rt   r   r#   r>   r   rF   r   rG   r   rI   )r   r   rw   rC   r~   km_orig	km_scaledr'   r'   r+   test_scaled_weights  s   	r=  c                  C   s$   t dddt} | jdksJ d S )Nr6   r9   )rA   r}   )r   rF   r(   rJ   r   r'   r'   r+    test_kmeans_elkan_iter_attribute  s   r>  c                 C   st   | dgdgg}ddg}t dgdgg}td|dd}|j||d tt|jdks.J t|jdgdgg d S )	NrU   r9   gffffff?g?rX   r.   r   rB   )	rD   rE   r   rF   lenr+  rG   r   rI   )r2   r(   rC   r@   r~   r'   r'   r+   #test_kmeans_empty_cluster_relocated  s   r@  c                 C   s   t j|}|jdd}t jddd | t|d|j}W d    n1 s)w   Y  t jddd | t|d|j}W d    n1 sKw   Y  t	|| d S )N)2   rX   rm   r9   openmp)limitsuser_apir  r.   )
rD   rs   rt   rh   r   limitr>   rF   rG   r   )r   rw   rx   r(   result_1result_2r'   r'   r+   #test_result_equal_in_diff_n_threads  s   

rH  c                   C   r   )Nz9algorithm='elkan' doesn't make sense for a single clusterr   r9   r6   )r>   rA   )ru   r   r   r   rF   r(   r'   r'   r'   r+   test_warning_elkan_1_cluster  s   "rI  c                 C   sz   t j|jdd}|d d }| |}dd }|||\}}tdd||dd|}|j}	|j}
t||	 t	||
 d S )N)r!   rp   rm   rp   c                 S   sP   |  }t| |}t|jd D ]}| ||k jdd||< qt| |}||fS r.  )r   r   r   r   r   )r(   r@   new_centersre   labelr'   r'   r+   	py_kmeans  s   

z+test_k_means_1_iteration.<locals>.py_kmeansr9   )r>   r?   r@   rA   r}   )
rD   rs   rt   r   r   rF   rG   rI   r   r   )r2   r4   rw   r(   rK   rL  	py_labels
py_centers	cy_kmeans	cy_labels
cy_centersr'   r'   r+   test_k_means_1_iteration  s   

rR  squaredTFc                 C   s   t j|}tjdddd|| d}| d}|dj| dd}|d	  }|| d	  }|r4|nt 	|}t
|||}	t|j|j|||}
| t jkrPd
nd}t|	|
|d t|	||d t|
||d d S )Nr9   r!   r8   csrdensityformatr&   r7   rU   Fr  r.   r   gHz>r  )rD   rs   rt   sptoarrayr[   r   r!  r   sqrtr	   r
   r_   r`   r   r   )r7   rS  rw   r   a_sparsea_densebb_squared_normexpecteddistance_dense_densedistance_sparse_denser  r'   r'   r+   test_euclidean_distance  s"   rb  c                 C   s|  t j|}tjdddd|| d}| }|dj| dd}|ddj| dd}|jddt jd	}|||  d
 j	dd}t 	|| }	t
||||dd}
t||||dd}| t jkr_dnd}t|
||d t|
|	|d t||	|d d}||k}|| ||  d
 j	dd}t 	|||  }	t
||||d|d}
t||||d|d}t|
||d t|
|	|d t||	|d d S )Nr!   rX   r8   rT  rU  Fr  rp   )rn   r7   r.   r9   r   )	n_threadsr   rq   r  )rc  single_label)rD   rs   rt   rX  rY  r   r!  r1  r^   r   r   r   r   r   )r7   rw   r   X_sparser  rC   r$   re   	distancesr_  inertia_denseinertia_sparser  rK  maskr'   r'   r+   test_inertia  s>   
rj  zKlass, default_n_initrX   r;   c                 C   sZ   | ddd}| t |jdksJ | ddd}| t | jdkr)|jdks+J d S d S )Nr   r   )r?   r@   r9   rs   r   rX   )rF   r(   r   __name__)Klassdefault_n_initestr'   r'   r+   test_n_init_autoI  s   

 ro  c                 C   sR   t dgdgdgg}t g d}| dddj||d t|t g d d S )Nr9   r.   r)  )r8   g?g333333?r   r  rB   )rD   rE   rF   r   )r   r(   rC   r'   r'   r+   test_sample_weight_unchangedT  s   rp  zparam, matchr>   r9   z#n_samples.* should be >= n_clusterszIThe shape of the initial centers .* does not match the number of clustersc                 C   s   | d d S )Nr.   r'   X_r   r&   r'   r'   r+   r   i  s    rZ   zUThe shape of the initial centers .* does not match the number of features of the datac                 C   s   | d dd df S )NrZ   r.   r'   rq  r'   r'   r+   r   s  r-   c                 C   sT   | dd}t jt|d |jdi |t W d    d S 1 s#w   Y  d S )Nr9   )r?   r   r'   )ru   raises
ValueErrorr   rF   r(   )r   paramr   r~   r'   r'   r+   test_wrong_params^  s   
"rv  x_squared_normszKThe length of x_squared_norms .* should be equal to the length of n_samplesc                 C   sF   t jt|d tttfi |  W d    d S 1 sw   Y  d S )Nr   )ru   rs  rt  r   r(   r>   )ru  r   r'   r'   r+   !test_kmeans_plusplus_wrong_params  s   "rx  c                 C   s   |  |}t|t|d\}}|jd tksJ |dk sJ ||jd k s*J |jd tks3J |jdd|jddk sCJ |jdd|jddk sSJ tt|  || d S )Nro   r   r   )	r!  r   r>   r   allr"  minr   r(   )r   r7   rw   r_   r$   r`   r'   r'   r+   test_kmeans_plusplus_output  s   

  r{  rS  c                 C   s$   t tt| d\}}tt| | d S )N)rw  )r   r(   r>   r   )rw  r$   r`   r'   r'   r+   test_kmeans_plusplus_norms  s   r}  c                 C   s<   t tt| d\}}tt}t |t| d\}}t|| d S )Nro   )r   r(   r>   rD   r   r   )rw   	centers_cry   r   r   r'   r'   r+   test_kmeans_plusplus_dataorder  s   

r  c                  C   sp   t jg dt jd} t| | dsJ t jg dt jd}t| |ds$J t jg dt jd}t| |dr6J d S )N)r9   r   r   r9   r.   r   r.   r9   r:   r;   )r   r.   r.   r   r9   r.   r9   r   )r9   r   r   r.   r.   r   r.   r9   )rD   rE   r^   r   )labels1labels2labels3r'   r'   r+   test_is_same_clustering  s   r  kwargs)r@   r?   c                 C   sH   t jddgddgddgddggt jd}tdddi| }|| dS )	zZCheck that init works with numpy scalar strings.

    Non-regression test for #21964.
    r   r8   r9   r:   r>   r.   Nr'   )rD   r*  r
  r   rF   )r  r(   
clusteringr'   r'   r+   -test_kmeans_with_array_like_or_np_scalar_init  s   (r  zKlass, methodrF   r   c                    sR   | j   |  }t||t |jjd }| }t fddt|D | dS )z=Check `feature_names_out` for `KMeans` and `MiniBatchKMeans`.r   c                    s   g | ]}  | qS r'   r'   )r)   r   
class_namer'   r+   r,     s    z*test_feature_names_out.<locals>.<listcomp>N)	rk  lowergetattrr(   rI   r   get_feature_names_outr   r   )rl  methodrP   r>   	names_outr'   r  r+   test_feature_names_out  s   
 r  csr_containerc                 C   sd   t ddddd\}}| dur| |}t }||}t|j|_t|j|_||}t|| dS )z_Check that predict does not change cluster centers.

    Non-regression test for gh-24253.
    r   rX   r   r   N)r   r   r   r   rI   rG   r   r   )r  r(   ry   rP   y_pred1y_pred2r'   r'   r+   ,test_predict_does_not_change_cluster_centers  s   

r  c           	   	   C   s   t j|}tddd|d\}}t|dd}t }|j||| |j|jd ddt j|d	}|j||| t 	|jd dt j|d	}t
t t|| W d
   d
S 1 sZw   Y  d
S )zCheck that sample weight is used during init.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r   rX   r   Tr|  r   rm   rp   r(   rw  r@   rC   n_centroidsr&   N)rD   rs   rt   r   r   r   _init_centroidsr   r   r\   ru   rs  rR   r   )	r@   rw   r   r(   ry   rw  rP   clusters_weightedclustersr'   r'   r+   test_sample_weight_init  s2   


"r  c           
   	   C   s   t j|}tddd|d\}}|j|jd d}d|ddd< t|dd	}t }|j||| |d
t j|d}t	|ddd |}	t 
t |	drMJ dS )zCheck that if sample weight is 0, this sample won't be chosen.

    `_init_centroids` is shared across all classes inheriting from _BaseKMeans so
    it's enough to check for KMeans.
    r!   rp   r   r   rm   Nr.   Tr|  rX   r  )rD   rs   rt   r   r   r   r   r   r  r   r   isclose)
r@   rw   r   r(   ry   rC   rw  rP   r  dr'   r'   r+   test_sample_weight_zero'  s$   


r  c                 C   s   t ddgddgddgddgddgg}td|| d}d}tjt|d ||| W d   n1 s6w   Y  |jdksBJ dS )zCheck that kmeans stops when there are more centers than non-duplicate samples

    Non-regression test for issue:
    https://github.com/scikit-learn/scikit-learn/issues/28055
    r   r9   rp   )r>   r@   rA   zENumber of distinct clusters \(4\) found smaller than n_clusters \(5\)r   N)rD   rE   r   ru   r   r   rF   rJ   )rA   r2   r(   r~   r,  r'   r'   r+   test_relocating_with_duplicatesE  s   (r  )__doc__r   r   ior   numpyrD   ru   scipyr   rX  sklearn.baser   sklearn.clusterr   r   r   r   sklearn.cluster._k_means_commonr	   r
   r   r   r   r   r   sklearn.cluster._kmeansr   r   sklearn.datasetsr   sklearn.exceptionsr   sklearn.metricsr   r   sklearn.metrics.clusterr   sklearn.metrics.pairwiser   sklearn.utils._testingr   r   r   sklearn.utils.extmathr   sklearn.utils.fixesr   sklearn.utils.parallelr   rE   r$   r#   r   r>   r   r(   r   X_as_any_csrdata_containersr?  data_containers_idsmarkparametrizer   r
  rQ   rS   rf   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r^   int64r  r  r  r  r  r#  r'  r(  r-  r0  r7  r:  r=  r>  r@  rH  rI  rR  rb  rj  ro  rp  rv  rx  r{  r}  r  r  str_r  r  r  r  r  r  r'   r'   r'   r+   <module>   s   $	


)

?	



!
.

#

$


	
*


	

	


$



!
