o
    \i+U                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZmZ g dg dg dg dgZejd	d
dg dg dg dg dgdfddg dg dg dg dgdfddg dg dg dg dgdfd
dg dg dg dg dgg dfd
dg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfddg dg dg dg dgg dfg
dd Zdd Zdd Zejd	d
dg dg dg dg dgdfddg dg dg dg dgdfdd g dg dg dg dgdfddg dg dg dg dgdfddg dg dg dg dgg dfddg dg dg dg dgg d!fddg dg dg dg dgg d"fddg dg dg dg dgg dfgd#d$ Zejd%d&d' Zejd(ddgd)d* Zejd(g d+d,d- Zd.d/ Zejd0ed1d2d3d4 Zd5d6 Z ejd7d
dg d8g d9g d:fddg d8g d;g d<fddg d=g d;g d>fgd?d@ Z!ejdAd
g dBg dCg dDg dEgdfdg dFg dFg dGg dHgdfdg dIg dJg dKg dKgdfgejdLg dMdNdO Z"ejd(g d+dPdQ Z#dRdS Z$ejdTdg dUdfdg dUdfgdVdW Z%dXdY Z&ejdZej'ej(ej)gejd[dej(ej)gejdLg dMd\d] Z*ejd^ej'ej(ej)gejdLg dMd_d` Z+dadb Z,ejdcdddedf edgD fdhdidf edgD fdjdkdf edgD fgdldm Z-ejd(g d+dndo Z.dpdq Z/drds Z0dS )t    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equalignore_warnings      ?)r         @      )r   g      @r         ?)   g      @r      z2strategy, quantile_method, expected, sample_weightuniformwarn)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantileaveraged_inverted_cdf)r   r   r   r   )r   r   r   r   )r   r      r   c                 C   s\   t dd| |d}ttd |jt|d W d    n1 sw   Y  t|t| d S )Nr   ordinaln_binsencodestrategyquantile_method)categorysample_weight)r   r	   UserWarningfitXr   	transform)r   r    expectedr#   est r*   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s   Br,   c                   C   sX   t dddt t tdgd ddt t dddtjjttks*J d S )Nr   r   r   r    r   )	r   fit_transformr&   nparrayr%   n_bins_dtypeintr*   r*   r*   r+   test_valid_n_bins_   s   r4   c                  C   sJ  t dd} t| dd}d}tjt|d |t W d    n1 s%w   Y  g d} t| dd}d}tjt|d |t W d    n1 sMw   Y  g d} t| dd}d	}tjt|d |t W d    n1 suw   Y  g d
} t| dd}d}tjt|d |t W d    d S 1 sw   Y  d S )N)r             @r   r-   z:n_bins must be a scalar or array of shape \(n_features,\).match)r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   r:   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r/   fullr   pytestraises
ValueErrorr.   r&   )r   r)   err_msgr*   r*   r+   test_invalid_n_bins_arrayi   s4   "r@   )r   r   r   r   r9   )r   r   r   r   linear)r   r   r   r   )r   r   r   r   c                 C   s   t g dd| |djt|d}t|t| ttjd }|jj|fks(J t	|j|j
D ]\}}|j|d fks=J q/d S )Nr   r   r   r   r   r   r"   r   )r   r%   r&   r   r'   r/   r0   shape
bin_edges_zipr1   )r   r    r(   r#   r)   
n_features	bin_edgesr   r*   r*   r+   test_fit_transform_n_bins_array   s   6rH   z&ignore: Bins whose width are too smallc                  C   s   t dgdgdgdgdgdgg} tddd	d
d}|j| g dd t|jd g d t|| dgdgdgdgdgdgg dS )z;Check the impact of `sample_weight` one computed quantiles.r   r   r   r   i  i  
   r   r   r   r   )r   r   r   r   r   r   r"   r   )r   r   r   r   r           g      ?      @N)r/   r0   r   r%   r   rD   r'   r&   r)   r*   r*   r+   *test_kbinsdiscretizer_effect_sample_weight   s   ",rM   r   c                 C   sb   | dkrt dd| dd}nt dd| d}tjg dtjd}t|}|jt|d	 t|| d
S )z7Make sure that `sample_weight` is not changed in place.r   r   r   r   r   )r   r   r   )r   r   r   r   r2   r"   N)r   r/   r0   float64copyr%   r&   r   )r   r)   r#   sample_weight_copyr*   r*   r+   /test_kbinsdiscretizer_no_mutating_sample_weight   s   
rR   )r   r   r   c                 C   s   t d tddgddgddgddgg}| dkr#t| ddd	d
}nt| ddd}d}tjt|d || W d    n1 sCw   Y  |j	d dksQJ |
|}t|d d df t|jd  d S )Nalwaysr   r   r   r   r   r   r   r   )r   r   r   r    )r   r   r   z2Feature 0 is constant and will be replaced with 0.r7   )warningssimplefilterr/   r0   r   r<   warnsr$   r%   r1   r'   r   zerosrC   )r   r&   r)   warning_messageXtr*   r*   r+   test_same_min_max   s"   
"
&rZ   c                  C   s   t d} tddd}tt ||  W d    n1 s w   Y  tddd}|| dd tt ||  W d    d S 1 sJw   Y  d S )Nr5   r   r   r-   r   r   )	r/   aranger   r<   r=   r>   r%   reshaper'   rL   r*   r*   r+   test_transform_1d_behavior  s   
"r]   ir   	   c                 C   sZ   t g ddd}t g ddd}|d|   }tdddd	|}t|| d S )
N)r6         @g      @g       @g      $@r   r   )r   r   r   r   r   rI   r   r   r   r   r   r    )r/   r0   r\   r   r.   r   )r^   X_initXt_expectedr&   rY   r*   r*   r+   test_numeric_stability  s   rd   c                  C   s   t g ddddt} | t}t g ddddt} | t}t|r)J ttdd dD dd	|| t g dd
ddt} | t}t|sRJ ttdd dD dd	|	 |	  d S )NrB   r   r   ra   onehot-densec                 S      g | ]}t |qS r*   r/   r[   .0r^   r*   r*   r+   
<listcomp>5      z'test_encode_options.<locals>.<listcomp>F)
categoriessparse_outputonehotc                 S   rf   r*   rg   rh   r*   r*   r+   rj   @  rk   T)
r   r%   r&   r'   spissparser   r   r.   toarray)r)   Xt_1Xt_2Xt_3r*   r*   r+   test_encode_options'  sJ   




ru   zIstrategy, quantile_method, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r5   r5   )r   r   r   r   r   r   )r   r   r   r   r   r5   )r   r   r   r   r   r   )r   r   r   r   r5   r5   c                 C   s   t g ddd}td| |dd}||}t||  td| |dd}||}t||  td| |dd}||}t||  d S )	N)r   r   r   r   r_   rI   r   r   r   r   r   r   r    r   r      )r/   r0   r\   r   r.   r   ravel)r   r    expected_2binsexpected_3binsexpected_5binsr&   r)   rY   r*   r*   r+   test_nonuniform_strategiesH  s    


r|   z&strategy, expected_inv,quantile_method)      r6         r   )r   rK         r   )r   r`   r}   r   )r   r`   r}   r   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)r}   r6   r~   g      )r   rK   r   rJ   )r   r`   r}   g      ?r   )r   rn   re   c                 C   s2   t d| ||d}|t}||}t|| d S )Nr   rv   )r   r.   r&   inverse_transformr   )r   r   expected_invr    kbdrY   Xinvr*   r*   r+   test_inverse_transformq  s   %

r   c                 C   s   t g dd d d f }| dkrtd| ddd}ntd| dd}|| t dd	gd d d f }||}t|jd
dd |j t|jd
dd
g d S )Nr   r   r   r   r   r5   r   r   )r   r   r   r    )r   r   r   r   rw   r   )axisr   )	r/   r0   r   r%   r'   r   maxr1   min)r   r&   r   X2X2tr*   r*   r+    test_transform_outside_fit_range  s   

r   c                  C   s   t g dd d d f } |  }tdddd}|| }t| | | }||}t|| t|t dgdgdgdgg d S )	Nr   r   r   r   )r   r    r   r   r   r   )r/   r0   rP   r   r.   r   r   )r&   X_beforer)   rY   	Xt_beforer   r*   r*   r+   test_overwrite  s   



$r   z-strategy, expected_bin_edges, quantile_method)r   r   r   c                 C   s|   dgdgdgdgdgdgg}t d| |d d}d}tjt|d || W d    n1 s/w   Y  t|jd | d S )Nr   r   )r   r   r    	subsample'Consider decreasing the number of bins.r7   )r   r<   rV   r$   r%   r   rD   )r   expected_bin_edgesr    r&   r   rX   r*   r*   r+   test_redundant_bins  s   r   c                  C   s   t g ddd} t g d}t g ddd}tdddd	d
}d}tjt|d ||  W d    n1 s>w   Y  t|j	d | t|
| | d S )N)皙?r   ffffff?r   r   )r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   )r   r   r5   rI   r   r   rA   r   r   r7   r   )r/   r0   r\   r   r<   rV   r$   r%   r   rD   r'   )r&   rG   rY   r   rX   r*   r*   r+   !test_percentile_numeric_stability  s   	r   in_dtype	out_dtypec                 C   st   t jt| d}td|d|d}|| |d ur|}n|d u r)|jt jkr)t j}n|j}||}|j|ks8J d S NrN   r   r   )r   r   r    r2   )	r/   r0   r&   r   r%   r2   float16rO   r'   )r   r   r   X_inputr   expected_dtyperY   r*   r*   r+   test_consistent_dtype  s   

r   input_dtypec                 C   sh   t jt| d}td|dt jd}|| ||}td|dt jd}|| ||}t|| d S r   )	r/   r0   r&   r   float32r%   r'   rO   r   )r   r   r   kbd_32Xt_32kbd_64Xt_64r*   r*   r+   test_32_equal_64  s$   



r   c                  C   s   t g ddd} tddddd}||  t|}|jd d	 ||  t|jd
 |jd
 D ]\}}t j	
|| q2|jj|jjksHJ d S )Nr
   r   r   rI   r   r   r   r   r   r   )r/   r0   r\   r   r%   r   
set_paramsrE   rD   testingr   rC   )r&   kbd_defaultkbd_without_subsamplingbin_kbd_defaultbin_kbd_with_subsamplingr*   r*   r+   'test_kbinsdiscretizer_subsample_default"  s    

r   zencode, expected_namesrn   c                 C   .   g | ]}t d D ]}d| dt| qqS r5   feat_rangefloatri   col_idbin_idr*   r*   r+   rj   =      rj   r   re   c                 C   r   r   r   r   r*   r*   r+   rj   E  r   r   c                 C      g | ]}d | qS r   r*   )ri   r   r*   r*   r+   rj   K  rk   c                 C   s|   g dg dg dg dg}t d| dd|}||}dd	 td
D }||}|jd |jd ks7J t|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    r   r   r   r   r   r   r   r   r   r   r5   r   r5   r   ra   c                 S   r   r   r*   rh   r*   r*   r+   rj   Y  rk   z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>r   r   r   N)r   r%   r'   r   get_feature_names_outrC   r   )r   expected_namesr&   r   rY   input_featuresoutput_namesr*   r*   r+   *test_kbinsdiscrtizer_get_feature_names_out8  s   

r   c                 C   s   t j|dd }| dkrt| d|dd}nt| d|d}|| t|}|jd d || t|j	d	 |j	d	 d
d d S )N)i r   r   r   iP  r   )r   r   random_stater    )r   r   r   r   r   g{Gz?)rtol)
r/   randomRandomStaterandom_sampler   r%   r   r   r   rD   )r   global_random_seedr&   kbd_subsamplingkbd_no_subsamplingr*   r*   r+   test_kbinsdiscretizer_subsample`  s$   


r   c                  C   s`   g dg dg dg dg} t jtdd tdd|  W d    d S 1 s)w   Y  d S )	Nr   r   r   r   a%  The current default behavior, quantile_method='linear', will be changed to quantile_method='averaged_inverted_cdf' in scikit-learn version 1.9 to naturally support sample weight equivalence properties by default. Pass quantile_method='averaged_inverted_cdf' explicitly to silence this warning.r7   r   )r   )r<   rV   FutureWarningr   r%   )r&   r*   r*   r+   $test_quantile_method_future_warnings}  s   	"r   c                  C   sn   g dg dg dg dg} d}t jt|d tddd	j| g d
d W d    d S 1 s0w   Y  d S )Nr   r   r   r   zWhen fitting with strategy='quantile' and sample weights, quantile_method should either be set to 'averaged_inverted_cdf' or 'inverted_cdf', got quantile_method='linear' instead.r7   r   rA   )r   r    )r   r   r   r   r"   )r<   r=   r>   r   r%   )r&   expected_msgr*   r*   r+   /test_invalid_quantile_method_with_sample_weight  s   "r   )1rT   numpyr/   r<   scipy.sparsesparsero   sklearnr   sklearn.preprocessingr   r   sklearn.utils._testingr   r   r   r   r	   r&   markparametrizer,   r4   r@   rH   filterwarningsrM   rR   rZ   r]   r   rd   ru   r|   r   r   r   r   r   r   r   rO   r   r   r   r   r   r   r   r*   r*   r*   r+   <module>   s   
A

&
5




!
#	

	

