o
    \ies                    @   s  d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZ ddl m!Z!m"Z"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z* ddl+m,Z,m-Z- G dd deeZ.G dd deZ/G dd deZ0G dd deZ1G dd deZ2dd Z3dd Z4ej56ddd gd!d" Z7ej5j6d#d$d%gd#d&gd'ej5j6d(g e8d%d%gd%d%ggg d)d'ej56d*d%d$gd+d, Z9d-d. Z:d/d0 Z;ej56d1e,d2d3 Z<d4d5 Z=ej56d1e,d6d7 Z>d8d9 Z?d:d; Z@d<d= ZAd>d? ZBd@dA ZCej56dBdCdDgdEdF ZDdGdH ZEdIdJ ZFdKdL ZGdMdN ZHdOdP ZIdQdR ZJdSdT ZKdUdV ZLdWdX ZMdYdZ ZNd[d\ ZOej56d]dgg d^d_gfdgd`gd_gfdadb dcdb d_gfddgdegdfgfg dgg d^g dhfgdidj ZPej56dkd$d%gdldm ZQej56dndgd`gfe8dgd`gfeRdd`d`gfe8d$d%gd%d$gfgdodp ZSej56dndgd`gfeRdd`d`gfe8d$d%gd%d$gfdqgdrgfdsdrgfe8dqgdrgfej8dqgeTdtdrgfeRddqdrgfeRdqdqdrgfg	dudv ZUej56dndgd`d_gfe8dgd`d_gfeRdd`d`d_gfe8g dgg dwfgdxdy ZVdzd{ ZWd|d} ZXej56d1e,d~d ZYej56d1e,dd ZZdd Z[dd Z\ej56ddgede. dgfde. d`gfge/ ddfede. dgfde. d`gfgdDddfede. dgfddCd`gfgdDddfede. dgfddDd`gfgdDddfede. dgfgdDddfede. dgfde. d`gfgdCddfede. dgfgdCddfgej56dddgdd Z]dd Z^dd Z_dd Z`dd Zaej56dejbge,dd Zcdd Zdej56dddgdejedfddgddeTdgfddgdefeggdfdgdeTdgdfdgdeggdfdgdegdfdgdejegdfdgdefgdfdgdejegdfddgdddfdgddefgfg degdfg ddejeeTdgdfgdd Zhdd Zidd Zjdd Zkej5j6dg ej8g efdtddb gg dd'dd Zlej56dd`gddb dgddb d%d$gddb gdd Zmej56dd`gddb d%d$gddb gddÄ Znej56dBdDe gddń ZoddǄ Zpej56dBdDe gddɄ Zqej56dBdDe gdd˄ Zrej56dg d͢ej56dBe. dDdCgddτ Zsddф Ztddӄ ZuG ddՄ de.Zvej56ddev ddgfddDdgfgdDg dۢfdev ddgfddDdgfgdCg dܢfdev dgfddCdgfgdDg dޢfdevddgg dfgdDg dfdevddgdgfddDdgfgdCg dfdevdd ewd_D dgfdevdd ewd_D dgfgdDg dfddCdgfgdCg fdev eRd`dfgdCddgfdev dgfddCeRddfgdDg dޢfdev ddgfddDeRddfgdDg dۢfdev eRddكfgdCddgfdev dgfddCeRdd؃fgdDddgfdev ddgfddDeRdd؃fgdDg dfgdd ZxdeydeyfddZzdeydeyfddZ{ej56ddev ddgfddDdgfgdDezg dfdev ddgfddDdgfgdCdg dfdev ddgfddDeRdd؃fgdDe{g dfgdd Z|ej56ddev ddgfddDdgfgdDg dfdevdgddgfddDdgfgdCddgfdev dgfddCdgfgdDg dfdevddgg dfgdDg d fdevddgdgfddDdgfgdCg dfdevdd ewd_D dgfdevdd ewd_D dgfgdDg dfddCdgfgdCg fdev eRd`d_fddCdgfgdDg dfdev dgfddCeRddfgdDg dfdev ddgfddDeRdd_fgdCg dfdev eRdd݃fddCdgfgdDg dfdev dgfddCeRdd؃fgdDddgfdev ddgfddDeRdd݃fgdCg dfdev ddgfddDeRdd݃fgdCg dfgdd Z}ej56d	dev dgfddDdgfgdCd
fdevddgdgfddDdgfgdCdfdevdgdgfddDdgfgdDdfdevdgdgfddCdgfgdDdfdevddgdgfddDddgfgdCdfdevdgdgfddDdgfdevdgdgfgdDdfdevddgdgfddDdgfdevdgdgfgdDdfdevdd ewdD dgfdevdd ewdD dgfgdDdfdevddgeRd`d_fddDdgfdevdgdgfgdDdfdevddgdgfddDeRdd`fdevdgdgfgdDdfdevddgeRddكfddDdgfdevdgdgfgdDdfdevddgdgfddDeRddfdevdgdgfgdDdfgdd Z~ej56dd$d%gej56dBdCdDgdd Zej56dBdCdDgej56dd$d%gdd Zej56dBdCdDgdd ZG dd deZej56ded d!d"d#gd$d%gfdCd"gd$gfdDd"d#gd$d%gfgd&d' Zej5j6d(g e8d%d%gd%d%ggg d)d'd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zej56d5d6d#gd7d8 Zej56d9d#d gd:d; Zej56d9d#d gd<d= Zej5je-eje-d>k d?d@dAdB ZdCdD Zej56dg dEdFdG Zej56dg dEed$dHdIdJ Zed$dHdKdL Zej56dg dEed$dHdMdN Zed$dHdOdP Zed$dHdQdR Zed$dHdSdT ZdS (U  z
Test the ColumnTransformer.
    N)assert_allclosesparse)config_context)BaseEstimatorTransformerMixin)ColumnTransformermake_column_selectormake_column_transformer)NotFittedError)DictVectorizer)VarianceThreshold)FunctionTransformer
NormalizerOneHotEncoderStandardScaler)ConsumingTransformer	_Registrycheck_recorded_metadata_safe_indexing)_convert_containerassert_allclose_dense_sparseassert_almost_equalassert_array_equal)CSR_CONTAINERSparse_versionc                   @       e Zd ZdddZdddZdS )TransNc                 C      | S N selfXyr!   r!   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/compose/tests/test_column_transformer.pyfit/      z	Trans.fitc                 C   s2   t |dr	| S t|dddkrt|jS |S )Nto_framendim      )hasattrr)   getattrnp
atleast_2dTr"   r!   r!   r&   	transform2   s
   
zTrans.transformr    __name__
__module____qualname__r'   r2   r!   r!   r!   r&   r   .       
r   c                   @      e Zd ZdddZdd ZdS )DoubleTransNc                 C   r   r    r!   r"   r!   r!   r&   r'   =   r(   zDoubleTrans.fitc                 C   s   d| S )Nr+   r!   r#   r$   r!   r!   r&   r2   @      zDoubleTrans.transformr    r3   r!   r!   r!   r&   r9   <       
r9   c                   @   s(   e Zd Zdd ZdddZdddZdS )	SparseMatrixTransc                 C   
   || _ d S r    )csr_container)r#   r?   r!   r!   r&   __init__E      
zSparseMatrixTrans.__init__Nc                 C   r   r    r!   r"   r!   r!   r&   r'   H   r(   zSparseMatrixTrans.fitc                 C   s   t |}| t||S r    )lenr?   r   eye)r#   r$   r%   	n_samplesr!   r!   r&   r2   K   s   zSparseMatrixTrans.transformr    r4   r5   r6   r@   r'   r2   r!   r!   r!   r&   r=   D   s    
r=   c                   @   r   )	TransNo2DNc                 C   r   r    r!   r"   r!   r!   r&   r'   Q   r(   zTransNo2D.fitc                 C      |S r    r!   r"   r!   r!   r&   r2   T   r(   zTransNo2D.transformr    r3   r!   r!   r!   r&   rF   P   r7   rF   c                   @   r   )
TransRaiseNc                 C      t dNspecific message
ValueErrorr"   r!   r!   r&   r'   Y   r;   zTransRaise.fitc                 C   rI   rJ   rL   r"   r!   r!   r&   r2   \   r;   zTransRaise.transformr    r3   r!   r!   r!   r&   rH   X   r7   rH   c            
         s  t g dg dgj} t g d}t g d}|dd}| }d|fdg|fddg|ft ddg|ftdd|ftdd|ft ddg|fddg|ft ddg|fddg|fg
}|D ]D\ }td	t  fgd
d}t|| | t|	| 
| | td	t  fddfgd
d}t|| | t|	| 
| | q`tdt dgfdt dgfg}t|| | t|	| 
| | t|jdksJ ddd}tdt dgfdt dgfg|d}	t |d | |d | gj}t|	| | t|		| 
| | t|	jdksJ td	t ddgfgd	did}	t|	| d|  t|		| 
| d|  t|	jdksEJ d S )Nr   r,   r+   r+         r,   r   r+   TFtransdrop	remainderc                        S r    r!   x	selectionr!   r&   <lambda>       z)test_column_transformer.<locals>.<lambda>trans1trans2皙?
   r^   r_   transformer_weights)r/   arrayr1   reshapeslicer   r   r   fit_transformr'   r2   rB   transformers_vstack)
X_arrayX_res_first1DX_res_second1DX_res_first
X_res_bothcasesresctrd   bothr!   rZ   r&   test_column_transformer`   sd   


 


rt   c                  C   s   t g dg dgj} dt dgfdt dgfg}t|}tt|}t|| ||  t|| 	| || 	|  d S )NrN   rO   r^   r   r_   r,   )
r/   re   r1   r   r   tupler   rh   r'   r2   )rk   transformersct_with_listct_with_tupler!   r!   r&   4test_column_transformer_tuple_transformers_parameter   s   ry   constructor_name	dataframepolarsc                    s0  | dkr
t d nt |  tg dg dgj}t|| ddgd}tg ddd	}|}dg|fddg|ftdd|fd
g|fd
d	g|ftd
d	g|ftd
d	|ftd
d|ftddg|fddg|fg
}| dkr|d
|fd|f j	ddgddgd|fg |D ]D\}t
dt fgdd}t||| t|||| t
dt fddfgdd}t||| t|||| qt
dt dgfdt dgfg}t||| t|||| t|jdksJ |jd d
 dksJ t
dt d
gfdt d	gfg}t||| t|||| t|jdks3J |jd d
 dks?J ddd}t
dt dgfdt dgfg|d}	t|d |d  |d |d  gj}t|	|| t|	||| t|	jdksJ |	jd d
 dksJ t
dt ddgfgddid}	t|	|d|  t|	||d|  t|	jd	ksJ |	jd d
 dksJ t
dt d
d	gfgddid}	t|	|d|  t|	||d|  t|	jd	ksJ |	jd d
 dksJ G  fdddt}
t
d|
 jdddgfg}|| | dkrt
d|
 j	ddfgdd}|| | }d	d
g|_t
dt d
fgdd}t||| t|||| t|jdksqJ |jd d
 dks}J |jd d	 dksJ t|jd d d	g d S d S )Nr{   pandasrN   rO   firstsecond)columns_namerR   r,   r   r+   TFindexrS   rT   rU   c                    rW   r    r!   r$   rZ   r!   r&   r\      r]   z3test_column_transformer_dataframe.<locals>.<lambda>r^   r_   rV   r`   ra   rb   rc   c                       s,   e Zd Zdd ZdddZd fdd	ZdS )	z6test_column_transformer_dataframe.<locals>.TransAssertc                 S   r>   r    expected_type_transform)r#   r   r!   r!   r&   r@   "  rA   z?test_column_transformer_dataframe.<locals>.TransAssert.__init__Nc                 S   r   r    r!   r"   r!   r!   r&   r'   %  r(   z:test_column_transformer_dataframe.<locals>.TransAssert.fitc                    s(   t || jsJ t | jr| }|S r    )
isinstancer   Seriesr)   r"   dataframe_libr!   r&   r2   (  s   z@test_column_transformer_dataframe.<locals>.TransAssert.transformr    rE   r!   r   r!   r&   TransAssert!  s    
r   r   )pytestimportorskipr/   re   r1   r   rf   rg   extendr   r   r   r   rh   r'   r2   rB   ri   rj   r   	DataFramecopycolumns)rz   rk   X_dfrn   ro   rp   rq   rr   rd   rs   r   X_df2r!   )r   r[   r&   !test_column_transformer_dataframe   s   




 


	




r   r}   TFnumpy)idscolumn_selection)listboolbool_intcallable_columnc           
         s  t g dg dgj}|}| rtd}|j|ddgd}n|}|r* fdd}n }td	t d
dgfdt |fg}t	|
|| t	|||| t|jdksXJ t|jd d tsdJ td	t |fdt d
dgfg}t	|
|| t	|||| t|jdksJ t|jd
 d tsJ tdt |fgdd}t	|
|| t	|||| t|jdksJ t|jd
 d tsJ t g g g g}	tdt |fgdd}t	|
||	 t	||||	 t|jdksJ t|jd
 d tsJ d S )NrN   rO   r}   r~   r   r   c                    rW   r    r!   r   r   r!   r&   r\   k  r]   z7test_column_transformer_empty_columns.<locals>.<lambda>r^   r   r,   r_   r+   rS   passthroughrU   rT   )r/   re   r1   r   r   r   r   r   rH   r   rh   r'   r2   rB   ri   r   )
r}   r   r   rk   ro   pdr$   columnrr   fixturer!   r   r&   %test_column_transformer_empty_columnsW  sD   

r   c                  C   s  t ddd} tdt dgfdt dgfg}|| }|jtddtddtdddks2J t|d d dgf |d d |jd f  t|d d dgf |d d |jd f  td	t ddgfgd	d
id}|| }|jtddtdddksJ t|d d ddgf |d d |jd	 f  t|d d g f |d d |jd f  tdt ddgfdt	 g fg}|| }|jtddtddtdddksJ t|d d ddgf |d d |jd f  t|d d g f |d d |jd f  t|d d g f |d d |jd f  td	t	 g fgdd}|| }|jtddtdddks2J t|d d g f |d d |jd	 f  t|d d ddgf |d d |jd f  d S )NrQ      r+   r^   r   r_   r,   r^   r_   rV   rS   r`   rc   )rS   rV   rV   r   rU   )
r/   arangerf   r   r   rh   output_indices_rg   r   rH   )rk   rr   X_transr!   r!   r&   &test_column_transformer_output_indices  s@    
**
 ,( 
,((
"(0r   c                  C   s  t d} | jtdddddgd}tdt dgfd	t dgfg}||}|j	t
d
dt
ddt
d
d
dks>J t|d d d
gf |d d |j	d f  t|d d dgf |d d |j	d	 f  t|d d g f |d d |j	d f  tdt d
gfd	t dgfg}||}|j	t
d
dt
ddt
d
d
dksJ t|d d d
gf |d d |j	d f  t|d d dgf |d d |j	d	 f  t|d d g f |d d |j	d f  d S )Nr}   rQ   r   r+   r~   r   r   r^   r_   r   r,   r   rV   )r   r   r   r/   r   rf   r   r   rh   r   rg   r   )r   r   rr   r   r!   r!   r&   )test_column_transformer_output_indices_df  s0   
 
**( 
**,r   r?   c                 C   s  | t dd}|d d dgf }|}ddgtddfD ]8}d|fd|ffD ]-\}}tdt |fg|d	d
}t ||s@J t||| t||	|| q&qddgtddfD ]*}tdt |fgd	d}t ||suJ t||| t||	|| q^d S )Nr   r+   r   )r   r,   rT   r   rS   皙?rV   sparse_thresholdr   )
r   rC   rg   r   r   issparserh   r   r'   r2   )r?   X_sparsern   ro   colrV   rq   rr   r!   r!   r&   $test_column_transformer_sparse_array  s$   r   c                  C   s   dt ddgg dg} tdt dddgg dg}tdt ddgfdt d	gfg}t|| | t|| 	| | d S )
Nr,   nana)r   r   br   )rR   r   r   r,   	numericalcategoricalr+   )
floatr/   re   r   r   r   r   rh   r'   r2   )X_listexpected_resultrr   r!   r!   r&   test_column_transformer_list  s   r   c                 C   sd  t g dg dgj}tdt dgfdt| dfgdd}|| ||}t	|s/J |j
|j
d |j
d d fks@J t| d d dd f t |j
d  t|jd	ks^J |jd
 d dksiJ tdt dgfdt| dfgdd}|| ||}t	|rJ |j
|j
d |j
d d fksJ t|d d dd f t |j
d  d S )NrN   rO   r^   r   r_   r,   r   r   r+   rR   rV   r`   )r/   re   r1   r   r   r=   r'   r2   r   r   shaper   toarrayrC   rB   ri   )r?   rk   	col_transr   r!   r!   r&   'test_column_transformer_sparse_stacking  s*   

"*

"*r   c                  C   s   t jg dg dgdd} tt dgfdddgfd	d
}|| }|jdks(J t| t g dg dg tt dgfddgfd	d
}tj	t
dd ||  W d    d S 1 s^w   Y  d S )N)r   r,   T)r   r+   FOdtyper   r   r,   r+         ?r   csr)r,   r   r,   r,   )r   r,   r+   r   z'For a sparse output, all columns shouldmatch)r/   re   r
   r   rh   formatr   r   r   raisesrM   )dfrr   r   r!   r!   r&   )test_column_transformer_mixed_cols_sparse  s   
 "r   c                  C   sn  t jddgddggtdj} tdt dgfdt d	gfgd
d}|| }t|r,J |j	r1J dD ])}tdtdddgfdtddd	gfg|d}|| }t|sWJ |j	s\J q3dD ])}tdtdddgfdtddd	gfg|d}|| }t|rJ |j	rJ q_dD ])}tdtdddgfdtddd	gfg|d}|| }t|rJ |j	rJ qd S )Nr   r   ABr   r^   r   r_   r,   皙?r   )g㈵ ?r,   T)sparse_outputF)g      ?r   )gQ?r   r,   )
r/   re   objectr1   r   r   rh   r   r   sparse_output_)rk   r   rq   thresr!   r!   r&   (test_column_transformer_sparse_threshold3  sN   




r   c               	   C   s   t g dg dgj} tdt dfg}d}tjt|d ||  W d    n1 s.w   Y  tjt|d |	|  W d    n1 sJw   Y  tdt
 dfg}|j|j	fD ]}tjtdd ||  W d    n1 svw   Y  q^d S )N        r          @r         @g      @rS   r   z1D data passed to a transformerr   rK   )r/   re   r1   r   r   r   r   rM   r'   rh   rH   )rk   r   msgfuncr!   r!   r&   $test_column_transformer_error_msg_1Dg  s    
r   c                  C   s   t g dg dgj} tddt dfg}d}tjt|d ||  W d    n1 s/w   Y  tjt|d |	|  W d    d S 1 sLw   Y  d S )NrN   rO   )r^   rT   r   r_   r,   z%the 'trans2' transformer should be 2Dr   )
r/   re   r1   r   rF   r   r   rM   rh   r'   rk   rr   r   r!   r!   r&   test_2D_transformer_outputx  s   "r   c                  C   s   t d} tg dg dgj}| j|ddgd}tdt dfg}d}t jt	|d	 |
| W d    n1 s<w   Y  t jt	|d	 || W d    d S 1 sYw   Y  d S )
Nr}   rN   rO   col1col2r   r^   z%the 'trans1' transformer should be 2Dr   )r   r   r/   re   r1   r   r   rF   r   rM   rh   r'   )r   rk   r   rr   r   r!   r!   r&   !test_2D_transformer_output_pandas  s   
"r   rV   rT   r   c              	   C   s  t g dg dgj}dddgtddt dgfD ])}tdt |fg| d	}tjtd
d |	| W d    n1 s?w   Y  qdddgtddfD ])}tdt |fg| d	}tjtdd |	| W d    n1 ssw   Y  qOddg}tdt |fg| d	}|	| t g dg dg dgj}d}tjt|d |
| W d    n1 sw   Y  t g dgj}d}tjt|d |
| W d    d S 1 sw   Y  d S )NrN   rO         ?stringr,   sr   rS   rU   zNo valid specificationr   otherr   r   Specifying the columnsr   r   rQ   	   zIX has 3 features, but ColumnTransformer is expecting 2 features as input.zIX has 1 features, but ColumnTransformer is expecting 2 features as input.)r/   re   r1   rg   r   r   r   r   rM   r'   r2   )rV   rk   r   rr   X_array_morer   X_array_fewererr_msgr!   r!   r&   'test_column_transformer_invalid_columns  s@   "
"r   c                  C   s~   G dd dt } tg dg dgj}td|  dgfg}d}tjt|d || W d    d S 1 s8w   Y  d S )	Nc                   @   r8   )z<test_column_transformer_invalid_transformer.<locals>.NoTransNc                 S   r   r    r!   r"   r!   r!   r&   r'     r(   z@test_column_transformer_invalid_transformer.<locals>.NoTrans.fitc                 S   rG   r    r!   r:   r!   r!   r&   predict  r(   zDtest_column_transformer_invalid_transformer.<locals>.NoTrans.predictr    )r4   r5   r6   r'   r   r!   r!   r!   r&   NoTrans  r<   r   rN   rO   rS   r   z1All estimators should implement fit and transformr   )	r   r/   re   r1   r   r   r   	TypeErrorr'   )r   rk   rr   r   r!   r!   r&   +test_column_transformer_invalid_transformer  s   "r   c                  C   sb   t  } t }t| df|dgf}t|j \}}}|dksJ || |fks&J |ddgfks/J d S )Nr~   r   )standardscaler
normalizer)r   r   r
   ziprv   )scalernormrr   namesrv   r   r!   r!   r&   test_make_column_transformer  s   r   c                  C   sv   t d} tg dg dgj}| j|ddgd}t }tdt |jfg}t	||jf}t
|||| d S )Nr}   rN   rO   r~   r   r   r   )r   r   r/   re   r1   r   r   r   r   r
   r   rh   )r   rk   r   r   ct1ct2r!   r!   r&   #test_make_column_transformer_pandas  s   
r   c                  C   s   t  } t }t| df|dgfdddd}|jt| df|dgfjks$J |jdks+J |jdks2J |jdks9J td}t	j
t|d t| df|dgfd	d
dd W d    d S 1 s`w   Y  d S )Nr~   r   r   rT   g      ?)n_jobsrV   r   zRmake_column_transformer() got an unexpected keyword argument 'transformer_weights'r   ra   r,   )pcaTransfrc   )r   r   r
   rv   r   rV   r   reescaper   r   r   )r   r   rr   r   r!   r!   r&   #test_make_column_transformer_kwargs  s2   "r   c                  C   s<   t  } t }t  }t| df|dgf|d}|j|ksJ d S )Nr~   r   rU   )r   r   r
   rV   )r   r   rV   rr   r!   r!   r&   2test_make_column_transformer_remainder_transformer  s   r   c                  C   s  t dt dgfdt dgfg} i dd dddd	d| jd d d
dddddd| jd d ddddddd| jdd dddddd}|  |ksUJ | jdd |  d rcJ | jdd d dd	d| jd d ddd| jd dddd}|  |ksJ d S )Nr^   r   r_   r,   r   rV   rT   r   333333?trans1__copyTtrans1__with_meantrans1__with_stdtrans2__copytrans2__with_meantrans2__with_stdrv   rd   verbose_feature_names_outverboseFforce_int_remainder_cols
deprecated)r   r   r^   )r   rV   r   r^   r_   r  r  r  rv   rd   r  r  r  )r   r   rv   
get_params
set_paramsrr   expr!   r!   r&   &test_column_transformer_get_set_params  sn   	
r  c                  C   s   t g dg dgj} tdt dgfdtdddgfg}t|d	r%J ||  t|d	s1J t|jd ts;J t|jj	tsDJ t|jd tsNJ t|jj
tsWJ |jj
jr^J |jj	jd
ksgJ d S )Nr   r   r^   r   r_   F)with_stdr,   ri   r   )r/   re   r1   r   r   r-   r'   r   named_transformers_r^   r_   r  mean_rk   rr   r!   r!   r&   (test_column_transformer_named_estimators6  s   
r  c                  C   s   t g dg dgj} tdt dgfg}||  t|jd d dr'J t|jd d ds3J tdt dgfg}|	|  t|jd d drNJ t|jd d dsZJ d S )Nr   r   rS   r   r,   r  )
r/   re   r1   r   r   r'   r-   rv   ri   rh   r  r!   r!   r&   test_column_transformer_cloningJ  s   

r  c                  C   s   t g dg dgj} tdt ddgfg}tt |  W d    n1 s+w   Y  |	|  t
d}tjt|d |  W d    d S 1 sQw   Y  d S )Nr   r   rS   r   r,   zETransformer trans (type Trans) does not provide get_feature_names_outr   )r/   re   r1   r   r   r   r   r   get_feature_names_outr'   r   r   AttributeErrorr   r!   r!   r&   )test_column_transformer_get_feature_namesX  s   


"r  c                  C   s  t g dg dgj} tdt dgfdddgfg}t dgd	gd
gg}t|| | t|| | | t	|j
dksBJ |j
d d dksMJ tdddgfdddgfg}t|| | jd t|| jd t	|j
dksyJ |j
d d dksJ t g dg dgj} tdt dgfdddgfg}| }t|| | t|| | | t	|j
dksJ |j
d d dksJ d S )Nr   r   r^   r   r_   rT   r,   r   r   r   r+   rR   rV   )r   r   r   )r/   re   r1   r   r   r   rh   r'   r2   rB   ri   r   )rk   rr   r  r!   r!   r&   'test_column_transformer_special_stringsg  s&   r  c                  C   s  t g dg dgj} t g ddd}t g ddd}| }tdt dgfg}t|| | t|| 	| | t
|jdksJJ |jd d dksUJ |jd d d	ks`J t|jd d dg td
t dgfgdd}t|| | t|| 	| | t
|jdksJ |jd d dksJ t|jd d tsJ t|jd d dg tdt dgfgdd}t|| |d d d d df  t|| 	| |d d d d df  t
|jdksJ |jd d dksJ t|jd d tsJ t|jd d dg tdd	dgfgdd}t|| | t|| 	| | t
|jdks:J |jd d dksFJ t|jd d tsSJ t|jd d dg tt dgf}|jd	ksnJ d S )NrN   rO   rR   r,   r^   r   r+   rV   rT   rS   r   rU   )r/   re   r1   rf   r   r   r   rh   r'   r2   rB   ri   r   r   r
   rV   )rk   rn   X_res_secondro   rr   r!   r!   r&   !test_column_transformer_remainder  sD   "(r  z%cols1, cols2, expected_remainder_cols)FTFr+   r,   c                 C      dgS Nr   r!   rX   r!   r!   r&   r\         r\   c                 C   r  Nr,   r!   rX   r!   r!   r&   r\     r  r   r   C)TFF)FFTc                 C   s|   t d}t| trt| d trtd}|j|g dd}tt	 | ft	 |fdd}|
| |jd d |ks<J d	S )
z{Check that the remainder columns format matches the format of the other
    columns when they're all strings or masks.
    r,   r   r   r}   )r   r   r  r   r   rU   rR   N)r/   onesr   r   strr   r   r   r
   r   rh   ri   )cols1cols2expected_remainder_colsr$   r   rr   r!   r!   r&   (test_column_transformer_remainder_dtypes  s   


r&  r  c                 C   sn   t d}tdt dgfdt dgfgd| d}tjtdd	 || W d
   d
S 1 s0w   Y  d
S )zbCheck that ColumnTransformer raises a FutureWarning when
    force_int_remainder_cols is set.
    r   T1r   T2r,   r   )rV   r  z(`force_int_remainder_cols` is deprecatedr   N)r/   r!  r   r   r   warnsFutureWarningr'   )r  r$   rr   r!   r!   r&   )test_force_int_remainder_cols_deprecation  s   
"r+  zkey, expected_colsc                 C   s   t g dg dgj}|}tdt | fgdd}t||| t|||| t	|j
dks5J |j
d d d	ks@J t|j
d d
 tsLJ |j
d d |ksWJ d S )NrN   rO   r^   r   rU   r+   rR   r   rV   r,   )r/   re   r1   r   r   r   rh   r'   r2   rB   ri   r   r   keyexpected_colsrk   ro   rr   r!   r!   r&   'test_column_transformer_remainder_numpy  s   r/  r~   r   pd-indexr   c                 C   s   t d}t| tr| dkr|dg} tg dg dgj}|j|ddgd}|}t	dt
 | fgd	d
}t||| t|||| t|jdksRJ |jd d dks]J t|jd d tsiJ |jd d |kstJ d S )Nr}   r0  r~   rN   rO   r   r   r^   r   rU   r+   rR   r   rV   r,   )r   r   r   r"  Indexr/   re   r1   r   r   r   r   rh   r'   r2   rB   ri   r   )r-  r.  r   rk   r   ro   rr   r!   r!   r&   (test_column_transformer_remainder_pandas  s    
r2  )FTTc                 C   s   t g dg dg dgj}| }|d d ddf  d9  < tdt | fgt d}t||| t|	|
|| t|jdksIJ |jd	 d
 dksTJ t|jd	 d ts`J |jd	 d |kskJ d S )NrN   rO      rQ   rP   r,   r   r+   r^   rU   rR   r   rV   )r/   re   r1   r   r   r   r9   r   rh   r'   r2   rB   ri   r   r,  r!   r!   r&   -test_column_transformer_remainder_transformer  s   
r5  c                  C   s   t g dg dg dgj} tdt g dfgt d}t|| |  t|| 	| |  t
|jdks9J |jd d d	ksDJ d S )
NrN   rO   r3  r^   rU   r,   rR   r   rV   )r/   re   r1   r   r   r9   r   rh   r'   r2   rB   ri   r  r!   r!   r&   :test_column_transformer_no_remaining_remainder_transformer<  s   r6  c                  C   s   t g dg dg dgj} d|  d d ddf  }tddd	gfgt d
}t|| | t|| 	| | t
|jdksEJ |jd d	 dksPJ t|jd d ts\J t|jd d ddg d S )NrN   rO   r3  r+   r,   r   r^   rT   r   rU   rR   rV   )r/   re   r1   r   r   r9   r   rh   r'   r2   rB   ri   r   )rk   ro   rr   r!   r!   r&   7test_column_transformer_drops_all_remainder_transformerG  s   r7  c                 C   s   t g dg dg dgj}tdt dgfgt| dd}||}t|s*J |j	dks1J t 
|d d df d	d
t df}t| | t|jdksUJ |jd	 d dks`J t|jd	 d
 tslJ t|jd	 d d
dg d S )NrN   rO   r3  r^   r   r   r   r   rP   rR   r,   r   r+   rV   )r/   re   r1   r   r   r=   rh   r   r   r   hstackrf   rC   r   r   rB   ri   r   )r?   rk   rr   r   	exp_arrayr!   r!   r&   4test_column_transformer_sparse_remainder_transformerW  s   
(r;  c                 C   s   t g dg dg dgj}tdddgfgt| dd}||}t|s)J |jd	ks0J t	|
 t d
 t|jdksCJ |jd d dksNJ t|jd d tsZJ t	|jd d ddg d S )NrN   rO   r3  r^   rT   r   r   r   )r   r   r   r+   rR   rV   r,   )r/   re   r1   r   r=   rh   r   r   r   r   r   rC   rB   ri   r   )r?   rk   rr   r   r!   r!   r&   =test_column_transformer_drop_all_sparse_remainder_transformero  s   
r<  c                  C   s   t dt dgfgt d} d | jdddd| jd d ddd| jd dddd	}|  |ks.J | jdd
 |  d r<J | jdd d | jddddd| jd dddd}|  |ks[J d S )Nr^   r   rU   Tr   r,   Fr  )r   rV   remainder__copyremainder__with_meanremainder__with_stdr   r^   r   r   r  rv   rd   r  r  r  )r?  r?  r   r	  )r   rV   r=  r>  r?  r   r^   rv   rd   r  r  r  )r   r   rV   rv   r
  r  r  r!   r!   r&   5test_column_transformer_get_set_params_with_remainder  sJ   r@  c                  C   s   t g dg dg dgdj} tg t d}| }|d s#J || }|j| jks0J t	|j
dks9J |j
d d	 d
ksDJ |j
d d g dksQJ d S )NrN   rO   r3  r   rU   r>  r,   rR   r   rV   r+   )r/   re   astyper1   r   r   r
  rh   r   rB   ri   )rk   rr   paramsr   r!   r!   r&   %test_column_transformer_no_estimators  s   $
rC  estpatternr^   r_   rU   z\[ColumnTransformer\].*\(1 of 3\) Processing trans1.* total=.*\n\[ColumnTransformer\].*\(2 of 3\) Processing trans2.* total=.*\n\[ColumnTransformer\].*\(3 of 3\) Processing remainder.* total=.*\n$z\[ColumnTransformer\].*\(1 of 2\) Processing trans1.* total=.*\n\[ColumnTransformer\].*\(2 of 2\) Processing remainder.* total=.*\n$z\[ColumnTransformer\].*\(1 of 2\) Processing trans1.* total=.*\n\[ColumnTransformer\].*\(2 of 2\) Processing trans2.* total=.*\n$zA\[ColumnTransformer\].*\(1 of 1\) Processing trans1.* total=.*\n$methodr'   rh   c                 C   s~   t g dg dg dgj}t| |}| jdd || | jr'J d| jdd || t|| d s=J d S )	NrN   rO   r3  F)r  zGot output for verbose=FalseTr   )	r/   re   r1   r.   r  
readouterroutr   r   )rD  rE  rF  capsysrk   r   r!   r!   r&   test_column_transformer_verbose  s   F
rJ  c                  C   s"   t g jdd} | jdksJ d S )Nr+   )r   )r   r  r   rr   r!   r!   r&   0test_column_transformer_no_estimators_set_params  s   rL  c                     s   t g dg dgj t g dgj}  fdd}tdt |fgdd}t| |  t|  |  t	|j
d d	 sDJ |jd d	 dgksPJ d S )
NrN   rO   c                    s   t |   dgS r  )r   r   rk   r!   r&   r     s   
z8test_column_transformer_callable_specifier.<locals>.funcrS   rT   rU   r   r+   )r/   re   r1   r   r   r   rh   r'   r2   callablerv   ri   )rn   r   rr   r!   rM  r&   *test_column_transformer_callable_specifier  s   rO  c                     s   t d} tg dg dgj}tg dgj}| j|ddgd  fdd}td	t |fgd
d}t|	 | t|
  | t|jd d sRJ |jd d dgks^J d S )Nr}   rN   rO   r~   r   r   c                    s"   t | j j t | j j dgS )Nr~   )r   r   valuesr   r   r!   r&   r   /  s   zBtest_column_transformer_callable_specifier_dataframe.<locals>.funcrS   rT   rU   r   r+   )r   r   r/   re   r1   r   r   r   r   rh   r'   r2   rN  rv   ri   )r   rk   rn   r   rr   r!   rQ  r&   4test_column_transformer_callable_specifier_dataframe'  s   
rR  c                  C   s~   t jdd} t dgdgg}t j| |gdd} t }td|dgfgdd}td|dgfgdd}t|| ||  d S )Nr+   r,   axisoherR   r   rU   )	r/   randomrandnre   concatenater   r   r   rh   )r$   X_categoriesrU  tf_1tf_2r!   r!   r&   /test_column_transformer_negative_column_indexes;  s   r\  
array_typec                 C   s\   t g dg dg dg dg}| |}tdt g dfg}||}|jdks,J d S )Nr,   r+   r   rP      rQ   )r`  rQ      )r4  r   ra   identity)FTFT)r   r+   )r/   	transposer   r   rh   r   )r]  r$   column_transformerr   r!   r!   r&   %test_column_transformer_mask_indexingG  s   "
re  c                  C   s`   ddgddgddgg} t dt dgfd	t dgfg}t|d
r"J ||  |jdks.J d S )Nr,   r+   r   rP   r`  rQ   r   r   r   n_features_in_)r   r9   r-   r'   rf  )r$   rr   r!   r!   r&   test_n_features_inT  s
    
rg  zcols, pattern, include, excludecol_int	col_floatr   col_strzat$z^col_intz	float|strz^col_szstr$rh  ri  rj  c                 C   sd   t d}|jtjg dtdtjg dtdg ddg dd}t|||d}t|||  d S )	Nr}   rN   r   r   onetwothreerk  r   )dtype_includedtype_excluderE  )	r   r   r   r/   re   intr   r	   r   )colsrE  includeexcluder   r   selectorr!   r!   r&   ,test_make_column_selector_with_select_dtypes_  s   
	rw  c            
      C   s   t d} | jtjg dtdtjg dtdg dg ddg dd}|d	 d
|d	< td
t	dgd}ttj
d}t }t }t||f||f}t|dd	gf|ddgf}||}||}	t||	 d S )Nr}   rN   r   r   )rm  rn  rm  )lowmiddlehigh)rh  ri  col_catrj  r   rj  categoryr   rp  r{  ri  rh  )r   r   r   r/   re   rr  r   rA  r	   r   numberr   r   r
   rh   r   )
r   r   cat_selectornum_selectorrU  r   ct_selector	ct_direct
X_selectorX_directr!   r!   r&   1test_column_transformer_with_make_column_selector  s*   
	

r  c                  C   s\   t tjd} tddgg}d}tjt|d | | W d    d S 1 s'w   Y  d S )Nr}  r`   r   z=make_column_selector can only be applied to pandas dataframesr   )r	   r/   r~  re   r   r   rM   )rv  r$   r   r!   r!   r&   test_make_column_selector_error  s   
"r  c                  C   sv   t d} | jtjg dtdtjg dtdg ddg dd}ttgd}t	
t	|}t|||| d S )	Nr}   rN   r   r   rl  rk  r   r}  )r   r   r   r/   re   rr  r   r	   r   pickleloadsdumpsr   )r   r   rv  selector_pickedr!   r!   r&    test_make_column_selector_pickle  s   
	r  	empty_colc                 C      g S r    r!   rX   r!   r!   r&   r\     r]   )r   re   rN  c                 C   sd   t d}|g dg dd}tdt ddgfdt | fgd	}|| t| g d
 d S )Nr}   r   r   r   zr  r  r   r   rU  r   r   empty_featuresrv   )ohe__col1_aohe__col1_bohe__col2_zr   r   r   r   r   r'   r   r  )r  r   r   rr   r!   r!   r&    test_feature_names_empty_columns  s   


r  rv  c                 C   r  r  r!   rX   r!   r!   r&   r\     r  r   c                 C   r  )Nr   r!   rX   r!   r!   r&   r\     r  c                 C      ddgS NFTr!   rX   r!   r!   r&   r\         c                 C   sR   t d}|g dg dd}tdt | fg}|| t| dg dS )z1Checks name when selecting only the second columnr}   r  r  r  rU  r  Nr  )rv  r   r   rr   r!   r!   r&   test_feature_names_out_pandas  s
   

r  c                 C   r  r  r!   rX   r!   r!   r&   r\     r  c                 C   r  r  r!   rX   r!   r!   r&   r\     r  c                 C   sF   ddgddgddgg}t dt | fg}|| t| dg dS )z=Checks name when selecting the second column with numpy arrayr   r  r   rU  	ohe__x1_zN)r   r   r'   r   r  )rv  r$   rr   r!   r!   r&   !test_feature_names_out_non_pandas  s   
r  c                 C   s`   t  }td|ddgfg| d}| }|jdksJ |jddgdfks%J |j|| fks.J d S )NrU  r   r   rv   rV   rU  rV    r   r   _sk_visual_block_r   name_details
estimators)rV   rU  rr   visual_blockr!   r!   r&   test_sk_visual_block_remainder  s   r  c                  C   sZ   t  } td| ddgfgd}| }|jdksJ |jddgfks#J |j| fks+J d S )NrU  r   r   r  )rU  r  )rU  rr   r  r!   r!   r&   #test_sk_visual_block_remainder_drop  s   r  c                 C   s   t d}t }td|ddgfg| d}|g dg dg dg d	d
}|| | }|jdks6J |jddgddgfksCJ |j	|| fksLJ d S )Nr}   rU  r   r   r  r   r   cr  r^  r   rP   r`  )r   r   col3col4r  r  r  )
r   r   r   r   r   r'   r  r   r  r  )rV   r   rU  rr   r   r  r!   r!   r&   ,test_sk_visual_block_remainder_fitted_pandas  s$   

r  c                 C   s   t jg dg dgtd}t }td|ddgfg| d}|| | }|jdks,J |jddgd	gfks8J |j	|| fksAJ d S )
Nr^  r_  r   scaler   r+   r  )r  rV   r,   )
r/   re   r   r   r   r'   r  r   r  r  )rV   r$   r   rr   r  r!   r!   r&   +test_sk_visual_block_remainder_fitted_numpy)  s   
r  explicit_colname)r~   r   r   r,   c                 C   s  t d}tg dg dgj}|j|ddgd}tg dg dgj}|j|ddgd}tdt | fg|d}|| |	|}|	|}	t
|	| | }
g d	|
d
< |	|
}	t
|	| t| tr| }d}t jt|d |	| W d   dS 1 sw   Y  dS dS )z=Test the interaction between remainder and column transformerr}   rN   rO   r~   r   r   bycolrU   r   thirdr   r   N)r   r   r/   re   r1   r   r   r   r'   r2   r   r   r   r"  r   rM   )r  rV   r   X_fit_arrayX_fit_dfX_trans_array
X_trans_dftfX_fit_transr   X_extended_dfrk   r   r!   r!   r&   8test_column_transformer_reordered_column_names_remainder8  s*   







"r  c                  C   s  t d} tjdd}| j|g dd}|jddd}td	t dgfgd
d}|| d}t j	t
|d || W d   n1 sEw   Y  td	t dgfgdd}|| ||}||}t|| td	ddgfgd
d}|| ||}||}t|| dS )zRTest the interaction between {'drop', 'passthrough'} and
    missing column names.r}   r8  )r   r   r   r  dr   r  r,   rS  r  r   rU   zcolumns are missing: {'c'}r   NrT   )r   r   r/   r!  r   rT   r   r   r'   r   rM   r2   r   )r   r$   r   
df_droppedr  r   df_dropped_transdf_fit_transr!   r!   r&   <test_feature_name_validation_missing_columns_drop_passthough^  s(   








r  c                  C   s|   t d} g d}| jg dg|d}tdt ddgfgdd	}|| t|j| t|jt	j
s4J |jjtks<J d
S )a?  Feature names are stored in column transformer.

    Column transformer deliberately does not check for column name consistency.
    It only checks that the non-dropped names seen in `fit` are seen
    in `transform`. This behavior is already tested in
    `test_feature_name_validation_missing_columns_drop_passthough`r}   r   r  r  r^  r   r  r   r  r   rU   N)r   r   r   r   r   r'   r   feature_names_in_r   r/   ndarrayr   r   )r   feature_namesr   rr   r!   r!   r&   test_feature_names_in_  s   

r  c                   @   r   )TransWithNamesNc                 C   r>   r    feature_names_out)r#   r  r!   r!   r&   r@     rA   zTransWithNames.__init__c                 C   s   | j d urtj| j tdS |S )Nr   )r  r/   asarrayr   )r#   input_featuresr!   r!   r&   r       
z$TransWithNames.get_feature_names_outr    )r4   r5   r6   r@   r  r!   r!   r!   r&   r    r7   r  z'transformers, remainder, expected_namesbycol1r  r  bycol2)	bycol1__d	bycol1__c	bycol2__dremainder__aremainder__b)r  r  r  r   )	bycol1__br  remainder__cpca1pca2)r   r   r  )bycol1__pca1bycol1__pca2r  r   )	bycol1__ar  	bycol2__bc                 C      g | ]}d | qS r   r!   .0ir!   r!   r&   
<listcomp>      r  c                 C   r  r  r!   r  r!   r!   r&   r    r  )bycol1__pca0r  bycol2__pca0bycol2__pca1r  r  remainder__dr   r  r  rP   r  )r  r  	bycol2__cr  r  r  c                 C   sl   t d}|jg dgg dd}t| |d}|| | }t|tjs(J |j	t
ks/J t|| dS )DCheck feature_names_out for verbose_feature_names_out=True (default)r}   r,   r+   r   rP   r  r   rU   Nr   r   r   r   r'   r  r   r/   r  r   r   r   rv   rV   expected_namesr   r   rr   r   r!   r!   r&   #test_verbose_feature_names_out_true  s   
z
r  
trans_name	feat_namec                 C   s   | d d  d| S )Nr+   z++r!   r  r  r!   r!   r&   &_feature_names_out_callable_name_clash&     r  c                 C   s   |    d|   S )N=)upperr  r!   r!   r&   !_feature_names_out_callable_upper*  r  r  zBtransformers, remainder, verbose_feature_names_out, expected_names)by++dzby++cr  zre++azre++bz!{feature_name}-{transformer_name})zd-bycol1zc-bycol1zd-bycol2)zBYCOL1=DzBYCOL1=CzBYCOL2=CzBYCOL2=DzREMAINDER=AzREMAINDER=Bc                 C   sn   t d}|jg dgg dd}t| ||d}|| | }t|tjs)J |j	t
ks0J t|| dS )r  r}   r  r  r   rV   r  Nr  )rv   rV   r  r  r   r   rr   r   r!   r!   r&   .test_verbose_feature_names_out_callable_or_str.  s   
+
r  )r  r  r   r   )r   r   r  )r  r  r  r  c                 C   r  r  r!   r  r!   r!   r&   r    r  c                 C   r  )kpcar!   r  r!   r!   r&   r    r  )pca0r  kpca0kpca1r   r  r  r  )r  r  r   c                 C   sn   t d}|jg dgg dd}t| |dd}|| | }t|tjs)J |j	t
ks0J t|| dS );Check feature_names_out for verbose_feature_names_out=Falser}   r  r  r   Fr  Nr  r  r!   r!   r&   $test_verbose_feature_names_out_falseh  s   
u
r  z*transformers, remainder, colliding_columnsz['b']z['c']z['a']z
['b', 'c']bycol3z
['a', 'b']c                 C   r  r  r!   r  r!   r!   r&   r  +  r  rQ   c                 C   r  r  r!   r  r!   r!   r&   r  ,  r  z-['pca0', 'pca1', 'pca2', 'pca3', 'pca4', ...]c                 C   s   t d}|jg dgg dd}t| |dd}|| td| d}t jt|d	 |	  W d
   d
S 1 s=w   Y  d
S )r  r}   r  r  r   Fr  zOutput feature names: z[ are not unique. Please set verbose_feature_names_out=True to add prefixes to feature namesr   N)
r   r   r   r   r'   r   r   r   rM   r  )rv   rV   colliding_columnsr   r   rr   r   r!   r!   r&   +test_verbose_feature_names_out_false_errors  s   
p


"r  r  c                 C   s   t d}|jg dgg ddgd}tdt ddgfd	t d
gfg|| d}||}t|tjs4J |j	dd |jg dg|j
dgd}||}t||jsTJ | }t|j
| t|j|j dS )z2Check column transformer behavior with set_output.r}   r  r  ra   r   r   r~   r   r  r   r  r  r2      N)r   r   r   r   r  rh   r   r/   r  
set_outputr   r2   r  r   r   )r  rV   r   r   rr   r   df_testr  r!   r!   r&   "test_column_transformer_set_outputm  s    


r  c           	   
   C   s  t d}||jg ddd|jg dddg dg d|jd	|jd
gddd}tdtddddgfdt dgfg| ddjdd}|rM|	|}n|
||}t||js]J t|j|  dddddddd}|j D ]\}}||| ksJ qtdS )z6Check ColumnTransformer outputs mixed types correctly.r}   dogcatsnaker|  r   )greenblueredr   gffffff?g @g@r  (   ra   r  d   Int32)petcolorageheightdistancecolor_encodeFint8r   r   r  r  r  r  float64int64)
color_bluecolor_green	color_redr  r  r  r  N)r   r   r   r   NAr   r   r   r  rh   r'   r2   r   r   r   r  dtypesitems)	rV   rh   r   r   rr   r   expected_dtypesr   r   r!   r!   r&   &test_column_transform_set_output_mixed  sJ   
	
	r%  c           	      C   s   t d}||jg dddg dg dd}tdtd	d
ddgfdt dgfg| d	d}||}t|t	j
s<J |jdksCJ |jdd ||}d
d
d
ddd}|j D ]\}}||| ksgJ q[d S )Nr}   r  r|  r   r  r  )r  r  r  r  Fint16r  r  r  r  r  r  r  )pet_catpet_dog	pet_snaker  r  )r   r   r   r   r   r   r   rh   r   r/   r  r   r  r2   r"  r#  )	rV   r   r   rr   r   r  r$  r   r   r!   r!   r&   .test_column_transform_set_output_after_fitting  s>   



r*  c                   @   s4   e Zd ZdddZdddZdddZdd	d
ZdS )PandasOutTransformerr   c                 C   r>   r    offset)r#   r-  r!   r!   r&   r@     rA   zPandasOutTransformer.__init__Nc                 C   s   t d}t||jsJ | S Nr}   )r   r   r   r   r#   r$   r%   r   r!   r!   r&   r'     r  zPandasOutTransformer.fitc                 C   s$   t d}t||jsJ || j S r.  )r   r   r   r   r-  r/  r!   r!   r&   r2     s   

zPandasOutTransformer.transformc                 C   r   r    r!   )r#   r2   r!   r!   r&   r    s   zPandasOutTransformer.set_output)r   r    )r4   r5   r6   r@   r'   r2   r  r!   r!   r!   r&   r+    s
    


r+  z;trans_1, expected_verbose_names, expected_non_verbose_namesr   r,  trans_0__feat1trans_1__feat0feat1feat0c           	      C   s   t d}|g dg dd}tdtdddgfd	| d
gfg}||}t|tjs/J t j	t
dd |  W d   n1 sEw   Y  |jdd ||}t|j| |jdd ||}t|j| dS )zCheck that set_config(transform="pandas") is compatible with more transformers.

    Specifically, if transformers returns a DataFrame, but does not define
    `get_feature_names_out`.
    r}   )r   r         @)r   r4  r   )r3  r2  trans_0r4  r,  r2  trans_1r3  z!not provide get_feature_names_outr   Nr  Fr  )r   r   r   r   r+  rh   r   r/   r  r   r  r  r  r   r   r  )	r6  expected_verbose_namesexpected_non_verbose_namesr   r   rr   
X_trans_npX_trans_df0X_trans_df1r!   r!   r&   ;test_transformers_with_pandas_out_but_not_feature_names_out  s$   





r=  empty_selectionc                 C   s   t d}|jddgddggddgd}tdd	| fd
t ddgfgdd}|jdd ||}t|jddg |j	dd ||}t|jddg dS )zkCheck that pandas output works when there is an empty selection.

    Non-regression test for gh-25487
    r}   r   皙@r4  r   r   r   r   r   r   Tr7  r  numerical__anumerical__bFN)
r   r   r   r   r   r  rh   r   r   r  )r>  r   r$   rr   X_outr!   r!   r&   "test_empty_selection_pandas_output+	  s   



rC  c                  C   s   t d} | jddgddggddgddgd	}td
d dd}tdddgfd|dgfg}|jdd d}t jt|d || W d   dS 1 sMw   Y  dS )zmCheck column transformer raises error if indices are not aligned.

    Non-regression test for gh-26210.
    r}   r   r?  r4  r   r   r4  r   r  c                 S   s   | j ddS )NT)rT   )reset_indexrX   r!   r!   r&   r\   Q	  s    z7test_raise_error_if_index_not_aligned.<locals>.<lambda>z
one-to-oner  num1r   num2r  zConcatenating DataFrames from the transformer's output lead to an inconsistent number of samples. The output may have Pandas Indexes that do not match.r   N)	r   r   r   r   r   r  r   rM   rh   )r   r$   reset_index_transformerrr   r   r!   r!   r&   %test_raise_error_if_index_not_alignedH	  s    
$

"rH  c                  C   s   t d} | g dg dd}tt ttdft dd}|jdd ||}| j	
|| |jd	d ||}t|tjsDJ d
S )zYCheck that the output is set for the remainder.

    Non-regression test for #26306.
    r}   )TFTr^  )r   r   r}  Fr  r  defaultN)r   r   r   r
   r   r	   r   r  rh   testingassert_frame_equalr   r/   r  )r   r   rr   rH  r!   r!   r&   test_remainder_set_outputd	  s   


rL  c                  C   s   t d} t| dst d | dddgi}tddgf}t  td |	| W d   n1 s8w   Y  |
 }t jtd	d
 |	| W d   n1 sXw   Y  |jdd |	| |jdd |	|d dS )zCheck behavior when a tranformer's output contains pandas.NA

    It should raise an error unless the output config is set to 'pandas'.
    r}   Float64DtypezfThe issue with pd.NA tested here does not happen in old versions that do not have the extension dtypesr   r   Nr   errorz set_output\(transform='pandas'\)r   r  rI  g      )r   r   r-   skipr   r
   warningscatch_warningssimplefilterrh   convert_dtypesr   rM   r  fillna)r   r   rr   r!   r!   r&   test_transform_pd_na|	  s&   




rU  c                  C   s   t d} t d}tddgddgddgg}tddgddgddgg}| j|d	d
gd}|j|d	d
gd}tt ddgf}|| ||}t	|| |j|d	d
gd}| j|d	d
gd}	|| ||	}
t	|
| dS )z?Check fitting and transforming on pandas and polars dataframes.r}   r|   r   r,   r+   rP   r`  r   r   r   r   )schemaN)
r   r   r/   re   r   r
   r   r'   r2   r   )r   pl
X_train_np	X_test_np
X_train_pd	X_test_plrr   	out_pl_in
X_train_pl	X_test_pd	out_pd_inr!   r!   r&   ,test_dataframe_different_dataframe_libraries	  s   






r`  c                  C   s   t g dg dg} tdt ddgfdt ddgfg}d}tjt|d	 |d  W d
   n1 s5w   Y  ||  |d |jd u sJJ |d |jd u sUJ d}tjt	|d	 |d  W d
   d
S 1 snw   Y  d
S )z(Check __getitem__ for ColumnTransformer.rN   r  t1r   r,   t2r+   z5ColumnTransformer is subscriptable after it is fittedr   Nz0'does_not_exist' is not a valid transformer namedoes_not_exist)
r/   re   r   r   r   r   r   r'   r  KeyError)r$   rr   r   r!   r!   r&   "test_column_transformer__getitem__	  s   $


"re  transform_outputrI  c                 C   s   t d}|tjdd}tdt ddgfgddj| d	}|	|}|j
|j
ks-J g d
}t|dr?|j |ks?J |  |ksIJ dS )zCheck that when `remainder="passthrough"`, inconsistent naming is handled
    correctly by the underlying `FunctionTransformer`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28232
    r}   ra   rP   r   r   r,   r   r  r  )
scaler__x0
scaler__x1remainder__x2remainder__x3r   N)r   r   r   r/   rV  rW  r   r   r  rh   r   r-   r   tolistr  )rf  r   r$   preprocessorr   expected_column_namesr!   r!   r&   @test_column_transformer_remainder_passthrough_naming_consistency	  s   


rn  r   c                 C   s   t | }|g dg dg dd}tddg dfdt dd	gfd
t ddgfdtdd g dfgddj| d}||}t|j	g dksLJ dS )zCheck that we properly rename columns when using `ColumnTransformer` and
    selected columns are redundant between transformers.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28260
    r^  ra   r     r     i,  x1x2x3r   r   r   rt  ru  r  rv  Dc                 S      t | g ddS Nr,   rS  r   rX   r!   r!   r&   r\   	      z9test_column_transformer_column_renaming.<locals>.<lambda>Trv   r  r  )A__x1A__x2A__x3B__x1B__x2C__x1C__x3N)
r   r   r   r   r   r   r  rh   r   r   )r   libr   transformerdf_transr!   r!   r&   'test_column_transformer_column_renaming	  s"   


r  c                 C   s   t | }|g dg dg dd}tddg dfdt dd	gfd
t ddgfdtdd g dfgddj| d}td}t j	t
|d || W d   dS 1 sYw   Y  dS )zzCheck that we raise an error when using `ColumnTransformer` and
    the columns names are duplicated between transformers.r^  ro  rq  rs  r   r   r   rt  ru  r  rv  rw  c                 S   rx  ry  r   rX   r!   r!   r&   r\   
  rz  zGtest_column_transformer_error_with_duplicated_columns.<locals>.<lambda>Fr{  r  a   Duplicated feature names found before concatenating the outputs of the transformers: ['x1', 'x2', 'x3'].
Transformer A has conflicting columns names: ['x1', 'x2', 'x3'].
Transformer B has conflicting columns names: ['x1', 'x2'].
Transformer C has conflicting columns names: ['x1', 'x3'].
r   N)r   r   r   r   r   r   r  r   r   r   rM   rh   )r   r  r   r  r   r!   r!   r&   5test_column_transformer_error_with_duplicated_columns
  s*   

"r  z1.3zrequires joblib >= 1.3)reasonc                  C   s   t jdjdd} tdd}td|dgfgdd}tjd	d
d || }W d   n1 s1w   Y  t	|t | dddgf  dS )z|Check that ColumnTransformer works in parallel with joblib's auto-memmapping.

    non-regression test for issue #28781
    r   r8  )sizeF)r   r   r+   )rv   r   lokyr,   )
max_nbytesN)
r/   rV  RandomStateuniformr   r   joblibparallel_backendrh   r   )r$   r   r  Xtr!   r!   r&   #test_column_transformer_auto_memmap,
  s   	
$r  c                  C   s   t d} | jddddddgddgdddgd}ttd	d
dft dgf}|jdd ||}t|j	ddgks?J dS )zCheck index handling when both pd.Series and pd.DataFrame slices are used in
    ColumnTransformer.

    Non-regression test for issue #31546.
    r}   r,   r+   )foobarr   )r  baz)dict_col	dummy_colr   Fr   r  r  r  N)
r   r   r   r
   r   r   r  rh   r   r   )r   r   tr$   r!   r!   r&   )test_column_transformer_non_default_indexD
  s   


r  )r2   rh   r'   c                 C   s   t g dg dgj}g d}tdt dgfg||}tjtdd t	|| dggdgd	d
 W d   dS 1 s>w   Y  dS )zTest that the right error message is raised when metadata is passed while
    not supported when `enable_metadata_routing=False`.rN   rO   r^  rS   r   z1is only supported if enable_metadata_routing=Truer   r,   r   )sample_weightpropN)
r/   re   r1   r   r   r'   r   r   rM   r.   )rF  r$   r%   trsr!   r!   r&   *test_routing_passed_metadata_not_supported_
  s   "r  )enable_metadata_routingc                 C   s   t g dg dgj}g d}t }dgd}}tdt|djddd	jddd	d
gfg}| dkrE|j||||d	 |j	|||d	 nt
|| ||||d	 t|sVJ |D ]}t|| | ||d qXdS )z>Test that metadata is routed correctly for column transformer.rN   rO   r^  r,   r   rS   )registryTr  metadatar   r2   )objrF  parentr  r  N)r/   re   r1   r   r   r   set_fit_requestset_transform_requestr'   r2   r.   rB   r   )rF  r$   r%   r  r  r  r  _trsr!   r!   r&   ,test_metadata_routing_for_column_transformern
  s:   r  c                  C   s   G dd dt } tg dg dgj}g d}dgd}}td|  jd	d	d
jd	d	d
dgfg}|j||||d
 |j||||d
 dS )zUTest metadata routing when the sub-estimator doesn't implement
    ``fit_transform``.c                   @   s    e Zd ZdddZdddZdS )z>test_metadata_routing_no_fit_transform.<locals>.NoFitTransformNc                 S   s   |sJ |sJ | S r    r!   )r#   r$   r%   r  r  r!   r!   r&   r'   
     zBtest_metadata_routing_no_fit_transform.<locals>.NoFitTransform.fitc                 S   s   |sJ |sJ |S r    r!   )r#   r$   r  r  r!   r!   r&   r2   
  r  zHtest_metadata_routing_no_fit_transform.<locals>.NoFitTransform.transform)NNN)NNr3   r!   r!   r!   r&   NoFitTransform
  s    
r  rN   rO   r^  r,   r   rS   Tr  r   N)	r   r/   re   r1   r   r  r  r'   rh   )r  r$   r%   r  r  r  r!   r!   r&   &test_metadata_routing_no_fit_transform
  s"   r  c                 C   s   t g dg dgj}g d}dgd}}tdt dgfg}d|  }tjtt	|d	/ | d
krC|
|| |j|||d nt|| ||||d W d   dS W d   dS 1 saw   Y  dS )zCTest that the right error is raised when metadata is not requested.rN   rO   r^  r,   r   rS   r   zw[sample_weight, metadata] are passed but are not explicitly set as requested or not requested for ConsumingTransformer.r   r2   r  N)r/   re   r1   r   r   r   r   rM   r   r   r'   r2   r.   )rF  r$   r%   r  r  r  error_messager!   r!   r&   2test_metadata_routing_error_for_column_transformer
  s    "r  c                  C   s    t dt dgfg} |   d S )NrS   r   )r   r   get_metadata_routingrK  r!   r!   r&   +test_get_metadata_routing_works_without_fit
  s   r  c                  C   sR   t dt dgfgt jddjddd} |  }|ddgtdgks'J d S )NrS   r   Tr  rU   r'   r  )r   r   r   r  r  r  consumessetrr   routerr!   r!   r&   %test_remainder_request_always_present
  s    r  c                  C   sN   t dt jddjdddd fg} |  }|ddgtdgks%J d S )NrS   Tr  c                 S   r  r    r!   r   r!   r!   r&   r\   
  r]   z9test_unused_transformer_request_present.<locals>.<lambda>r'   r  )r   r   r  r  r  r  r  r  r!   r!   r&   'test_unused_transformer_request_present
  s    r  )__doc__r  r   rP  r  r   r/   r   numpy.testingr   scipyr   sklearnr   sklearn.baser   r   sklearn.composer   r	   r
   sklearn.exceptionsr   sklearn.feature_extractionr   sklearn.feature_selectionr   sklearn.preprocessingr   r   r   r   %sklearn.tests.metadata_routing_commonr   r   r   sklearn.utils._indexingr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   r   r9   r=   rF   rH   rt   ry   markparametrizer   re   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r&  r+  rg   r/  r   r2  r5  r6  r7  r;  r<  r@  rC  rJ  rL  rO  rR  r\  r  re  rg  r~  rr  r   rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ranger  r"  r  r  r  r  r  r  r%  r*  r+  r=  rC  rH  rL  rU  r`  re  rn  r  r  skipif__version__r  r  r  r  r  r  r  r  r  r!   r!   r!   r&   <module>   sh   B
 $/+

4
#


10




	



	

.
D







	



$"






	


		

	

x


'





	



	







s










  


k-*



# % 

#"



