o
    \iy                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlm Z m!Z!m"Z" dZ#dZ$dZ%G dd dZ&eeddZdd Z'ej()ddddidddfdddddddfdddidddfdd dddddfd!dd!id"d#dfd!d$ddd"d#dfd%dd%id&d'd(fd)dd)id*d+dfd)d,d-id*d+dfd.dd.id#d/dfd.d,d0id#d/dfd1dd1id2d&dfgej()d3d4d5gej()d6dd7gd8d9 Z*ej()ddddidddfdddddddfdddidddfdd dddddfd!dd!id"d#dfd!d$ddd"d#dfd%dd%id&d'd(fd)dd)id*d+dfd)d,d-id*d+dfd.dd.id#d/dfd.d,d0id#d/dfgej()d3d4d5gd:d; Z+ej()dg d<d=d> Z,ej()d3d4d5gd?d@ Z-ej()d3d4d5gdAdB Z.ej()d3d4d5gej()dCdDdDdEggdFdG Z/ej()dg dHej()d3d4d5gdIdJ Z0ej()dg dKej()d3d4d5gdLdM Z1dNdO Z2ej3dPdQdRdS Z4ej3dPdQdTdU Z5ej()dVg dWej()d6dd7gdXdY Z6ej()dZd3d[id\fd]d[id^fgd_d` Z7ej()daddbdcdbdbdcd7d5dcd7dbdcgddde Z8ej(9dfej()dZd3d5idgfd]didhfd5ddidhfgdjdk Z:ej(9dfej()dldmdngdodp Z;dqdr Z<ej()d6dd7gdsdt Z=ej()d6dd7gdudv Z>ej()d6dd7gej()d3d4d5gdwdx Z?ej()d6dd7gej()dyddzidd{dd|gd}d~ Z@ej()d6dd7gej()ddzd,d{ieAdfddddgdeAdfd1d1d7deAdfdddddeAdfdddd7deAdfddddeBdfddddgdeBdfgej()d3d4d5gdd ZCej()ddddd|eAdfdddeAdfdddd|eAdfi eAdfgdd ZDej()d6dd7gdd ZEej()d6dd7gdd ZFej()d6dd7gdd ZGdd ZHej()d6dd7gdd ZIej()ddd7gdd ZJdd ZKdd ZLej()d6dd7gdd ZMej()dg ddd ZNdd ZOej()d6dd7gej()d3ddd ZPdd ZQdd ZRdd ZSdS )zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTzdata/v1/download/{}c                   @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec                 C   s   || _ || _d S N)datais_gzip)selfr   r    r   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_openml.py__init__'   s   
z_MockHTTPResponse.__init__c                 C   s   | j |S r   )r   read)r   amtr   r   r   r   +   s   z_MockHTTPResponse.readc                 C   s   | j   d S r   )r   closer   r   r   r   r   .   s   z_MockHTTPResponse.closec                 C   s   | j rddiS i S )NzContent-Encodinggzipr   r   r   r   r   info1   s   z_MockHTTPResponse.infoc                 C   s
   t | jS r   )iterr   r   r   r   r   __iter__6   s   
z_MockHTTPResponse.__iter__c                 C   s   | S r   r   r   r   r   r   	__enter__9      z_MockHTTPResponse.__enter__c                 C   s   dS )NFr   )r   exc_typeexc_valexc_tbr   r   r   __exit__<   r#   z_MockHTTPResponse.__exit__N)r   )
__name__
__module____qualname__r   r   r   r   r!   r"   r'   r   r   r   r   r   &   s    
r   )	data_homec                    s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr]| tjjd| d S d S )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/z'https://www.openml.org/data/v1/downloadz-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutput)path_suffixr   r   
_file_nameU   s$   
z4_monkey_patch_webbased_functions.<locals>._file_namec           	         s   |  |sJ |d|  | |}t| }|d.}|r6r6t| }t|dW  d    S |d}t| }t|dW  d    S 1 sPw   Y  d S )N does not match rbTF)
startswithr   filesopenr   r   r   )	r3   has_gzip_headerexpected_prefixr4   data_file_namedata_file_pathffpdecompressed_f)r7   data_modulegzip_responseread_fnr   r   _mock_urlopen_sharedk   s   

$z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                        | |ddS N.jsonr3   r=   r>   r4   r   r3   r=   )rG   url_prefix_data_descriptionr   r   _mock_urlopen_data_description|      zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                    rH   rI   r   rL   )rG   url_prefix_data_featuresr   r   _mock_urlopen_data_features   rO   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                    s    |  ddd } ||ddS )N/   r   z.arffrK   )rsplit)r3   r=   url_without_filename)rG   url_prefix_download_datar   r   _mock_urlopen_download_data   s   	zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac           	         s  |  sJ d|  | d}t| }|d}|d}| d}t|}W d    n1 s;w   Y  d|v rNtd ddd t	 d|d,}|rht	| }t
|d	W  d    S |d}t	| }t
|d
W  d    S 1 sw   Y  d S )Nr8   rJ   r9   zutf-8error  Simulated mock errorr3   codemsghdrsrB   TF)r:   r   r;   r<   r   decodejsonloadsr   r   r   )	r3   r=   r?   r@   rA   rC   	decoded_s	json_datarB   )r7   rD   rF   url_prefix_data_listr   r   _mock_urlopen_data_list   s.   


$zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                    sr   |   }| ddk}|r||S |r||S |r)||S |r3 ||S td| )NzAccept-encodingr   zUnknown mocking URL pattern: %s)get_full_url
get_headerr:   
ValueError)requestargskwargsr3   r=   )rN   rQ   re   rW   rM   rP   rd   rV   r   r   _mock_urlopen   s   







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)r   r<   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrE   rl   r   )r7   rN   rQ   re   rW   rG   rD   rE   r6   rF   rM   rP   rd   rV   r    _monkey_patch_webbased_functionsG   s"   rv   z9data_id, dataset_params, n_samples, n_features, n_targets=   ru         rS   iris)nameversion      &   anneal1        cpu鍞     H      _  
      r{   zadult-census  M   MiceProtein  i  parser	liac-arffpandasrE   Fc           
      C   s  t d}t| ||d td	dd|d|}	t|	jd |ks"J t|	ts)J t|	j|j	s2J |	jj
||| fks>J t|	j|j	sGJ |	jj
||fksQJ |dkrht|	j|js^J |	jj
|fksgJ nt|	j|j	sqJ |	jj
||fks{J |	jdu sJ dS )
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rE   TFas_framecacher   idrS   Nr   )pytestimportorskiprv   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchru   dataset_params	n_samples
n_features	n_targetsr   rE   pdbunchr   r   r   test_fetch_openml_as_frame_true   s*   
(r   c                 C   s   t d t| |dd td	dd|d|}t|jd |ks"J t|ts)J |jdu s0J t|j	t
js9J |j	j||fksCJ t|jt
jsLJ |dkrZ|jj|fksYJ n
|jj||fksdJ t|jtslJ dS )
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   NrS   r   )r   r   rv   r   r   r   r   r   r   r   npndarrayr   r   r   dict)r   ru   r   r   r   r   r   r   r   r   r    test_fetch_openml_as_frame_false  s&   
$r   )rw   r   r   c           
         s   t dt| |dd t|dddd}t|dddd}|j|j}  fdd}||}j|  |j|j}j|j	   fd	d
}||}	j|	 dS )z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   ru   r   r   r   c                    s(    | j  }jj|r| |jS | S r   )r{   apitypesis_numeric_dtypeastypedtypeseriespandas_series)data_pandasr   r   r   convert_numerical_dtypesk  s   
zFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesc                    sF    | j  }jj|r| |jS t|jjr!| j	|jj
S | S r   )r{   r   r   r   r   r   r   CategoricalDtypecatrename_categoriesr   r   )frame_pandasr   r   r   (convert_numerical_and_categorical_dtypes  s   
zVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypesN)
r   r   rv   r   r   applytestingassert_frame_equalr   feature_names)
r   ru   
bunch_liacbunch_pandas	data_liacr   data_liac_with_fixed_dtypes
frame_liacr   frame_liac_with_fixed_dtypesr   )r   r   r   r   $test_fetch_openml_consistency_parserS  s2   


r   c                 C   s\   t d d}t| |dd t|dd|d}t|dd|d}t|j|j t|j|j dS )z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   rw   Tr   Fr   N)r   r   rv   r   r   r   r   r   )r   r   ru   bunch_as_frame_truebunch_as_frame_falser   r   r   -test_fetch_openml_equivalence_array_dataframe  s"   
r   c                 C   sn  t d}|jjj}d}d}d}d}|g d}tjgd }	g d}
d	}t| |d
 t|d
d|d}|j	}|j
}|j}t||jsCJ t|j|	ksMJ |j|ksTJ t|j|
ks^J t|j|
kshJ |j|gkspJ t||jsxJ |j|ksJ |j|ksJ |j|ksJ |jjsJ t||jsJ |j|ksJ t|j|	|g ksJ |jjsJ dS )z>Check fetching on a numerical only dataset with string labels.r   rw   rx   ry   )rx   )rx      )zIris-setosazIris-versicolorzIris-virginicary   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   float64rv   r   r   r   r   r   r   alldtypesr   columnsr   target_namesr   r   r{   index	is_unique)r   r   r   r   ru   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namer   r   r   r   r   r   r   test_fetch_openml_iris_pandas  sJ   

r   target_columnr   r   c                 C   s   t d}d}t| |d t|dd||d}t|dd|d}|j|j|j t|trB|j	|j
j|| |jjdks@J d	S |j
j|ksJJ |jjdksRJ d	S )
z@Check that we can force the target to not be the default target.r   rw   TF)ru   r   r   r   r   r   )rx      r   N)r   r   rv   r   r   r   r   r   listassert_index_equalr   r   Indexr   r   r{   )r   r   r   r   ru   bunch_forcing_targetbunch_defaultr   r   r   !test_fetch_openml_forcing_targets  s0   

r   )rw   r}   r   r   r   c                 C   s   t d}t| |dd t|ddd|d}t|ddd|d\}}|j|j| t||jr8|j	|j
| dS |j|j
| dS )z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Fru   r   r   
return_X_yr   N)r   r   rv   r   r   r   r   r   r   assert_series_equalr   )r   ru   r   r   r   Xyr   r   r   .test_fetch_openml_equivalence_frame_return_X_y  s(   

r   )rw   r   r   r   c                 C   s\   t d t| |dd t|ddd|d}t|ddd|d\}}t|j| t|j| dS )z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   rv   r   r   r   r   )r   ru   r   r   r   r   r   r   r   .test_fetch_openml_equivalence_array_return_X_y  s$   

r   c                 C   sf   t d d}t| |dd d}t||ddd}t||ddd}|jjjdks)J |jjd	ks1J d
S )z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rA   ON)r   r   rv   r   r   r   kind)r   ru   r   bunch_liac_arffr   r   r   r   $test_fetch_openml_difference_parsers6  s$   
r   module)scopec                   C   s0   g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typesteelcarbonhardnesstemper_rolling	conditionformabilitystrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   thickwidthr1   oilborepackingr   )vendorMYCTMMINMMAXCACHCHMINCHMAXr   )N Mean_Acc1298_Mean_Mem40_CentroidMean_Acc1298_Mean_Mem40_RolloffMean_Acc1298_Mean_Mem40_FluxMean_Acc1298_Mean_Mem40_MFCC_0Mean_Acc1298_Mean_Mem40_MFCC_1Mean_Acc1298_Mean_Mem40_MFCC_2Mean_Acc1298_Mean_Mem40_MFCC_3Mean_Acc1298_Mean_Mem40_MFCC_4Mean_Acc1298_Mean_Mem40_MFCC_5Mean_Acc1298_Mean_Mem40_MFCC_6Mean_Acc1298_Mean_Mem40_MFCC_7Mean_Acc1298_Mean_Mem40_MFCC_8Mean_Acc1298_Mean_Mem40_MFCC_9Mean_Acc1298_Mean_Mem40_MFCC_10Mean_Acc1298_Mean_Mem40_MFCC_11Mean_Acc1298_Mean_Mem40_MFCC_12Mean_Acc1298_Std_Mem40_CentroidMean_Acc1298_Std_Mem40_RolloffMean_Acc1298_Std_Mem40_FluxMean_Acc1298_Std_Mem40_MFCC_0Mean_Acc1298_Std_Mem40_MFCC_1Mean_Acc1298_Std_Mem40_MFCC_2Mean_Acc1298_Std_Mem40_MFCC_3Mean_Acc1298_Std_Mem40_MFCC_4Mean_Acc1298_Std_Mem40_MFCC_5Mean_Acc1298_Std_Mem40_MFCC_6Mean_Acc1298_Std_Mem40_MFCC_7Mean_Acc1298_Std_Mem40_MFCC_8Mean_Acc1298_Std_Mem40_MFCC_9Mean_Acc1298_Std_Mem40_MFCC_10Mean_Acc1298_Std_Mem40_MFCC_11Mean_Acc1298_Std_Mem40_MFCC_12Std_Acc1298_Mean_Mem40_CentroidStd_Acc1298_Mean_Mem40_RolloffStd_Acc1298_Mean_Mem40_FluxStd_Acc1298_Mean_Mem40_MFCC_0Std_Acc1298_Mean_Mem40_MFCC_1Std_Acc1298_Mean_Mem40_MFCC_2Std_Acc1298_Mean_Mem40_MFCC_3Std_Acc1298_Mean_Mem40_MFCC_4Std_Acc1298_Mean_Mem40_MFCC_5Std_Acc1298_Mean_Mem40_MFCC_6Std_Acc1298_Mean_Mem40_MFCC_7Std_Acc1298_Mean_Mem40_MFCC_8Std_Acc1298_Mean_Mem40_MFCC_9Std_Acc1298_Mean_Mem40_MFCC_10Std_Acc1298_Mean_Mem40_MFCC_11Std_Acc1298_Mean_Mem40_MFCC_12Std_Acc1298_Std_Mem40_CentroidStd_Acc1298_Std_Mem40_RolloffStd_Acc1298_Std_Mem40_FluxStd_Acc1298_Std_Mem40_MFCC_0Std_Acc1298_Std_Mem40_MFCC_1Std_Acc1298_Std_Mem40_MFCC_2Std_Acc1298_Std_Mem40_MFCC_3Std_Acc1298_Std_Mem40_MFCC_4Std_Acc1298_Std_Mem40_MFCC_5Std_Acc1298_Std_Mem40_MFCC_6Std_Acc1298_Std_Mem40_MFCC_7Std_Acc1298_Std_Mem40_MFCC_8Std_Acc1298_Std_Mem40_MFCC_9Std_Acc1298_Std_Mem40_MFCC_10Std_Acc1298_Std_Mem40_MFCC_11Std_Acc1298_Std_Mem40_MFCC_12BH_LowPeakAmpBH_LowPeakBPMBH_HighPeakAmpBH_HighPeakBPMBH_HighLowRatioBHSUM1BHSUM2BHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)age	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NDYRK1A_NITSN1_NBDNF_NNR1_NNR2A_NpAKT_NpBRAF_N	pCAMKII_NpCREB_NpELK_NpERK_NpJNK_NPKCA_NpMEK_NpNR1_NpNR2A_NpNR2B_NpPKCAB_NpRSK_NAKT_NBRAF_NCAMKII_NCREB_NELK_NERK_NGSK3B_NJNK_NMEK_NTRKA_NRSK_NAPP_N
Bcatenin_NSOD1_NMTOR_NP38_NpMTOR_NDSCR1_NAMPKA_NNR2B_NpNUMB_NRAPTOR_NTIAM1_NpP70S6_NNUMB_NP70S6_NpGSK3B_NpPKCG_NCDK5_NS6_NADARB1_NAcetylH3K9_NRRP1_NBAX_NARC_NERBB4_NnNOS_NTau_NGFAP_NGluR3_NGluR4_NIL1B_NP3525_NpCASP9_NPSD95_NSNCA_NUbiquitin_NpGSK3B_Tyr216_NSHH_NBAD_NBCL2_NpS6_NpCFOS_NSYP_N	H3AcK18_NEGR1_NH3MeK4_NCaNA_Nr   )pclasssurvivedr{   sexri  sibspparchticketfarecabinembarkedboatbody	home.destrw   r}   r   r   r   r   r   r   r   r   r   r   datasets_column_namesU  s   )PP r  c                   C   s   i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr   r~   r   	   r   r}   r   ry   r   r   r  r  r  r  r  r     r  r  r	  r
  r  r  )
r  r  r  r  r  r  r  r  r  r  r  r   i  rS   i  i7  i  i4  )ri  r  r  r  r  r  r  r  r   r   r   r   r   datasets_missing_valuesH  sx   	
r  zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))rw   r   rS   ry   r   )rw   r   rS   ry   r   )r}   r   !   r   r   )r}   r   r  r}   ry   )r   r   rS   r   r   )r   r   rS   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   rS   r   r   )r   r   rS   r   r   )r   r   r   r   r   )r   r   r   r   r   c	                    s   t d}	|	jjj t| ||d t|dd|d}
|
j}t fdd|j	D }tdd |j	D }td	d |j	D }||ksBJ ||ksHJ ||ksNJ |j
 || ksYJ |   }| D ]\}}|| |d
}||kswJ qedS )zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   c                    s   g | ]	}t | r|qS r   )r   .0r   r   r   r   
<listcomp>      z5test_fetch_openml_types_inference.<locals>.<listcomp>c                 S      g | ]	}|j d kr|qS )rA   r   r  r   r   r   r    r  c                 S   r  )ir  r  r   r   r   r    r  r   N)r   r   r   r   r   rv   r   r   r1   r   r   tolistisnasumto_dictitemsget)r   ru   r   expected_n_categoriesexpected_n_floatsexpected_n_intsrE   r  r  r   r   r   n_categoriesn_floatsn_intsframe_feature_to_n_nanr{   	n_missingexpected_missingr   r  r   !test_fetch_openml_types_inferencey  s0   
(
r  zparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                 C   sV   d}t | |d tjt|d tdd|i| W d    d S 1 s$w   Y  d S )Nr   Tmatchru   r   )rv   r   raisesrh   r   r   paramserr_msgru   r   r   r   &test_fetch_openml_validation_parameter  s
   "r  r  auto)r   r   c                 C   s   d}z	t d W td ty?   t| |d d}tjt|d td	d|i| W d   Y dS 1 s7w   Y  Y dS w )
z=Check that we raise the proper errors when we require pandas.r   !test_fetch_openml_requires_pandasTz:requires pandas to be installed. Alternatively, explicitlyr  ru   Nz.This test requires pandas to not be installed.r   )r   ImportErrorrv   r   r  r   r   )r   r  ru   r  r   r   r   'test_fetch_openml_requires_pandas_error  s   
&r  z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                 C   sb   t d d}t| |d t jt|d td|dd| W d   dS 1 s*w   Y  dS )	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)ru   r   Nr   )r   r   rv   r  rh   r   r  r   r   r   #test_fetch_openml_sparse_arff_error  s   
"r  zdata_id, data_type)rw   	dataframe)r  sparsec                 C   sN   t d}t| |d t|ddd}|dkr|jntjj}t|j	|s%J dS )z&Check the auto mode of `fetch_openml`.r   Tr  F)ru   r   r   r  N)
r   r   rv   r   r   scipyr  
csr_matrixr   r   )r   ru   	data_typer   r   klassr   r   r   test_fetch_openml_auto_mode  s
   

r  c              	   C   s   t d d}t| |d d}t jt|d. tdd t|ddd	d
 W d   n1 s/w   Y  W d   dS W d   dS 1 sGw   Y  dS )z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)working_memoryFr   r   N)r   r   rv   warnsUserWarningr   r   )r   ru   r]   r   r   r   :test_convert_arff_data_dataframe_warning_low_memory_pandas#  s    
"r  c                 C   sb   d}d}t | || td}tjt|d t|dddd W d   dS 1 s*w   Y  dS )	z\Check that a warning is raised when multiple versions exist and no version is
    requested.rw   rz   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )r{   r   r   r   N)rv   r/   escaper   r  r  r   )r   rE   ru   	data_namer]   r   r   r   ,test_fetch_openml_iris_warn_multiple_version6  s   	"r  c                 C   sT   d}d}d}d}t | || t||dddd}|jj||fks!J |jdu s(J dS )z/Check that we can get a dataset without target.rw   Nrx   r   Fr   ru   r   r   r   r   )rv   r   r   r   r   )r   rE   ru   r   expected_observationsexpected_featuresr   r   r   r   test_fetch_openml_no_targetQ  s   r  c                 C   sb   t d d}t| ||d t|dd|d}|jjd }|jd   s'J t|j	g d d	S )
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTru   r   r   r   r  )FEMALEMALE_N)
r   r   rv   r   r   r   r  anyr   r   )r   rE   r   ru   penguins	cat_dtyper   r   r   test_missing_values_pandase  s   
r
  r     glass2)ru   r{   r|   c                 C   s~   d}t | || d}tjt|d tddddd|}W d   n1 s'w   Y  |jjdks4J |jd	 d
ks=J dS )z;Check that we raise a warning when the dataset is inactive.r  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   40675r   )rv   r   r  r  r   r   r   r   )r   rE   r   ru   r]   r  r   r   r   test_fetch_openml_inactive{  s   
r  z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )ru   r   z1Can only handle homogeneous multi-target datasets)ru   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer   )ru   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                 C   sr   t | || |dds|dkrtd tj||d tdd|d| W d    d S 1 s2w   Y  d S )Nr   Tr   r  F)r   r   r   )rv   r  r   r   r  r   )r   rE   ru   r  err_typer  r   r   r   r   test_fetch_openml_error  s   2
"r  zparams, err_type, err_msgr   r|   zCThe 'version' parameter of fetch_openml must be an int in the rangenAmE)ru   r{   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 C   sB   t j||d tdi |  W d    d S 1 sw   Y  d S )Nr  r   )r   r  r   )r  r  r  r   r   r   )test_fetch_openml_raises_illegal_argument  s   "r  c                 C   s^  d}d}d}t | || d}||}tjt|d t||dddd W d    n1 s.w   Y  d	}||}tjt|d t||dddd W d    n1 sUw   Y  d}||}tjt|d t||d
gdddd W d    n1 s~w   Y  d	}||}tjt|d t||d
gdddd W d    d S 1 sw   Y  d S )Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.MouseIDr  Fr   r  Genotyper   )rv   formatr   r  r  r   )r   rE   ru   expected_row_id_msgexpected_ignore_msg
target_colr]   r   r   r   test_warn_ignore_attribute  s`   

	

"r  c                 C   X   d}t | || d}tjt|d t|dddd W d    d S 1 s%w   Y  d S )NrS   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r  rv   r   r  r  r   r   rE   ru   r]   r   r   r   test_dataset_with_openml_error     "r  c                 C   r  )Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r  r  r  r   r   r    test_dataset_with_openml_warning"  r   r!  c                 C   s   t d d}t| |dd |dddd}tdi |}tdi |dddii}td	d
 |jd jjD s9J tdd
 |jd jjD rIJ dS )zACheck that we can overwrite the default parameters of `read_csv`.r   6  Fru   rE   Tr   read_csv_kwargsskipinitialspacec                 s       | ]}| d V  qdS  Nr:   r  r   r   r   r   	<genexpr>>  s    

zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>r   c                 s   r&  r'  r)  r*  r   r   r   r+  A  s
    
Nr   )	r   r   rv   r   r   r   r   r   r  )r   ru   common_paramsadult_without_spacesadult_with_spacesr   r   r   3test_fetch_openml_overwrite_default_params_read_csv+  s(   
	r/  c           
      C   s|   d}t | || t|d }d| }t|d}t||}t||}tj	|s-J t||}	|
 |	
 ks<J d S )Nrw   /filename.arffhttps://www.openml.org/scikit_learn_data)rv   _MONKEY_PATCH_LOCAL_OPENML_PATHr  strmkdirr	   r   ospathisfiler   )
r   rE   tmpdirru   openml_pathr3   cache_directory	response1location	response2r   r   r   test_open_openml_url_cacheK  s   



r?  write_to_diskc                    s   d}t |d }d| }t|d}t||  fdd}| tjjd| t	j
tdd	 t|| W d    n1 sAw   Y  tj rNJ d S )
Nrw   r0  r1  r2  c                    sF   rt  d}|d W d    td1 sw   Y  td)Nw Invalid request)r<   writerh   )ri   rj   rk   rA   r=  r@  r   r   rl   e  s   
z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenrm   rC  r  )r3  r  r4  r5  r   rp   rq   rr   rs   r   r  rh   r	   r6  r7  exists)r   r9  r@  ru   r:  r3   r;  rl   r   rE  r   'test_open_openml_url_unlinks_local_path]  s   

rG  c                    s   d}t |}t| d}t|| ttj  t	 d}|
d W d    n1 s1w   Y  t|| fdd}d}tjt|d | }W d    n1 sXw   Y  |d	kscJ d S )
Nrw   r2  rA  rB  c                      s   t j r
tddS )NzFile exist!rS   )r6  r7  rF  	Exceptionr   r=  r   r   
_load_data}  s   z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading filer  rS   )r3  r  r4  r5  r   r6  makedirsr7  dirnamer<   rD  r
   r   r  RuntimeWarning)r9  ru   r:  r;  rA   rJ  warn_msgresultr   rI  r   test_retry_with_clean_caches  s   

rP  c                 C   sl   d}t |}t| d}t||dd }d}tjt|d |  W d    d S 1 s/w   Y  d S )Nrw   r2  c                   S      t d ddd t d)NrY   rZ   r[   r   r   r   r   r   r   rJ    s   z:test_retry_with_clean_cache_http_error.<locals>._load_datarZ   r  )r3  r  r4  r5  r
   r   r  r   )r9  ru   r:  r;  rJ  	error_msgr   r   r   &test_retry_with_clean_cache_http_error  s   

"rT  c           
      C   s   dd }d}t |d}t| || t|d|dddd\}}| tjjd	| t|d|dddd\}}	tj	
|| tj	
||	 d S )
Nc                 _   s   t d|   )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)rh   rf   ri   rj   rk   r   r   r   _mock_urlopen_raise  s
   z4test_fetch_openml_cache.<locals>._mock_urlopen_raiserw   r2  TFr   )ru   r   r+   r   r   r   rm   )r4  r5  rv   r   rp   rq   rr   rs   r   r   r   )
r   rE   r9  rV  ru   r;  	X_fetched	y_fetchedX_cachedy_cachedr   r   r   test_fetch_openml_cache  s.   
	
r[  zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                    sT  |s|dkrt d d}t| |d td d|  }d}t|| }|d  |d}t|d}	t|		 }
d	|
t
|
d
 < W d   n1 sMw   Y  t d}||
 W d   n1 shw   Y  tjjj fdd}| tjjd| t t}tjj|d||d W d   n1 sw   Y  |dsJ dS )z/Check that the checksum is working as expected.r   r}   Tr,   r-   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr9   %   rS   Nwbc                    s\   |   }|dr*t d}| }W d    n1 sw   Y  tt|ddS | S )Nz$data/v1/download/1666876/anneal.arffr9   Tr   )rf   endswithr<   r   r   r   )ri   rj   rk   r3   rA   corrupted_datacorrupt_copy_pathmocked_openml_urlr   r   swap_file_mock  s   

z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockrm   Fr  1666876)r   r   rv   rn   r   r;   r<   r   	bytearrayr   r1   GzipFilerD  rq   rr   rs   rm   rp   r  rh   r   r  )r   r   r9  r   ru   original_data_moduleoriginal_data_file_nameoriginal_data_path	orig_file	orig_gzipr   modified_gziprc  excr   r`  r   !test_fetch_openml_verify_checksum  s2   

	rn  c              	   C   s   dd }|  tjjd| d}tjttd| dd4}tj	t
dd}t|d d	d
 W d    n1 s7w   Y  t|dksDJ |j  W d    d S 1 sTw   Y  d S )Nc                 _   rQ  )Ni  Simulated network errorr[   rR  rU  r   r   r   _mock_urlopen_network_error  s   zPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_errorrm   z"https://api.openml.org/invalid-urlz+A network error occurred while downloading z. Retrying...r  ro  r   )delayr   )rp   rq   rr   rs   r   r  r  r/   r  r  r   r	   r1   valuer   )r   rp  invalid_openml_urlrecordexc_infor   r   r   +test_open_openml_url_retry_on_network_error  s(   
"rv  )r   r   c                 C   sh   |dkr	t d d}t| || tjj|dd|d}|dus!J |d jdks*J d|d	 vs2J dS )
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr  Nr   )e      animalr   )r   r   rv   rq   rr   r   r   )r   rE   r   ru   datasetr   r   r   &test_fetch_openml_with_ignored_feature  s   
r|  c                 C   s  t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|j|j |jjd		 r:J |jj
d		 rEJ tddd
d|}tddd
d|}|j|jd |jd  |jd jd		 rsJ |jd j
d		 rJ dS )zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   Fr#  Tr   r   ru   r   r   'r  )r   r   r   Nr   )r   r   rv   r   r   r   r   r4  r:   r  r^  r   )r   r   ru   r,  mice_pandasmice_liac_arffr   r   r   test_fetch_openml_strip_quotes(  s(   
r  c                 C   sj   t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|jd	 |jd	  d
S )zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   r"  Fr#  Tr}  r   r   r   Nr   )r   r   rv   r   r   r   r   r   r   ru   r,  adult_pandasadult_liac_arffr   r   r   $test_fetch_openml_leading_whitespaceE  s   
r  c                 C   sb   t d}d}t| |dd dd|d}td
ddi|}td
ddi|}|j|j|j d	S )zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  Fr#  Tr}  r   r   Nr   )r   r   rv   r   r   r   r   r  r   r   r   &test_fetch_openml_quotechar_escapecharW  s   
r  )T__doc__r   r`   r6  r/   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   scipy.sparser  rq   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr   r	   r
   sklearn.utilsr   $sklearn.utils._optional_dependenciesr   sklearn.utils._testingr   r   r   rn   ro   r3  r   rv   markparametrizer   r   r   r   r   r   r   r   r   fixturer  r  r  r  r  filterwarningsr  r  r  r  r  r
  r  rh   KeyErrorr  r  r  r  r!  r/  r?  rG  rP  rT  r[  rn  rv  r|  r  r  r  r   r   r   r   <module>   s    
+'
?

/

 
s
00

	








-




1

 


"
	.