o
    \iN                     @   sJ  d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlZd dlmZmZmZ d dlmZmZmZmZ d dlmZ d	Zd
ZdZdZ dZ!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*d d! Z+d"d# Z,d$d% Z-d&d' Z.d(d) Z/ej01d*d+d, Z2d-d. Z3d/d0 Z4d1d2 Z5ej06d3ed4d5 Z7ej06d3ed6d7 Z8d8d9 Z9d:d; Z:d<d= Z;d>d? Z<d@dA Z=ej06d3edBdC Z>ej06dDg dEej06dFdGdHgej06dIg dJej06d3edKdL Z?ej06d3edMdN Z@dOdP ZAej06d3edQdR ZBdSdT ZCdS )U    N)BZ2File)	resources)BytesIO)NamedTemporaryFile)dump_svmlight_fileload_svmlight_fileload_svmlight_files)assert_allcloseassert_array_almost_equalassert_array_equalcreate_memmap_backed_data)CSR_CONTAINERSzsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc                 C   s   t t|  S N)r   filesTEST_DATA_MODULE)filename r   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_svmlight_format.py_svmlight_local_test_file_path   s   r   c                 K   sF   t | }|d}t|fi |W  d   S 1 sw   Y  dS )zG
    Helper to load resource `filename` with `importlib.resources`
    rbN)r   openr   )r   kwargs	data_pathfr   r   r   _load_svmlight_local_test_file"   s   $r   c                  C   s   t t\} }| jjd dksJ | jd dksJ | jd dks"J |jd dks+J dD ]\}}}| ||f |ks<J q-| d dksEJ | d dksMJ | d	 dksUJ | d
 dks]J | d dkseJ | d  d9  < | d dksuJ t|g d d S )Nr               )r      g      @r   
   g)r      g      ?r            ?r      )r          )r      )r   r%   )r      )r      )r       )r   r    r    r%   )r   r    r,      r   r    )r   datafileindptrshaper   Xyijvalr   r   r   test_load_svmlight_file+   s   r:   c               	   C   sr   t tt } t| } t| \}}t| tj}zt|\}}t	|j
|j
 t	|| W t| d S t| w r   )r   r   r   r1   strr   osr   O_RDONLYr
   dataclose)r   X1y1fdX2y2r   r   r   test_load_svmlight_file_fdN   s   rE   c                  C   s@   t t} tt| \}}t| \}}t|j|j t|| d S r   )r   r1   r   r;   r	   r>   )r   r@   rA   rC   rD   r   r   r   test_load_svmlight_pathliba   s
   rF   c                  C   s$   t tdd\} }|g dksJ d S )NT
multilabel))r   r   )r    r   )r   r    )r   	multifile)r5   r6   r   r   r   "test_load_svmlight_file_multilabelk   s   rJ   c                  C   s   t t} tt| gd tjd\}}}}t| |  t|| |j	tjks*J |j	tjks2J tt| gd tj
d\}}}}}	}
|j	|j	ksLJ |j	|	j	ksTJ |	j	tj
ks\J d S )Nr    )dtyper,   )r   r1   r   r;   npfloat32r   toarrayr
   rK   float64)r   X_trainy_trainX_testy_testr@   rA   rC   rD   X3y3r   r   r   test_load_svmlight_filesp   s   
$rV   c                  C   s   t tdd\} }| jjd dksJ | jd dksJ | jd dks$J dD ]\}}}| ||f |ks5J q&tt t tdd W d    d S 1 sMw   Y  d S )	N   )
n_featuresr   r   r   r   )r   r!   r$   r'   r*   )r   r1   r2   r3   pytestraises
ValueErrorr4   r   r   r   "test_load_svmlight_file_n_features   s   "r\   c            	   
   C   s  t t\} }tdddM}|  ttd%}t|jd}t	|| W d    n1 s1w   Y  W d    n1 s@w   Y  t
|j\}}t|j W d    n1 s\w   Y  t|  |  t|| tdddL}|  ttd$}t|jd}t	|| W d    n1 sw   Y  W d    n1 sw   Y  t
|j\}}t|j W d    n1 sw   Y  t|  |  t|| d S )Nzsklearn-testz.gz)prefixsuffixr   wbz.bz2)r   r1   r   r?   r   r   gzipnameshutilcopyfileobjr   r<   remover
   rN   r   )	r5   r6   tmpr   fh_outXgzygzXbzybzr   r   r   test_load_compressed   s6   	
	rk   c                   C   8   t t tt W d    d S 1 sw   Y  d S r   )rY   rZ   r[   r   invalidfiler   r   r   r   test_load_invalid_file      
"rn   c                   C   rl   r   )rY   rZ   r[   r   invalidfile2r   r   r   r   test_load_invalid_order_file   ro   rq   c                  C   sD   t d} tt t| dd W d    d S 1 sw   Y  d S )Ns   -1 4:1.
1 0:1
F
zero_based)r   rY   rZ   r[   r   )r   r   r   r   test_load_zero_based   s   "rt   c            
      C   sv   d} d}t | }t|dd\}}|jdksJ t | }t |}t||gdd\}}}}	|jdks2J |jdks9J d S )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
autorr   )r   r,   )r   r0   )r   r   r3   r   )
data1data2f1r5   r6   f2r@   rA   rC   rD   r   r   r   test_load_zero_based_auto   s   rz   c                  C   s   d} t t| dd\}}t|g d t| ddgddgd	dgg tt| gd
d}t t| d
d}||fD ]#\}}}t|g d t|g d t| ddgddgd	dgg q8d S )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12Fquery_id)r,   r    r   g(\?gQ?gp=
ף?皙?gףp=
?T)r   r   r    )r   r   r   rN   r   )r>   r5   r6   res1res2qidr   r   r   test_load_with_qid   s    "r   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc                  C   sb   d dd tddD } tt| dd\}}}t|dd	 g d
 tt|tdd d	S )zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c                 s   s    | ]
}d  | V  qdS )z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r7   r   r   r   	<genexpr>   s
    
z&test_load_large_qid.<locals>.<genexpr>r   i ZbTr{   N)r,   r    r,   r    )joinranger   r   r   rL   uniquearange)r>   r5   r6   r   r   r   r   test_load_large_qid   s   r   c                  C   sZ   t t tt} tt}tt| t|t| g W d    d S 1 s&w   Y  d S r   )rY   rZ   r[   r   r1   rm   r   r;   )r   invalid_pathr   r   r   test_load_invalid_file2   s
   "r   c                   C   8   t t td W d    d S 1 sw   Y  d S )NgzG?)rY   rZ   	TypeErrorr   r   r   r   r   test_not_a_filename   s   
"r   c                   C   r   )Nztrou pic nic douille)rY   rZ   OSErrorr   r   r   r   r   test_invalid_filename   ro   r   csr_containerc                 C   s  t t\}}| }| t|}|t|jd  }|t|jd  }|||fD ]}|||fD ]}dD ]}	tjtjtj	tj
fD ]}
t }t|rU|jd dkrU|j}||
}t|||d|	d |d | }t|d}dtj |v szJ | }t|d}dd	g|	 d
 |v sJ t||
|	d\}}|j|
ksJ t| j|j | }t|r| }n|}|
tjkrt||d t|j|
dd|d qAt||d t|j|
dd|d qAq5q1q*d S )Nr   )TFr   testcommentrs   utf-8zscikit-learn %sonezeroz-based)rK   rs   r0   F)copyr#   )r   r1   rN   rL   
atleast_2dr   r3   rM   rO   int32int64r   spissparseTastyper   seekreadliner;   sklearn__version__r   rK   r   sorted_indicesindicesr
   )r   X_sparsey_denseX_densey_sparseX_slicedy_slicedr5   r6   rs   rK   r   X_inputr   rC   rD   X2_denseX_input_denser   r   r   	test_dump  sV   







r   c                 C   s   g dg dg dg}g dg dg dg}| |}||fD ]*}t  }t|||dd |d	 | d
ks8J | dks@J | dksHJ qd S )N)r   r   r,   r   r%   r   r   r   r   r   )r   r%   r   r   r   r   r   r   )r   r   r   )r   r   r   TrG   r   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)r   r   r   r   )r   r5   r   r   r6   r   r   r   r   test_dump_multilabelH  s   
r   c            
      C   s   d} d}d}d}d}| ||||gg dg dg dg dg}| ||||g}t  }t||| |d | d	ks;J | d
ksCJ | dksKJ | dksSJ | dks[J |d t|\}}	t||  t||	 d S )Nr   g @gGz@g     ?r&   )g    eAg NgmCgkcEr   r   r   r   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r   r   r   r
   rN   )
r   twothreeexactalmostr5   r6   r   rC   rD   r   r   r   test_dump_conciseW  s0   

r   c                  C   s@  t t\} }|  } t }d}t| |||dd |d t|dd\}}t| |  t|| d}t }t	t
 t| |||d W d    n1 sNw   Y  |d}t }t| |||dd |d t|dd\}}t| |  t|| t }t	t t| ||d	d W d    d S 1 sw   Y  d S )
Nz*This is a comment
spanning multiple lines.Fr   r   rr   s   It is true that
½² = ¼)r   r   zI've got a  .)r   r1   rN   r   r   r   r   r
   rY   rZ   UnicodeDecodeErrordecoder[   )r5   r6   r   ascii_commentrC   rD   utf8_commentunicode_commentr   r   r   test_dump_commentv  s2   




"r   c                  C   s   t t\} }t }|g}tt t| || W d    n1 s"w   Y  t }tt t| |d d | W d    d S 1 sEw   Y  d S )N)r   r1   r   rY   rZ   r[   r   )r5   r6   r   y2dr   r   r   test_dump_invalid  s   "r   c                  C   s   t t\} }|  } t| jd d }t }t| |||dd |d t	|ddd\}}}t
| |  t
|| t
|| d S )Nr   r    Tr|   rs   )r   r1   rN   rL   r   r3   r   r   r   r   r
   )r5   r6   r|   r   r@   rA   	query_id1r   r   r   test_dump_query_id  s   

r   c                  C   s   d} t t| dd\}}}g dg dg dg dg}g d}g d}t|| t| | t|| t }t||||dd |d	 t |ddd\}}}t|| t| | t|| |d	 t |d
dd\}}t|| t| | d S )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985Tr{   )r   r    r,   )ixU   \.,N^iY)r   r   r   r,   )r   r   l l    r   r   F)r   r   r   rN   r   r   )r>   r5   r6   r   true_Xtrue_ytrueQIDr   r   r   r   test_load_with_long_qid  s.   






r   c                 C   sv   t  }| tjdd}tg d}t||| dD ]}|d t|d|d\}}t|| t| |  qd S )N)r,   r0   r3   r   )ru   TFr   r0   )rX   rs   )	r   rL   zerosarrayr   r   r   r
   rN   )r   r   r   r   rs   r5   r6   r   r   r   test_load_zeros  s   

r   sparsity)r   r}   g      ?gGz?r   	n_samples   e   rX   )r    r   )   c                 C   s  t jd}|jdd||fd}| rd||| k < ||}|jdd|d}t }t||| |d t|	 }d}	|d }
|
|	 }d| d }||
 }t
|||	|d	\}}t
|||
|d	\}}t
|||d
\}}t |||g}t|||g}t|| t| |  d S )Nr           r&   lowhighsizer    r,   r0   r%   )rX   offsetlength)rX   r   )rL   randomRandomStateuniformrandintr   r   r   lengetvaluer   concatenater   vstackr
   rN   )r   r   rX   r   rngr5   r6   r   r   mark_0mark_1length_0mark_2length_1X_0y_0X_1y_1X_2y_2y_concatX_concatr   r   r   test_load_with_offsets  s4   



r   c              
   C   s8  t jd}t g dg dg dg dg dg dg dg}| |}|j\}}|jdd|d}t |d }t }t||||d	 |	d t
| }t|D ]G}	|	d t||d
d|	d\}
}}t||d
|	dd\}}}t ||g}t ||g}t|
|g}t|| t|| t| |  qRd S )Nr   )r   r   r   r   r   r   )r   r    r,   r0   r   r   )r   r   r,   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r    r   r{   T)rX   r|   r   r   r   )rL   r   r   r   r3   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   rN   )r   r   r5   r   rX   r6   r|   r   r   markr   r   q_0r   r   q_1q_concatr   r   r   r   r   "test_load_offset_exhaustive_splits  sD   






r   c                   C   sB   t jtdd ttddd W d    d S 1 sw   Y  d S )Nzn_features is required)matchr,   )r   r   )rY   rZ   r[   r   r1   r   r   r   r   test_load_with_offsets_error8  s   "r   c                 C   s   t | d }tjd}|ddtj}tg d}tg d}tg d}||||fdd	}t|||d
d t	|d
d\}	}
g d}|
|ksNJ dS )z
    Ensure that if y contains explicit zeros (i.e. elements of y.data equal to
    0) then those explicit zeros are not encoded.
    svm_explicit_zero*   r,   r%   )r   r    r,   r   )r   r    r    r   r   r    )r   r   r   r   r   r   )r,   r,   r   TrG   )g       @r   )r   r&   N)
r;   rL   r   r   randnr   rO   r   r   r   )tmp_pathr   	save_pathr   r5   r2   r   r>   r6   _y_loady_truer   r   r    test_multilabel_y_explicit_zeros=  s   r  c                 C   sN   t jd}|dd}|d}t||g\}}t| d }t||| dS )zEnsure that there is no ValueError when dumping a read-only `X`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28026
    r   r%   r    svm_read_onlyN)rL   r   r   r   r   r;   r   )r   r   r5   r6   r   r   r   r   test_dump_read_onlyW  s   
r  )Dr`   r<   rb   bz2r   	importlibr   ior   tempfiler   numpyrL   rY   scipy.sparsesparser   r   sklearn.datasetsr   r   r   sklearn.utils._testingr	   r
   r   r   sklearn.utils.fixesr   r   r1   rI   rm   rp   r   r   r:   rE   rF   rJ   rV   r\   rk   rn   rq   rt   rz   r   r   skipr   r   r   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   <module>   s|    	#


E
!$
$
*
