o
    \i                     @   s   d dl Zd dlZd dlmZ d dlmZ d dlmZ	 dd Z
dd Zejjd	d
dgdd dD gddgddd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% ZdS )&    N)assert_array_equal)FeatureHasher)	transformc                  C   st   t dd} d| jksJ dddddd	d
g}t dd|}dd |D }t ddd|}t| |  d S )N   
n_featuresdictbar*   %   )foodadatzarabazstring1)r   gagac                 s       | ]	}t | V  qd S Niteritems.0d r   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/feature_extraction/tests/test_feature_hasher.py	<genexpr>       z,test_feature_hasher_dicts.<locals>.<genexpr>pairr   
input_type)r   r    r   r   toarray)feature_hasherraw_XX1genX2r   r   r   test_feature_hasher_dicts	   s   
r'   c                  C   s   dddd dgd dddgg} dD ]H}d| }dd	 | D }t|d
dd}||}|jd t| ks7J |jd |ks@J |d  dksJJ |d  dksTJ |jdks[J qd S )Nr   r	   r   asciiquux)   	      r         c                 s       | ]}|V  qd S r   r   r   xr   r   r   r          z.test_feature_hasher_strings.<locals>.<genexpr>stringF)r   r    alternate_signr               )encoder   r   shapelensumnnz)r#   lg_n_featuresr   itr"   Xr   r   r   test_feature_hasher_strings   s    
rA   r#   	my_stringanother_stringc                 c   r/   r   r   r0   r   r   r   r   2   r2   r   )rB   rC   list	generator)idsc                 C   sN   d}t ddd}tjt|d ||  W d   dS 1 s w   Y  dS )zhFeatureHasher raises error when a sample is a single string.

    Non-regression test for gh-13199.
    z"Samples can not be a single string
   r3   r   )matchN)r   pytestraises
ValueErrorr   )r#   msgr"   r   r   r   !test_feature_hasher_single_string.   s
   "rM   c                  C   s   dddd dgd dddgg} dd | D }t|dtd	\}}}d
d | D }t|dtd	dd\}}}t|| t|| dd | D }t|dtd	dd\}}}tt t|| W d    d S 1 shw   Y  d S )Nr   r	   r   r(   r)   c                 s       | ]
}d d |D V  qdS )c                 s       | ]}|d fV  qdS r5   Nr   r   fr   r   r   r   I       8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>Nr   r0   r   r   r   r   I       z.test_hashing_transform_seed.<locals>.<genexpr>   Fc                 s   rN   )c                 s   rO   rP   r   rQ   r   r   r   r   L   rS   rT   Nr   r0   r   r   r   r   L   rU   r   )seedc                 s   rN   )c                 s   rO   rP   r   rQ   r   r   r   r   Q   rS   rT   Nr   r0   r   r   r   r   Q   rU   r5   )r9   _hashing_transformstrr   rI   rJ   AssertionError)r#   raw_X_indicesindptr_	indices_0indptr_0	indices_1r   r   r   test_hashing_transform_seedB   s   

"rb   c                  C   s   dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ks=J g d|ksEJ d S )Nc                 s   r   r   r   r   r   r   r   r   X   
    

z,test_feature_hasher_pairs.<locals>.<genexpr>r5   r.   r   r	   r7   r6   r   r)   r   r   r   r   r   )r5   r7   r6   )r   r   r!   sortednpabsr#   r"   x1x2x1_nzx2_nzr   r   r   test_feature_hasher_pairsW   s   ro   c                  C   s  dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ks=J g d|ksEJ dd ddiddifD } ||  \}}t||dk }t||dk }dg|kstJ dg|ks{J t|| d S )Nc                 s   r   r   r   r   r   r   r   r   e   rc   z?test_feature_hasher_pairs_with_string_values.<locals>.<genexpr>r5   ard   abcr6   re   rf   r   r   r   r   )r5   r5   r6   c                 s   r   r   r   r   r   r   r   r   p   r   bax)r   r   r!   rg   rh   ri   r   rj   r   r   r   ,test_feature_hasher_pairs_with_string_valuesd   s    rs   c                  C   sL   d} g dt tdg}t| dd}||}t| tt|| f d S )Nr   r   r   r3   r   )	r   ranger   r   r   r!   rh   zerosr;   )r   r#   r"   r@   r   r   r   test_hash_empty_inputy   s
   
 rv   c                  C   s&   t  ddig} | jjdksJ d S )Nr   r   )r   )r   r   datar:   )r@   r   r   r   test_hasher_zeros   s   rx   c                  C   sd   t dg} tddd| }|j dk r|j dksJ tddd| }|j dks0J d S )NThequickbrownfoxjumpedTr3   )r4   r    r   F)rD   r   fit_transformrw   minmaxr@   Xtr   r   r   test_hasher_alternate_sign   s
   
 r   c                  C   sn   t dg} tdddd| }t|jd t| d k sJ tdddd| }|jd t| d ks5J d S )Nry   Tr5   r3   )r4   r   r    r   F)rD   r   rz   ri   rw   r;   r}   r   r   r   test_hash_collisions   s   
r   c                  C   s   t  } |  }|jrJ dS )z3Test that FeatureHasher has requires_fit=False tag.N)r   __sklearn_tags__requires_fit)hashertagsr   r   r   $test_feature_hasher_requires_fit_tag   s   r   c                  C   s:   t dd} ddddddg}| |}|jdksJ d	S )
z6Test that FeatureHasher can transform without fitting.rG   r   r5   r.   )dogcat   )r   run)r.   rG   N)r   r   r:   )r   rw   resultr   r   r   )test_feature_hasher_transform_without_fit   s   

r   )numpyrh   rI   numpy.testingr   sklearn.feature_extractionr   (sklearn.feature_extraction._hashing_fastr   rX   r'   rA   markparametrizerM   rb   ro   rs   rv   rx   r   r   r   r   r   r   r   r   <module>   s0    


