o
     \iD%                    @   s  d dl m Z  d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ ejfddZi dejd	ejd
d dejdejdejdejdejdejdejdejdejdejdd dejdejdejd ejejd ejdZejddd Zdd Zdd  Zd!d" Zejd#d$d%gd&d' Zejdd(d) Zd*d+ Z d,d- Z!d.d/ Z"ejd0d1g d2eg d3d1d4fd1d5gg d6eg d3d1d4e	g d7gfd1d8gg d9eg d3d1d4eg d3d1d4gfgejd:d$d%gd;d< Z#d=d> Z$d?d@ Z%dAdB Z&ejd#d$d%gejdCd$d%gejdDd$d%gdEdF Z'dGdH Z(dIdJ Z)dKdL Z*dMdN Z+dOdP Z,ejdQdRgee-dSd dTgdUdVgdWfee-dS.dXdY dVgd dUgdWfee-dZd dTgdUdVgdWfgd[d\ Z/d]d^ Z0d_d` Z1dadb Z2ejdcddddgfddedfgfddedgfddddfgfgdgdh Z3didj Z4dkdl Z5dmdn Z6ejd#d$d%gdodp Z7ejd#d$d%gdqdr Z8dsdt Z9dudv Z:dwdx Z;ejdyd$ee
j<eg dzd{d|g d}gd1d5gd~g d3ddfd%eeg dzd{d|g d}g d3dfgdd Z=ejddejej>gdd Z?ej@dd ZAejdddgdd ZBejdddgejdCd%dgdd ZCejdd$e
<e	dgdS dgdS  d{dde	g dd{dde	ddgdS gg dfd%e
jDeddgd%deg dd%de	ddggg dd~dVdVdSdSejejdUdUejejdTdTgfde
jDeddgd%deg dd%de	ddggg dd~dVdVdSdSejejdUdUejejdTdTgfgdd ZEdd ZFejdg dg dgdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLejdCd%dgdd ZMejddd ZNejdejOg dzfejPg d}fgdd ZQdd ZRejdddgdd ZSdd ZTdd ZUdd ZVdd ZWddÄ ZXejdddgdeYdCeZfddńZ[ejdddgdeYdCeZfddǄZ\ddɄ Z]ejdddgdd˄ Z^dd̈́ Z_ddτ Z`ddф Zaejdg dӢejd#d$d%gddՄ Zbejdddgejd#d$d%gddل Zcejdg dӢejdddgejd#d$d%gddބ Zdejdg dӢejdg dߢejd#d$d%gdd Zeejdg dӢdd Zfejdd1d5d1d5ggejdd1d5d1d5ggdd Zgejd:d$d%gejddgdd8ggdd ZhdS )    )datetimeN)CategoricalCategoricalIndex	DataFrameIndex
MultiIndexSeriesqcut)SeriesGroupBy)get_groupby_method_argsc                 C   sr   dd }t jt|||d}t|tr0i }| jD ]}| | j||| d||< qt||d S | j||d S )zpReindex to a cartesian production for the groupers,
    preserving the nature (Categorical) of each grouper
    c                 S   s4   t | ttfr| j}tjtt||| jd} | S )N
categoriesordered)	
isinstancer   r   r   
from_codesnparangelenr   )ar    r   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/pandas/tests/groupby/test_categorical.pyf   s   z)cartesian_product_for_groupers.<locals>.fnames
fill_valueindex)	r   from_productmapr   dictcolumnsreindexr   
sort_index)resultargsr   r   r   r   rescolr   r   r   cartesian_product_for_groupers   s   

r(   allanycountcorrwithfirstidxmaxidxminlastmaxmeanmedianminnthnuniqueprodquantilesem)sizeskewstdsumvarz7ignore:invalid value encountered in cast:RuntimeWarningc                 C   sB   t | jd}dd }| j|ddj|}|jjd dksJ d S )N   c                 S   s   |   |  |  |  dS )Nr4   r1   r+   r2   r@   )groupr   r   r   	get_statsR   s
   z2test_apply_use_categorical_name.<locals>.get_statsFobservedr   C)r	   rE   groupbyDapplyr   r   )dfcatsrB   r$   r   r   r   test_apply_use_categorical_nameN   s   rK   c                  C   sV  t g dg ddd}tg d|d}ttdddd	}td
dddtjgi|d}|jddd }t	|| t g dg ddd}t g dg ddd}t||g dd}|jddd}	tg dddd	}
tdt
g d|
di}|	jdd}t	|| tddgddgddggddgd}t |j|d< |jdgdd}|d d! }t	||dg  |d}|jd"dg }t	|| d#d$ }d%}tjt|d& ||}W d    n1 sw   Y  |jd"dg  }tddgdd'|_| rd(nt}|d ||d< t	|| td
g d)i}tj|jg d*d+}d,}tjt|d& |jj|ddt}W d    n	1 s>w   Y  t||d
  t|jj|ddd-d! |d
  d.}tjt|d& |j|ddt}W d    n	1 s}w   Y  |d
g }t	|| |j|dd}|d/d! }t	||d
g  |d0d! }d1}tjt|d& |t}W d    n	1 sw   Y  |tjj}|d2d! }tj	||d
g dd3 tj	||d
g dd3 t	||d
g  t	||d
g  t|jj|ddtj |d
  t	|j|ddtj | td
g d4i}tj|jg d5d+}d,}tjt|d& |jj|ddt}W d    n	1 sSw   Y  t||d
  t|jj|ddd6d! |d
  d.}tjt|d& |j|ddt}W d    n	1 sw   Y  |d
g }t	|| t	|j|ddd7d! |d
g  td
g d8i}tj|jg d9t tdd:}|j|ddt!}t|j"j#|j"j$d;}t
g d8|d}d
|j_%t|| g d<}tj&'dj(d"dd=d>}t j)||dd;}ttj&'d*d?}|j|dd }|jt+|dd }t||j#dd}
|,|
}t	|| |j|dd}|- }|j./ }t+|0|}|0|}t |dg d<d@}|j|dddA- }t	|| t j)t1d2dB|dd;}t|}t3|j4ddCj5d"| tg dDd }t3|j4ddCj5d| d S )EN	r   r   r   brM   rM   crN   rN   r   rM   rN   dTr   	   rR   rR      rS   rS      r?      r   rM   abcdrM   namer   r   rR   rS   r?   r   FrC   r   r   rM   rM   r   rM   zrN   rP   rN   rP   rN   rP   yrR   rS   rT   r?   ABvaluesrb   rd   )rT      r   numeric_onlyzJohn P. Doez	Jane Dove	person_idperson_namer!   c                 S   s   | S Nr   xr   r   r   <lambda>~       ztest_basic.<locals>.<lambda>r   c                 S   s   |  djd S )Nri   r   )drop_duplicatesilocrl   r   r   r   r      s   ztest_basic.<locals>.f7DataFrameGroupBy.apply operated on the grouping columnsmatchrY   str)rU         )r   
         (   )binsusing SeriesGroupBy.sumc                 S   
   t | S rk   r   r=   xsr   r   r   rn         
 zusing DataFrameGroupBy.sumc                 S      t j| ddS Nr   axisr   r1   r   r   r   r   rn          c                 S   r   r   r   r   r   r   r   rn      r   zusing DataFrameGroupBy.maxc                 S   s   t j| S rk   )r   maximumreducer   r   r   r   rn          )check_dtype)rU   rw   rx   )ir   ry   rz   r{   r|   c                 S   r   rk   r   r   r   r   r   rn      r   c                 S   r   rk   r   r   r   r   r   rn      r   )rR   r   r   r   r   rR   rS   rT   r?   )labelsr   foobarbazquxd   r:   r   r?   )r   r   sortrD      future_stackr+   r2   r<   r4   z25%z50%z75%r1   )6r   r   r   listr   nanrF   r2   tmassert_frame_equalr   r=   ri   	transformrp   rq   assert_produces_warningFutureWarningrH   copyr   r   objectastypepdcutr   assert_series_equalr1   r   r   filterr)   r   rd   r   r   rY   randomdefault_rngintegersr   standard_normalasarrayr"   describecodesargsorttaker   repeatassert_index_equalstackget_level_values) using_infer_stringrJ   data	exp_indexexpectedr$   cat1cat2rI   gbexp_idxrm   gr   msgdtyperN   gbcresult2result3result4result5levelsr   groupeddesc_resultidx
ord_labelsord_dataexp_catsexpcexpr   r   r   
test_basic^   s   

$




r   c                 C   s   t tdddttddgtdgdgd dgd  tdgd	d
gdd}|jd	g| d}t tdddttddgtdgdgd tdgd	d
gdd}d}tjt	|d |
d}W d    n1 shw   Y  t|| d S )NrS      r   rM   ry   r   rU   rR   Index1Index2)r   r   r   r   r   levelrD      z&you will need to pass a length-1 tuplers   )r   r   r   r   r   rangerF   r   r   r   	get_groupr   )rD   rI   r   r   r   r$   r   r   r   test_level_get_group  s,   r   c                  C   s   t dgd dgd  g dd tdd} t| jg dd	d
| _| dd  }|jdd	d}g d}t|g dd	d
}g dt|g}t	j
|ddgd}tdgd |dd}t|| d S )Nrb      rc   )highmedlowr?   g      (@)rA   doseoutcomes)r   r   r   Tr   rA   r   r   )r   sort_remaining)r   r   r   r   r   r   )rb   rb   rb   rc   rc   rc   r   rS   r+   r   rY   )r   r   r   r   r   rF   value_countsr#   r   r   from_arraysr   r   r   rI   r$   r   r   r   r   r   (test_sorting_with_different_categoricals#  s   
r   r   TFc           
      C   sj  t td| d}t tdddg| d}tt|}t|||d}|jdd	gd
d}tj||gdd	gd}tg d|dgd}|	dd }t
|| | }t
|| d}	t
jt|	d |tj}W d    n1 sqw   Y  t
|| tj||gdd	gd}td|d}d}	t
jt|	d |	dd }W d    n1 sw   Y  t
|| d S )Nabcr   aaar   rM   r   )missingdenserd   r   r   TrC   r   )r   rR          @rd   )r   r!   c                 S   r   r   )r   r2   rl   r   r   r   rn   I  r   ztest_apply.<locals>.<lambda>zusing DataFrameGroupBy.meanrs   rR   r   rr   c                 S   s   dS NrR   r   rl   r   r   r   rn   Z  ro   )r   r   r   r   r   r   rF   r   r   rH   r   r   r2   r   r   aggr   r   )
r   r   r   rd   rI   r   r   r   r$   r   r   r   r   
test_apply9  s.   r   c                 C   sp  t g dg ddd}t g dg ddd}t||g dd}d	d
gd |d< |jg d| d}tj||d	d
gd gg dd}tdtg d|di }| }| sdt|||d	d
ggt	ddd}t
|| |jddg| d}tj||gddgd}tg dg dd|d}| }| st|||gt	d|rdddndd}t
|| |d  }|d }t
|| t g dg dddg dg dd}	t|	}|jd | d}
|
 }tt	d!d t	d"dd#}td$d$gd%d&gd'|d}| s	tt	d"d t	d"dd#}||}t
|| |jd d(g| d}|d)}tg d*t g dg dddg d+d,d d(g}| sHt||jjd-dggd d(g}t
|| d.D ]}|\}}||}||j|k|j|k@  }t
|| qPg d/g d0g d1d2}	t|	}t|d	 tdd3d4}||d5< |jd5d6gd7| d8}|d)}|jd5d6gd| d8}|d) }t
|| d S )9NrZ   r[   Tr   r]   r^   r`   ra   r   r   rS   rE   )rb   rc   rE   rC   r   rd   r   ABCr   r   rb   rc   )r   r   r   r   )rd   rE   AB )r   rM   r   rM   r   rM   rN   rR   rR   rS   rS   ry   rz   r{   r|   )catintsvalr   abr   )rY   r   r   g      ?      4@r{   )r   r   r   r2   )      $@g      >@r   g      D@rR   rS   rR   rS   )r   r   r   rR   ))r   rR   )rM   rS   )rM   rR   )r   rS   )ry   r   r?   r   r?   rR   rR   ry   rz   r{   r|   2   <   F   )rP   rN   er   r   rP   rN   r   r   r   ry   rT   r   r   Fas_indexrD   )r   r   rF   r   r   r   r#   r=   r(   r   r   r   r   r2   r   r"   r   	set_indexr   rd   r   r   r   r   r   linspacereset_index)rD   r   r   r   rI   r   r   r   r$   rP   groups_single_keyr   groups_double_keykeyrN   ir   groupsgroups2r   r   r   test_observed^  s   	


	

r  c                 C   s   g dg dg dd}t |}t|d g d}d|_|j|dg| d	}tj|g dgddgd
}t g dg dd|d}| sOt||jg dgddg}|	d}t
|| d S )N)rT   rT   r?   rU   r`   )ry   r      "   )C1C2C3r  )rR   rS   rT   r   r   r  rC   r   )      @r  g      @g      @)r   g      Y@g      i@g      A@)r  r  r   r2   )r   r   r   rY   rF   r   r   r(   rd   r   r   r   )rD   rP   rI   rd   r  r   r   r$   r   r   r   test_observed_codes_remap  s   
r  c                  C   s   t tjdjddddtjdjddddtjdjdddddd} | jtd| d	< | jg d
dd}|	 }|j
jd  | j ksNJ |j
jd  | j ks]J |j
jd  | j kslJ d S )NrS   r      i0u  r   '  )r   int_idother_idr   categoryr   )r   r  r  TrC   rR   )r   r   r   r   r   r   r   rv   rF   r+   r   r   r6   r  r  )rI   r   r$   r   r   r   test_observed_perf  s   "r  c                 C   s   t g dg dd}t|g dd}|jd| d}|j}| r0tdd	gd
dtdgd
dd}ntdd	gd
dtg d
dtdgd
dd}t|| d S )N)r   rN   r   r   r   rR   rS   rT   r   valsr   rC   r   rS   int64r   rR   )r   rN   )r   r   rF   r  r   r   assert_dict_equal)rD   r   rI   r   r$   r   r   r   r   test_observed_groups  s   "
r&  z,keys, expected_values, expected_index_levelsr   )rw   	   r   r   ru   rM   )	re   r   r   r   r   r'  r   r   r   r?   rU   r   a2)	rw   r   r   r   r'  r   r   r   r   test_seriesc           
      C   s   t tg dg ddtg dg ddg dg ddddg}d	| vr,|jd	d
}|j| dd}|r9|d }| }t| dkrF|}ng ddg d g}t||| d}t d|i|d}	|re|	d }	t	||	 d S )NrR   rR   rS   r   r  r(  )re   r   r'  )r   r)  rM   rN   r   r)  rM   rj   FrC   rN   rR   	r   r   r   rR   rR   rR   rS   rS   rS   rT   r   rR   rS   )r   r   r   )
r   r   r  droprF   r=   r   r   r   assert_equal)
keysexpected_valuesexpected_index_levelsr*  rI   r   r$   r   r   r   r   r   r   test_unobserved_in_index  s6   
r3  c                 C   s   t tdtjdgg ddg dd}|jd| d}|j}| r*dtdd	gd
di}ntdd	gd
dtg d
dtg d
dd}t|| d S )Nr   )r   rM   rP   r  r   r!  r   rC   r   rS   r#  r$  )	r   r   r   r   rF   r  r   r   r%  )rD   rI   r   r$   r   r   r   r   test_observed_groups_with_nanH  s   

r4  c                  C   sl   t dtjtjgg dd} tg d}t| |d}|jdddd	 d
}|d	 jd
g }t	|| d S )Nr   r   r  r   )r   serr   FrC   r5  r   )
r   r   r   r   r   rF   r5   rq   r   r   )r   r5  rI   r$   r   r   r   r   test_observed_nth]  s   r6  c                 C   s   t tjdtjdgg dd}tg d}t||d}|jd| d  }| r8tt dgg dddgd}ntt g dg dddtjtjgd}t	|| d S )	Nr   r   r  r`   )s1s2r7  rC   rS   )
r   r   r   r   r   rF   r-   r
  r   r   )rD   r7  r8  rI   r$   r   r   r   r   #test_dataframe_categorical_with_nanh  s   r9  rD   r   c           	      C   s   t g dg d| d}tg d}t||d}|jd||dd d}t|jjd	d
}t|j}|s:d|| < t||ksRd|  d| d| d| }J |d S )N)rP   r   rM   r   rP   rM   )r   rM   r   rP   r   )labelr   r:  )rD   r   r   r-   r   r$  r   zDLabels and aggregation results not consistently sorted
for (ordered=z, observed=z, sort=z
)
Result:
)	r   r   r   rF   	aggregater   arrayisnar)   )	r   rD   r   r:  r   rI   r$   aggrr   r   r   r   0test_dataframe_categorical_ordered_observed_sort|  s2   	
r?  c                  C   s  t jddd} tjdjdddd}tj|| dd	}ttjd	d
}|j
|dd }|j
t|dd }|| }t|j|jdd|_t|| |j
|dd}| }|j }||}	||}
|
j
|	dd }t|| t|j|j t|jd|jd tjtdd| dd	}t|}t|jddjd| tg dd }t|jddjd| d S )Nz
2014-01-01r?   )periodsrS   r   r   r   Tr   r   FrC   r   r   r   r   rR   )r   
date_ranger   r   r   r   r   r   r   r   rF   r2   r   r"   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   rJ   r   r$   r   r   r   r   r   r   r   r   r   r   r   test_datetime  s>   




rB  c                  C   s  t jd} g d}| jdddd}tj||dd}tt t dd	d	dt
d
d}||d< |djddd }|t
d
 j|jdd }ttjg d|dddd|_t|| |jddd }|t
d
 j|jdd }ttjg d|dddd|_t|| d S )NrS   r   r   r?   rz   r   Tr   rW   rj   rJ   Fr   rC   r   rR   rS   rT   ru   )r   r   r   r   r   r   r   r   r   reshaper   r  rF   r=   r   r   r   r   r   )sr   r   rJ   rI   r$   r   r   r   r   test_categorical_index  s$   &rG  c                  C   sz   t g dg ddd} ttjdd| d}|g dd	  }t	|j
dd
j|  t|j
dd
jj| j d S )N)r   r   r   r   r   Tr   rS   )rz   r?   rj   r`   rU   r   )r   r   r   r   r   r   rF   r   r   r   r   r!   assert_categorical_equalrd   )rJ   rI   r$   r   r   r   !test_describe_categorical_columns  s   rI  c                  C   s   t tdddgd tdd d} | d d	| d< | jd
dgddd   }| }tddgddd}t	
|j| t	|jj|j |d |d  }tddgtddgd
dd}t	|| d S )Nry   rb   rc   rU   XYXXYrS   )r   mediumartistrK  r  rL  FrC   r   r   rY   r   r?   XYru   r   )r   r   r   r   rF   r+   unstackr   r   r   r   r!   rH  rd   r   r   r   )rI   gcatr$   exp_columnsr   r   r   r   test_unstack_categorical  s   rS  c                  C   st   t tjtjddddddddg
} t|  jd}tjt	dd | 
|  W d    d S 1 s3w   Y  d S )NrR   rS   rT   r?   z$Grouper and axis must be same lengthrs   )r   r   r   r   r   dropnard   pytestraises
ValueErrorrF   r2   )seriesr}   r   r   r   test_bins_unequal_len  s
    "rY  rX  r   r?   rT   rR   rS   rb   rc   c                 C   s   | d S r   r   )r   r   r   r   rn     s    rn   re   c                 C   sH   | j ttddddd}|t}t|t| d}t|| d S )NABBAr  r$  FrC   r   )rF   r   r   r;  r   r0  r   r   )rX  r   rF   r$   r   r   r   r   test_categorical_series  s   
r\  c                     s  t tg dg dg dg dd  jddgdd	d
 } t tddg jjjdddgddgdg dd}t| |  fdd}d}tjt	|d  jd|gdd	d
 } W d    n1 sdw   Y  t tddg jjjdddgddgdg dd}t| | t
g ddd}d}tjt	|d  jd|gdd	d
 } W d    n1 sw   Y  t| | ddg}t tddg jjjdddgddgdg dd}dD ]}ttd|d _ j|dd	d
 } t| | qd S )NrR   rS   rS   r   )ry      r^  )e   f   g   )r   rb   rc   r   rb   FTr  rR   rS   r  ry   r^  r_     rj   c                    s    j | df S )Nrb   )loc)rrI   r   r   rn   ;  r   ztest_as_index.<locals>.<lambda>z*A grouping .* was excluded from the resultrs   r   )r   rM   rM   ru   )NrN  rc   r   )r   r   rF   r=   r   r   r   r   r   r   r   r   r   r   )r$   r   r   r   rF  group_columnsrY   r   re  r   test_as_index&  s^   	rg  c                  C   s  t d} tdtt d| ddi}t| | ddd}tt d| ddd}t|jdddd	 j| t|jdddd	 j| tdtt d| ddi}t| | ddd}tt dt dddd}t|jdddd	 j| t|jdddd	 j| d S )
Nr   rb   baTr   rM  bacFr   )	r   r   r   r   r   r   rF   r-   r   )r   rI   r#   nosort_indexr   r   r   test_preserve_categoriesb  s&   rk  c               	   C   s   t g dg dttdtdddttdtdddd} t d	d
tjgddtjgttdtdddttdtdddd}dD ]/}| j|dddjdd}| j|dddjdd }|j|j	d}t
|| t
|| qDd S )N)rR   rS   rR   rR   rS   )ry      r      r  abaabri  Fr   T)rb   rc   r  r  r   g      ?g      9@r   )r  r  byr  rD   rf   rj   )r   r   r   r   r   rF   r2   r
  r"   r!   r   r   )rI   exp_fullr'   result1r   r   r   r   r   test_preserve_categorical_dtype  s6   	

rs  zfunc, valuessecondfourththirdc                 C   s   t g ddd}tg d|d}|d}t||  }tddgt||jd	dd}t|| |dd
 }t||  }|d
 }t	|| d S )N)r-   rt  rv  ru  Tr   )rC  rC  rw  )payloadr'   rx  rw  rC  r$  r'   )
r   r   rF   getattrr   r   r  r   r   r   )funcrd   rN   rI   r   r$   r   sgbr   r   r   test_preserve_on_ordered_ops  s   
r|  c                  C   sV  t tjdd} tg d}tj|g ddd}| j|dd	 }| j|dd	 }t
|j|j|jd	|_t|| tg d
}tj|g ddd}| j|dd	 }| j|dd	 |j}t
|j|j|jd	|_t|| tg dg ddd	}tg d|d} | jddd	 }|d j}tdddtjg}t|| d S )NrS   r'  r,  r-  Tr   FrC   r   )	r   r   r   rR   rR   rR   rT   rT   rT   rD  rL   rO   rQ   rV   rM   r   rR   r?   )r   r   r   r   r   r<  r   r   rF   r2   r   r   r   r   r   r   r"   r   rd   r   assert_numpy_array_equal)r   r   rJ   r$   r   r   r   r   test_categorical_no_compress  s6   
r~  c                  C   sb   t d gd tg dd} | d d }ttg ddgdtg d	dd
dd}t|| d S )NrT   )trainr  testrZ  rb   rc   r  r  r  r   r   rY   r   )r   r   rF   r-   r   r   r   rI   r$   r   r   r   r    test_groupby_empty_with_category  s   r  c                  C   s   t dtjddddi} dd tdddD }t||}| jdgd	d
} tj	| j
tdddd|d| d< | jdgddd  }|t|jdd d }t|j|jjd|_t|| d S )NvaluerS   r   r  r   c                 S   s   g | ]}| d |d  qS )z - i  r   ).0r  r   r   r   
<listcomp>  s    ztest_sort.<locals>.<listcomp>i  T)rp  	ascendingi)  F)rightr   value_grouprC   c                 S   s   t |  d S )Nr   )floatsplitrl   r   r   r   rn     s    ztest_sort.<locals>.<lambda>)r  ru   )r   r   r   r   r   r   r   sort_valuesr   r   r  rF   r+   sortedr   r   rY   r   r   )rI   r   
cat_labelsr&   r   r   r   r   	test_sort  s   

r  c              	   C   s   t g dg dg dg dg dg dg dgg dd	}t|d
 |d|d
< |jd
| dd }| rFddgddgddgddgg}g d}nddgddgddgddgg}g d}t |ddgt|d
|dd}t|| d S )N)	(7.5, 10]ry   ry   )r  r   rz   )(2.5, 5]rU   r{   )(5, 7.5]r   r|   )r  r?   r  )(0, 2.5]rR   r  )r  re   r  )r   r   r   rj   r   r   Fr   rR   r  rU   r{   r   r|   ry   )r  r  r  r  )r  r  r  r  r   r   rX   r!   r   )r   r   rF   r-   r   r   r   )r   r   rI   r$   data_valuesindex_valuesr   r   r   r   
test_sort2  s0   	
r  c                 C   sP  t tdddtdddtdddtdddtdddtdddtdddgg dg ddg dd	}t|d
 |d|d
< | rdddgddgddgddgg}tdddtdddtdddtdddg}n$ddgddgddgddgg}tdddtdddtdddtdddg}t |ddgt|d
|dd}|jd
| dd }t|| d S )Ni  re   rR   rS   rU   )ry   r   rU   r   r?   rR   re   r   )dtr   r   rj   r  r   r  r{   r   r|   ry   r   r   rX   r  Fr   )r   r   r   r   rF   r-   r   r   )r   r   rI   r  r  r   r$   r   r   r   test_sort_datetimelike)  sF   






	







r  c                  C   s  t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| | jddd	jjdd}tdt	j
t	j
g|dd}t|| d S )Nr   r   rM   r   r  rR   rS   rR   rZ  rb   ru   FrC   )rT   rR   r   rc   r   	min_countrR   rT   rS   )r   r   r   rF   rc   r=   r   r   r   r   r   rI   expected_idxr$   r   r   r   r   test_empty_sum\  s    r  c                  C   s   t tg dg ddg dd} tg ddd}| jddd	j }tg d
|dd}t|| | jddd	jjdd}tg d
|dd}t|| | jddd	jjdd}tddt	j
g|dd}t|| d S )Nr  r   r  r  rZ  rb   ru   FrC   )rS   rR   rR   rc   r   r  rR   rS   )r   r   r   rF   rc   r7   r   r   r   r   r   r  r   r   r   test_empty_prodx  s   r  c                  C   s   t ttdtttjddddd tdd} | jdd	gd
d }t	j
tg dttjddddgdd	gd}t ddddddddtjdg	i|d}t|| d S )N	abcbabcbaz2018-06-01 001minrT   )freqr@  r'  )key1key2rd   r  r  FrC   r   r   rd   r   r?   r   rU   r   rS   r   )r   r   r   r   rA  r   r   rF   r2   r   r   r   r   r   )rI   r$   r   r   r   r   r   ,test_groupby_multiindex_categorical_datetime  s"   
	
$r  zas_index, expectedr+  r  r$  r]  r   rm   )r   r   rY   r   rM   rm   c                 C   sP   t tg dddg dg dd}|jddg| d	d
d  }t|| d S )Nr+  r  r$  r]  r   r  r   rM   Tr  rm   )r   r   rF   r=   r   r/  )r  r   rI   r$   r   r   r   ,test_groupby_agg_observed_true_single_column  s
   r  r   c                 C   sJ   t g dg ddd}t g dg ddd}|jd| d}t|| d S )NrO   Fr   )Nr   rM   rN   rR   r   )r   shiftr   r/  )r   ctr   r&   r   r   r   
test_shift  s   r  c                 C   sX   |   dd }|d d|d< |d d|d< tg d|d< |jdgd	d
}|S )a  
    DataFrame with multiple categorical columns and a column of integers.
    Shortened so as not to contain all possible combinations of categories.
    Useful for testing `observed` kwarg functionality on GroupBy objects.

    Parameters
    ----------
    df: DataFrame
        Non-categorical, longer DataFrame from another fixture, used to derive
        this one

    Returns
    -------
    df_cat: DataFrame
    Nr?   rb   r  rc   r`   rE   rG   rR   r   )r   r   r   r.  )rI   df_catr   r   r   r    s   r  	operationr   rH   c           	      C   s   t g d| d jdd}t g d| d jdd}t||g}tg d|dd }| jddgd	d
d }|dkr;dnd}tjt	|d t
||t}W d    n1 sVw   Y  t|| d S )N)r   r   r   r   rb   r  )onethreer  tworc   )rS   r?   rR   rT   rE   r   r   rY   TrC   rH   using np.sumr~   rs   )r   r   r   r   r   r#   rF   r   r   r   ry  r=   r   )	r  r  lev_alev_br   r   r   r   r$   r   r   r    test_seriesgroupby_observed_true  s   r  c           	      C   s  t jtddgddtg dddgddgd \}}td	d
tjdtjdg|dd}|dkrNd}tjt	|d |j
ddd}W d    n1 sIw   Y  | jddg|dd }|dkr_dnd}tjt	|d t||t}W d    n1 szw   Y  t|| d S )Nr   r   Fr   r  r  r  rb   rc   r   rS   r?   rR   rT   rE   r  r   z.The 'downcast' keyword in fillna is deprecatedrs   r   infer)downcastrC   r~   r  )r   r   r   	sortlevelr   r   r   r   r   r   fillnarF   ry  r=   r   )	r  rD   r  r   _r   r   r   r$   r   r   r   )test_seriesgroupby_observed_false_or_none  s(   r  zobserved, index, datar   r   rb   r  )r  r  r  r  r  r  r  r  rc   )rS   rS   r?   r?   rR   rR   rT   rT   r   r  )rb   rc   Nc                 C   s>   t ||dd}| jddg|dd dd }t|| d S )NrE   r  rb   rc   rC   c                 S   s   |   |  dS )Nr4   r1   r  rl   r   r   r   rn   M  s    z8test_seriesgroupby_observed_apply_dict.<locals>.<lambda>)r   rF   rH   r   r   )r  rD   r   r   r   r$   r   r   r   &test_seriesgroupby_observed_apply_dict  s
   .r  c                 C   sD   | j ddgddd  }| j ddgdd d }t|| d S )Nrb   rc   FrC   rE   )rF   r2   r   r   )r  r   r$   r   r   r   4test_groupby_categorical_series_dataframe_consistentR  s   r  code)rR   r   r   )r   r   r   c                 C   s   t g dg dg dd}tj| tdd}d}tjt|d |j|d	d
d}W d    n1 s3w   Y  | }d}tjt|d |j	j|dd
d}W d    n1 sYw   Y  | j	}t
|| d S )Nr`   )rC  rw  )rU   r   re   r   r   r   r  z+DataFrame.groupby with axis=1 is deprecatedrs   rR   F)r   rD   z5The 'axis' keyword in DataFrame.groupby is deprecatedr   )r   r   r   r   r   r   r   rF   r2   Tr   )r  rI   r   r   r   r$   gb2r   r   r   r   test_groupby_categorical_axis_1Y  s   
r  c                 C   s\   t tddg|dddgdddgd	}| }|jd| d
jt jdd }t|| d S )NBobGregr   rR   rS   )NameItemr  r  rj   rC   T)skipna)	r   r   r   rF   r   r=   r
  r   r   )rD   r   rI   r   r$   r   r   r   $test_groupby_cat_preserves_structurei  s   r  c                  C   s^   t g dtdd} tjtdd | ddd  W d    d S 1 s(w   Y  d S )	NrZ   r?   r>   r   z'vau'rs   r>   c                 S   s&   t | jd d g| jd d gdS )NrC  r>   vaur  )r   rq   )rowsr   r   r   rn     s     z/test_get_nonexistent_category.<locals>.<lambda>)r   r   rU  rV  KeyErrorrF   rH   re  r   r   r   test_get_nonexistent_categoryz  s   
"r  c                 C   s   | dkr	t d tttdtddttdd tdddgd	 d
}t| |}|r/d	nd}|jddg|dd }| dkrIt|| rGJ d S t|| }|sr| dv rrt j	t
dd ||  W d    d S 1 skw   Y  d S || }t||ks~J d S )Nngroupngroup is not truly a reductionAABBABCDr  r   rS   皙?r?   cat_1cat_2r  rl  r  r  rC   r  r,   r/   r.   (empty group due to unobserved categoriesrs   )rU  skipr   r   r   r   rF   hasattrry  rV  rW  r   )reduction_funcrD   rI   r%   expected_lengthseries_groupbyr   r$   r   r   r   0test_series_groupby_on_2_categoricals_unobserved  s4   




r  c                 C   sp  | dkr	t d | dkrt jjdd}|| tttdtddttd	d
 tdddgd d}tdtdtdtdtdg}t	| |}|j
ddgddd }t|| }| dv r~t jtdd ||  W d    d S 1 sww   Y  d S || }t|  }	|D ]}
|j|
 }t|	rt|s||	ksJ q|	dkr| dkrt|jtjsJ d S d S d S )Nr  r  r,   z6TODO: implemented SeriesGroupBy.corrwith. See GH 32293reasonr  r   r  r   rS   r  r?   r  ACBCCACBCCr  r  FrC   r  r  r  rs   r   r=   )rU  r  markxfailapplymarkerr   r   r   tupler   rF   ry  rV  rW  -_results_for_groupbys_with_missing_categoriesrc  r   r=  r   
issubdtyper   integer)r  requestr  rI   
unobservedr%   r  r   r$   zero_or_nanr   r   r   r   r   ?test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans  sH   

"




 r  c                 C   s   | dkr	t d tttdtddttdtddg dd	}g d
}|jddgdd}t| |}t|| | }|D ]	}||jvsGJ q>d S )Nr  2ngroup does not return the Categories on the indexr  r   r  111112r  r  r  r  r  )rb   2)rc   r  )rE   1)rE   r  r  r  TrC   )	rU  r  r   r   r   rF   r   ry  r   )r  rI   unobserved_catsdf_grpr%   r&   r   r   r   r   >test_dataframe_groupby_on_2_categoricals_when_observed_is_true  s   

r  c                 C   s  | dkr	t d tttdtddttdtddg dd	}g d
}|jddg|d}t| |}|s\| dv r\t jtdd t	|| |  W d    d S 1 sUw   Y  d S t	|| | }t
|  }|tju r{|j|    syJ d S |j| |k  sJ d S )Nr  r  r  r   r  r  r  r  r  r  r  r  rC   r  r  rs   )rU  r  r   r   r   rF   r   rV  rW  ry  r  r   r   rc  isnullr)   )r  rD   rI   r  r  r%   r&   r   r   r   r   ?test_dataframe_groupby_on_2_categoricals_when_observed_is_false  s2   	



r  c                  C   s   g dg dg dd} t | }t|d tddd}||d	< |jd	d
gdddd}|d d}|dd }t|| d S )N)ry   r   r?   rR   r   )rP   rN   rP   rN   r  r   r   rz   rU   r   r   TFr  r   rD   r2   )	r   r   r   r   r	  rF   r   r   r   )rP   rI   r   r  r$   r   r   r   r   3test_series_groupby_categorical_aggregation_getitem$  s   r  zfunc, expected_valuesc                 C   sb   t g dg dtg ddd}|d| }t d|itg ddd	d
}t|| d S )Nr   )r   rR   rR   rS   rS   )r   r   r   r   rR   )idr  r  r  r  r  r-  ru   r   )r   r   r  rF   r   r   r   r   )rz  r1  rI   r$   r   r   r   r   $test_groupby_agg_categorical_columns1  s   
r  c                  C   s~   t dtg dg ddi} t dddgitddgd}| g dtj}t	|| | g d }t	|| d S )	Nrb   r  r   r  rS   rR   r   r  )
r   r   r   r<  rF   r   r   r6   r   r   rI   r   r$   r   r   r   test_groupby_agg_non_numericF  s   r  rz  c                 C   sl   t dgtdgddj d}|dd }t||  }tdgtdgddd|d jd	}t	|| d S )
Ni  rM   r  r$  rZ  rb   rc   ru   r   rY   r   )
r   r   r   
as_orderedrF   ry  r   r   r   r   )rz  rI   
df_groupedr$   r   r   r   r   <test_groupby_first_returned_categorical_instead_of_dataframeQ  s    r	  c                  C   sx   t ddg} d| j_tg dtg dt| dd}tdd	d
git| ddd}|jdddd	 }t
|| d S )NrR   rS   F)rR   rT   rU   re   r   r  rV   r   r   g      @rM   ru   r   r   )r   r<  flags	writeabler   r   r   r   rF   r2   r   r   )rJ   rI   r   r$   r   r   r   test_read_only_category_no_sort`  s   r  c                  C   s   t g dg dd} | d djjg ddd| d< t d	d	d
d	dd
ddd
ddd
d}|jddd}tg dg ddddd|_| jddgdd 	 }t
|| d S )N)smalllarger  r  rK  r  r  rK  )rE   rb   rb   rE   rb   rE   rb   rE   )r   r   r   r  )tinyr  rK  r  Tr   r   )rb   rE   rR   rT   rS   r   r   r   )r   r   rY   r   FrC   )r   r   r   set_categoriesrename_axisr   r!   rF   r:   rP  r   r   r  r   r   r   #test_sorted_missing_category_valuesl  s6   
r  c                  C   s   t dg di} | d d| d< | dj }tg dtg dddd| d jd}t	|| | d
ddi}| }t|| d S )	Ncol_numrR   rR   rS   rT   r  col_catr   ru   r  r-   )r   r   rF   r  r-   r   r   r   r   r   r   to_framer   r  r   r   r   1test_agg_cython_category_not_implemented_fallback  s   r  c                  C   s   t g dg dddtjdgg dg dd} | dd	i} | d
dgdd }tjddgddggdd}t ddgddgddgd|d}t	|| d S )N)rR   rR   rR   rR   r   r  g?g333333?)r   r   r   fee)rb   rc   numerical_col
object_colcategorical_colr  r  rb   rc   c                 S   s   |    S rk   )r=  r=   re  r   r   r   rn     r   z7test_aggregate_categorical_with_isnan.<locals>.<lambda>rR   rS   rZ  r   r   )r  r  r  r   )
r   r   r   r   rF   r   r   r   r   r   r   r   r   r   %test_aggregate_categorical_with_isnan  s&   
r  c                  C   s   t g dg dd} tjg ddd}| d || d< d}tjt|d	 | d
d t	| d< W d    n1 s=w   Y  | 
 }t g dg dg dd}|d ||d< |d ||d< t|| d S )N)rR   rR   rR   rS   rS   rT   )WaitingOnTheWay	Deliveredr  r  r  )
package_idstatus)r  r  r  Tr   r!  zusing SeriesGroupBy.maxrs   r   last_status)r  r  r  r  r  r  )r   r!  r"  )r   r   CategoricalDtyper   r   r   r   rF   r   r1   r   r   )rI   delivery_status_typer   r$   r   r   r   r   test_categorical_transform  s.   r%  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||  }|rL| tj	}|j
ddg|dd	 }	t|	|  }
t|
| d S N)r   r   rR   rR   )r   rR   rR   r   r   r   rR   r   rM   r   rN   ru   )r-   r0   rC   )r   r   r   r   r   r   r   rT  r   r#  rF   ry  r   r   )rz  rD   r   r   rI   r   r   expected_dictr   srs_grpr$   r   r   r   Ftest_series_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r)  c                 C   s   t g d}g d}t|||d}t ddg}tj||gddgd}tdtjtjdg|d	d
tdtjtjdg|d	d
d}||   }|rN| 	tj
}|jddg|d}	t|	|  }
t|
| d S r&  )r   r   r   r   r   r   r   r  rT  r   r#  rF   ry  r   r   )rz  rD   r   r   rI   r   r   r'  r   r  r$   r   r   r   Btest_df_groupby_first_on_categorical_col_grouped_on_2_categoricals  s   r*  c                  C   s   t tg dg ddtdd} | jdddd}|j}tjd	d
gddtjdgddtjg ddd}| | ks>J | D ]}t	|| ||  qBd S )N)rM   rM   r   r   r  rT   )r  r'   r  Fr   r   rR   intpr$  rS   )rM   r   rN   )
r   r   r   rF   indicesr   r<  r0  r   r}  )rI   r   r$   r   r  r   r   r   2test_groupby_categorical_indices_unused_categories4  s   r-  c                 C   sj   t dg di}|d d|d< t|dd |  }ttg ddtg dddd}t|| d S )Nr   r   r  rM   ru   )rY   r   )	r   r   ry  rF   r   r   r   r   r   )rz  rI   r$   r   r   r   r   1test_groupby_last_first_preserve_categoricaldtypeH  s   r.  c               	   C   s   t ddgddgddgd} | jdddd} | jd	d
gdd d }tddgttddgd	dtddgd
dgdd}t	|| d S )NrR   rS   ry   r^  r   r  rV   r$  r   rM   TrC   rN   ru   r   )
r   r   rF   r6   r   r   r   r   r   r   r  r   r   r   )test_groupby_categorical_observed_nuniqueT  s   r/  c                  C   s   t jddgdd} tddgddgddggddgd	d| i}|dd  }tddgtddgdd
dt jddgddd}t	|| d S )Nr  bigTr   rR   rS   grpdescriptionrj   ru   r  )
r   r#  r   r   rF   r1   r   r   r   r   )r   rI   r$   r   r   r   r   ,test_groupby_categorical_aggregate_functionsc  s   
r3  c                 C   s   t ddgg dd}tt ddgg ddddgd}|jd| |d	}| }| r4td
ddgi|d}ntg dg d}td
g di|d}d|j_t|| d S )NrR   rS   r   r  rT   r?   )rm   r_   rm   )rD   rT  r_   r   )rT   r?   r   )	r   r   rF   r=   r   r   rY   r   r   )rD   rT  r   rI   r   r$   r   r   r   r   r   test_groupby_categorical_dropnau  s   "r4  
index_kind)r   singlemultic                 C   s  |dkr|sd}|  tjj|d n|dkr|stjdd ttg dg d|dtd	d
}|dkr9dg}	n |dkrFdg}	||	}n|dkrYddg}	|d |d< ||	}t	||}
|j
|	|||d}|s|dv rtjtdd t|||
  W d    d S 1 sw   Y  d S t|||
 }|r|jdj}n|d jj}tg d}t|| |dkr|jdj}t|| d S d S )Nr,   zDGH#49950 - corrwith with as_index=False may not have grouping columnr  r   /Result doesn't have categories, nothing to testrS   rR   rS   rT   rR   r?   rT   rS   r   r?   rV   r   r6  r7  r)  r   r  r  rs   )r  rU  r  r  r  r   r   r   r  r   rF   rV  rW  ry  r   r   r   r   r   r   r   )r  r  r   rD   r  r5  r   r   rI   r0  r%   r   	op_resultr$   r   r   r   r   test_category_order_reducer  sP   


r<  r6  r7  c                 C   s   t tg dg d|dtdd}|dkrdg}||}n|dkr2dd	g}|d |d	< ||}t||}|j|| ||d
}	|dkrFtnd }
d}tj|
|d t	|	|| }W d    n1 scw   Y  |j
dj}tg d}t|| |dkr|j
d	j}t|| d S d S )Nr9  r:  r   r?   rV   r6  r   r7  r)  r   r  z%DataFrameGroupBy.fillna is deprecatedrs   )r   r   r   r  r   rF   r   r   r   ry  r   r   r   r   r   )r  r   rD   transformation_funcr5  r   rI   r0  r%   r   warnr   r;  r$   r   r   r   r   test_category_order_transformer  s4   

r?  methodheadtailc                 C   s   t tg dg d|dtdd}|dkrdg}n |dkr'dg}||}n|d	kr:dd
g}|d |d
< ||}|j|| ||d}t|| }	|dkrT|	d jj}
n|	j	dj}
t
g d}t|
| |d	krz|	j	d
j}
t|
| d S d S )Nr9  r:  r   r?   rV   r   r   r6  r7  r)  r   )r   r   r   r  rF   ry  r   r   r   r   r   r   r   )r  r   rD   r@  r5  r   rI   r0  r   r;  r$   r   r   r   r   test_category_order_head_tail  s2   
rC  )rH   r   r   c                 C   sz  |dkr|dks| s|dkrt d ttg dg d|dtdd}|dkr-d	g}n |d
kr:d	g}||}n|dkrMd	dg}|d	 |d< ||}|j|| ||d}|dkr`|dkr`tnd }	d}
tj	|	|
d t
||dd }W d    n1 sw   Y  |dks| s|dkr|d	 jj}n|jd	j}tg d}t|| |dkr|jdj}t|| d S d S )Nr   r   z(No categories in result, nothing to testr9  r:  r   r?   rV   r   r6  r7  r)  r   rH   rr   rs   c                 S   s   | j ddS )NTrf   )r=   rl   r   r   r   rn     r   z+test_category_order_apply.<locals>.<lambda>)rU  r  r   r   r   r  rF   r   r   r   ry  r   r   r   r   r   r   )r  r   rD   r@  r5  r   rI   r0  r   r>  r   r;  r$   r   r   r   r   test_category_order_apply  sD   

rD  c                 C   sV  |dkr| st jdd tddd}tg d||d}t|tdd	}|dkr-d
g}n |dkr:d
g}||}n|dkrMd
dg}|d
 |d< ||}|j|| |dd}|	 }	|r`g dng d}
t
|
|j|d
d}| rtd|
i}|dkrtt||d|_n||_n|dkrtt|t||
d}n	tt||
d	}t|	| d S )Nr   r8  r  i'  rC  r9  r   r?   rV   r   r6  r7  r)  Tr   )rT   rS   rR   )rS   rR   rT   )r   r   rY   rM   )r   r)  )r   r)  rM   )rU  r  r   r   r   r   r   r  rF   r=   r   r   r   
from_framer   r   r   r   )r  r   r5  r   r   grouperrI   r0  r   r$   r   r   r   r   r   r   test_many_categories'  s:   

rG  cat_columnsr0  c                 C   s   t g dg dd}||  d|| < d}t| t|@ r tnd }tj||d || W d    d S 1 s:w   Y  d S )Nr  )r?   rU   r   re   rV   r  z+The default of observed=False is deprecatedrs   )r   r   setr   r   r   rF   )rH  r0  rI   r   klassr   r   r   test_groupby_default_deprO  s   "rK  a1c                 C   s  |r|dkrt tdrJ td n-|dkr$d}| tjj|d n|dkr@|s@t|dkr@|s@|s@d}| tjj|d tg dg d	g d
d}|	ddd}d|vr`|j
dd}|j|||d}|rn|d }t||}	|s|dv r|ddgkrtjtdd |j|gg|	R   W d    d S 1 sw   Y  d S |j|gg|	R  }
t|||	 }|r|s|dkr||}|stdd |jd d D d|fg |_n|s||g |_t|
| d S )Nr,   z*corrwith not implemented for SeriesGroupByz1GH#32293: attempts to call SeriesGroupBy.corrwithr  r6   rR   zGH#52848 - raises a ValueError)r   r   rR   )rS   rT   rT   r(  )rL  r)  rM   r  )rL  r)  r)  rj   ro  rM   r  rL  r  rs   r:   c                 S   s   g | ]}|d fqS )r   r   )r  indr   r   r   r    s    z!test_agg_list.<locals>.<listcomp>rC  )r  r
   rU  r  r  r  r  r   r   r   r.  rF   r   rV  rW  r   ry  r  r   from_tuplesr!   r   r/  )r  r  rD   r  r*  r0  r   rI   r   r%   r$   r   r   r   r   test_agg_list[  sX   


 rO  )ir   numpyr   rU  pandasr   r   r   r   r   r   r   r	   pandas._testing_testingr   pandas.api.typingr
   pandas.tests.groupbyr   r   r(   r  r  filterwarningsrK   r   r   r   parametrizer   r  r  r  r&  r3  r4  r6  r9  r?  rB  rG  rI  rS  rY  r   renamer\  rg  rk  rs  r|  r~  r  r  r  r  r  r  r  r   r  NaTr  fixturer  r  r  r   r  r  r  r  r  r  r  r  r  r  r6   r+   r  r  r	  r  r  r  r  r%  rv   boolr)  r*  r-  r.  r/  r3  r4  r<  r?  rC  rD  rG  rK  rO  r   r   r   r   <module>   s    $		

 
 (

$
u" +
$

<!




	%
"
2




"
"
,	
&4

*


-8
0 !&
'
