o
    \i>                     @   s   d dl m Z  d dlmZmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZ ddlmZmZmZ dd	lmZ dd
lmZ G dd deeZdS )    )array)IterableMapping)Number)
itemgetterN)metadata_routing   )BaseEstimatorTransformerMixin_fit_context)check_array)check_is_fittedc                       s   e Zd ZU dZdejiZdegdgdgdZe	e
d< ejddddd	d
ZdddddddZeddd!ddZdd Zeddd!ddZe	fddZdd Zd!ddZd"ddZ fdd Z  ZS )#DictVectorizera  Transforms lists of feature-value mappings to vectors.

    This transformer turns lists of mappings (dict-like objects) of feature
    names to feature values into Numpy arrays or scipy.sparse matrices for use
    with scikit-learn estimators.

    When feature values are strings, this transformer will do a binary one-hot
    (aka one-of-K) coding: one boolean-valued feature is constructed for each
    of the possible string values that the feature can take on. For instance,
    a feature "f" that can take on the values "ham" and "spam" will become two
    features in the output, one signifying "f=ham", the other "f=spam".

    If a feature value is a sequence or set of strings, this transformer
    will iterate over the values and will count the occurrences of each string
    value.

    However, note that this transformer will only do a binary one-hot encoding
    when feature values are of type string. If categorical features are
    represented as numeric values such as int or iterables of strings, the
    DictVectorizer can be followed by
    :class:`~sklearn.preprocessing.OneHotEncoder` to complete
    binary one-hot encoding.

    Features that do not occur in a sample (mapping) will have a zero value
    in the resulting array/matrix.

    For an efficiency comparison of the different feature extractors, see
    :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.

    Read more in the :ref:`User Guide <dict_feature_extraction>`.

    Parameters
    ----------
    dtype : dtype, default=np.float64
        The type of feature values. Passed to Numpy array/scipy.sparse matrix
        constructors as the dtype argument.
    separator : str, default="="
        Separator string used when constructing new features for one-hot
        coding.
    sparse : bool, default=True
        Whether transform should produce scipy.sparse matrices.
    sort : bool, default=True
        Whether ``feature_names_`` and ``vocabulary_`` should be
        sorted when fitting.

    Attributes
    ----------
    vocabulary_ : dict
        A dictionary mapping feature names to feature indices.

    feature_names_ : list
        A list of length n_features containing the feature names (e.g., "f=ham"
        and "f=spam").

    See Also
    --------
    FeatureHasher : Performs vectorization using only a hash function.
    sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical
        features encoded as columns of arbitrary data types.

    Examples
    --------
    >>> from sklearn.feature_extraction import DictVectorizer
    >>> v = DictVectorizer(sparse=False)
    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
    >>> X = v.fit_transform(D)
    >>> X
    array([[2., 0., 1.],
           [0., 1., 3.]])
    >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},
    ...                            {'baz': 1.0, 'foo': 3.0}]
    True
    >>> v.transform({'foo': 4, 'unseen_feature': 3})
    array([[0., 0., 4.]])
    	dict_typeno_validationbooleandtype	separatorsparsesort_parameter_constraints=Tc                C   s   || _ || _|| _|| _d S Nr   )selfr   r   r   r    r   /var/www/www-root/data/www/176.119.141.140/sports-predictor/venv/lib/python3.10/site-packages/sklearn/feature_extraction/_dict_vectorizer.py__init__j   s   
zDictVectorizer.__init__FNfittingtransformingindicesvaluesc                C   s   |D ]B}	t |	trd|| j|	f }
d}	n
tdt|	 d|r/|
|vr/t|||
< ||
 |rD|
|v rD|||
  || |	 qdS )z)Add feature names for iterable of strings%s%s%s   zUnsupported type z; in iterable value. Only iterables of string are supported.N)
isinstancestrr   	TypeErrortypelenappendr   )r   fvfeature_namesvocabr   r    r!   r"   vvfeature_namer   r   r   _add_iterable_elementp   s   

z$DictVectorizer._add_iterable_element)prefer_skip_nested_validationc           	   
   C   s   g }i }|D ]\}|  D ]U\}}t|trd|| j|f }n0t|ts'|du r*|}n$t|tr?tdt| d| d| dt|trNd}| 	|||| |dura||vrat
|||< || qq| jrs|  dd t|D }|| _|| _| S )	a)  Learn a list of feature name -> indices mappings.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        self : object
            DictVectorizer class instance.
        r#   NzUnsupported value type  for : z$.
Mapping objects are not supported.c                 S   s   i | ]\}}||qS r   r   ).0ir+   r   r   r   
<dictcomp>   s    z&DictVectorizer.fit.<locals>.<dictcomp>)itemsr%   r&   r   r   r   r'   r(   r   r1   r)   r*   r   	enumeratefeature_names_vocabulary_)	r   Xyr-   r.   xr+   r,   r0   r   r   r   fit   s>   



zDictVectorizer.fitc                 C   sN  t djdksJ d| j}|rg }i }n| j}| j}d}t|tr%|gn|}t d}dg}g }	|D ]}
|
 D ]w\}}t|trLd|| j	|f }d}n;t|t
sU|d u rX|}n/t|tsrt|trrd }| j||||||||	d ntd	t| d
| d| dt| d	|d ur|r||vrt|||< || ||v r|||  |	| | q8|t| q2t|dkrtdtj|tjd}t|d t|f}tj|	||f||d}|r| jr|  tjt|tjd}t|D ]\}}|| ||< |||< q|d d |f }| jr|  n| }|r%|| _|| _|S )Nr6      zsizeof(int) != 4 on your platform; please report this at https://github.com/scikit-learn/scikit-learn/issues and include the output from platform.platform() in your bug reportTr   r#   r$   r   zUnsupported value Type r3   r4   z.
z objects are not supported.zSample sequence X is empty.r   )shaper   )r   itemsizer   r:   r;   r%   r   r8   r&   r   r   r   r1   r'   r(   r)   r*   
ValueErrornp
frombufferintcsp
csr_matrixr   emptyint32r9   r   sort_indicestoarray)r   r<   r   r   r-   r.   r    r!   indptrr"   r>   r+   r,   r0   rB   result_matrix	map_indexnew_valr   r   r   
_transform   s   



zDictVectorizer._transformc                 C   s   | j |ddS )a  Learn a list of feature name -> indices mappings and transform X.

        Like fit(X) followed by transform(X), but does not require
        materializing X in memory.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        Tr   )rR   )r   r<   r=   r   r   r   fit_transform(  s   zDictVectorizer.fit_transformc           
         s   t | d t|ddgd}|jd }| j} fddt|D }t|r>t|  D ]\}}|||f || || < q+|S t	|D ]!\}}t	||ddf D ]\}}	|	dkrb|||f ||| < qPqB|S )	aW  Transform array or sparse matrix X back to feature mappings.

        X must have been produced by this DictVectorizer's transform or
        fit_transform method; it may only have passed through transformers
        that preserve the number of features and their order.

        In the case of one-hot/one-of-K coding, the constructed feature
        names and values are returned rather than the original ones.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Sample matrix.
        dict_type : type, default=dict
            Constructor for feature mappings. Must conform to the
            collections.Mapping API.

        Returns
        -------
        X_original : list of dict_type objects of shape (n_samples,)
            Feature mappings for the samples in X.
        r:   csrcsc)accept_sparser   c                    s   g | ]}  qS r   r   )r5   _r   r   r   
<listcomp>`  s    z4DictVectorizer.inverse_transform.<locals>.<listcomp>N)
r   r   rB   r:   rangerH   issparsezipnonzeror9   )
r   r<   r   	n_samplesnamesdictsr6   jdr,   r   rY   r   inverse_transformB  s    


z DictVectorizer.inverse_transformc                 C   s   t | ddg | j|ddS )a  Transform feature->value dicts to array or sparse matrix.

        Named features not encountered during fit or fit_transform will be
        silently ignored.

        Parameters
        ----------
        X : Mapping or iterable over Mappings of shape (n_samples,)
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        r:   r;   FrS   )r   rR   )r   r<   r   r   r   	transformm  s   zDictVectorizer.transformc                 C   sD   t | d tdd | jD rdd | jD }n| j}tj|tdS )a^  Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        r:   c                 s   s    | ]	}t |t V  qd S r   )r%   r&   r5   namer   r   r   	<genexpr>  s    z7DictVectorizer.get_feature_names_out.<locals>.<genexpr>c                 S   s   g | ]}t |qS r   )r&   rf   r   r   r   rZ     s    z8DictVectorizer.get_feature_names_out.<locals>.<listcomp>rA   )r   anyr:   rE   asarrayobject)r   input_featuresr-   r   r   r   get_feature_names_out  s
   
z$DictVectorizer.get_feature_names_outc                 C   sj   t | d |st|d }| j}i }|D ]
}t|||| < q|| _dd t| tddD | _| S )a=  Restrict the features to those in support using feature selection.

        This function modifies the estimator in-place.

        Parameters
        ----------
        support : array-like
            Boolean mask or list of indices (as returned by the get_support
            member of feature selectors).
        indices : bool, default=False
            Whether support is a list of indices.

        Returns
        -------
        self : object
            DictVectorizer class instance.

        Examples
        --------
        >>> from sklearn.feature_extraction import DictVectorizer
        >>> from sklearn.feature_selection import SelectKBest, chi2
        >>> v = DictVectorizer()
        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
        >>> X = v.fit_transform(D)
        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])
        >>> v.get_feature_names_out()
        array(['bar', 'baz', 'foo'], ...)
        >>> v.restrict(support.get_support())
        DictVectorizer()
        >>> v.get_feature_names_out()
        array(['bar', 'foo'], ...)
        r:   r   c                 S   s   g | ]\}}|qS r   r   )r5   r+   r6   r   r   r   rZ     s    z+DictVectorizer.restrict.<locals>.<listcomp>r$   )key)	r   rE   wherer:   r)   r;   sortedr8   r   )r   supportr!   r`   	new_vocabr6   r   r   r   restrict  s   
!zDictVectorizer.restrictc                    s   t   }d|j_d|j_|S )NTF)super__sklearn_tags__
input_tagsdicttwo_d_array)r   tags	__class__r   r   ru     s   
zDictVectorizer.__sklearn_tags__r   )F)__name__
__module____qualname____doc__r   UNUSED4_DictVectorizer__metadata_request__inverse_transformr&   r   rw   __annotations__rE   float64r   r1   r   r?   rR   rT   rd   re   rm   rs   ru   __classcell__r   r   rz   r   r      s0   
 
M5c+

2r   )r   collections.abcr   r   numbersr   operatorr   numpyrE   scipy.sparser   rH   sklearn.utilsr   baser	   r
   r   utilsr   utils.validationr   r   r   r   r   r   <module>   s   