
    sg>                         d dl m Z  d dlmZmZ d dlmZ d dlmZ d dlZ	d dl
mZ d dlmZ ddlmZmZmZ dd	lmZ dd
lmZ  G d dee      Zy)    )array)IterableMapping)Number)
itemgetterN)metadata_routing   )BaseEstimatorTransformerMixin_fit_context)check_array)check_is_fittedc                        e Zd ZU dZdej
                  iZdegdgdgdZe	e
d<   ej                  ddddd	Zdd
ddddZ ed      dd       Zd Z ed      dd       Ze	fdZd ZddZddZ fdZ xZS )DictVectorizera  Transforms lists of feature-value mappings to vectors.

    This transformer turns lists of mappings (dict-like objects) of feature
    names to feature values into Numpy arrays or scipy.sparse matrices for use
    with scikit-learn estimators.

    When feature values are strings, this transformer will do a binary one-hot
    (aka one-of-K) coding: one boolean-valued feature is constructed for each
    of the possible string values that the feature can take on. For instance,
    a feature "f" that can take on the values "ham" and "spam" will become two
    features in the output, one signifying "f=ham", the other "f=spam".

    If a feature value is a sequence or set of strings, this transformer
    will iterate over the values and will count the occurrences of each string
    value.

    However, note that this transformer will only do a binary one-hot encoding
    when feature values are of type string. If categorical features are
    represented as numeric values such as int or iterables of strings, the
    DictVectorizer can be followed by
    :class:`~sklearn.preprocessing.OneHotEncoder` to complete
    binary one-hot encoding.

    Features that do not occur in a sample (mapping) will have a zero value
    in the resulting array/matrix.

    For an efficiency comparison of the different feature extractors, see
    :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.

    Read more in the :ref:`User Guide <dict_feature_extraction>`.

    Parameters
    ----------
    dtype : dtype, default=np.float64
        The type of feature values. Passed to Numpy array/scipy.sparse matrix
        constructors as the dtype argument.
    separator : str, default="="
        Separator string used when constructing new features for one-hot
        coding.
    sparse : bool, default=True
        Whether transform should produce scipy.sparse matrices.
    sort : bool, default=True
        Whether ``feature_names_`` and ``vocabulary_`` should be
        sorted when fitting.

    Attributes
    ----------
    vocabulary_ : dict
        A dictionary mapping feature names to feature indices.

    feature_names_ : list
        A list of length n_features containing the feature names (e.g., "f=ham"
        and "f=spam").

    See Also
    --------
    FeatureHasher : Performs vectorization using only a hash function.
    sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical
        features encoded as columns of arbitrary data types.

    Examples
    --------
    >>> from sklearn.feature_extraction import DictVectorizer
    >>> v = DictVectorizer(sparse=False)
    >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
    >>> X = v.fit_transform(D)
    >>> X
    array([[2., 0., 1.],
           [0., 1., 3.]])
    >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},
    ...                            {'baz': 1.0, 'foo': 3.0}]
    True
    >>> v.transform({'foo': 4, 'unseen_feature': 3})
    array([[0., 0., 4.]])
    	dict_typeno_validationbooleandtype	separatorsparsesort_parameter_constraints=Tc                <    || _         || _        || _        || _        y Nr   )selfr   r   r   r   s        ^/var/www/html/venv/lib/python3.12/site-packages/sklearn/feature_extraction/_dict_vectorizer.py__init__zDictVectorizer.__init__j   s    
"	    FNfittingtransformingindicesvaluesc                N   |D ]  }	t        |	t              r|| j                  |	}
d}	nt        dt	        |	       d      |r#|
|vrt        |      ||
<   |j                  |
       |sh|
|v sm|j                  ||
          |j                  | j                  |	              y)z)Add feature names for iterable of strings   zUnsupported type z; in iterable value. Only iterables of string are supported.N)
isinstancestrr   	TypeErrortypelenappendr   )r   fvfeature_namesvocabr"   r#   r$   r%   vvfeature_names              r   _add_iterable_elementz$DictVectorizer._add_iterable_elementp   s      	.B"c"+,dnnbA'Rz 2! ! 
 <u4&)-&8l#$$\2 5u\23djjn-!	.r    )prefer_skip_nested_validationc                 N   g }i }|D ]  }|j                         D ]  \  }}t        |t              r|| j                  |}nit        |t              s||}nTt        |t
              rt        dt        |       d| d| d      t        |t              rd}| j                  ||||       ||vst        |      ||<   |j                  |         | j                  r.|j                          t        |      D 	ci c]  \  }	}||	
 }}	}|| _        || _        | S c c}}	w )a)  Learn a list of feature name -> indices mappings.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        self : object
            DictVectorizer class instance.
        NzUnsupported value type  for : z$.
Mapping objects are not supported.)itemsr(   r)   r   r   r   r*   r+   r   r4   r,   r-   r   	enumeratefeature_names_vocabulary_)
r   Xyr0   r1   xr.   r/   r3   is
             r   fitzDictVectorizer.fit   s?   *  	;A	 ;1a%/0$..!#DL6*qy#$L7+#1$q' ; cA3 '== 
  8,#'L..q!]EJ+#50.1-.@l+%,,\:%;	;* 99 &/&>?daQT?E?+  @s   D!c                    t        d      j                  dk(  sJ d       | j                  }|rg }i }n| j                  }| j                  }d}t        |t              r|gn|}t        d      }dg}g }	|D ]8  }
|
j                         D ]  \  }}t        |t              r|| j                  |}d}n{t        |t              s||}nft        |t              s,t        |t              rd }| j                  ||||||||	       n*t        dt        |       d	| d
| dt        |       d	      ||r#||vrt        |      ||<   |j!                  |       ||v s|j!                  ||          |	j!                  | j                  |             
 |j!                  t        |             ; t        |      dk(  rt#        d      t%        j&                  |t$        j(                        }t        |      dz
  t        |      f}t+        j,                  |	||f||      }|rs| j.                  rg|j/                          t%        j0                  t        |      t$        j2                        }t5        |      D ]  \  }}||   ||<   |||<    |d d |f   }| j6                  r|j9                          n|j;                         }|r|| _        || _        |S )Nr@      zsizeof(int) != 4 on your platform; please report this at https://github.com/scikit-learn/scikit-learn/issues and include the output from platform.platform() in your bug reportTr   r'   r!   zUnsupported value Type r7   r8   z.
z objects are not supported.zSample sequence X is empty.r   )shaper   )r   itemsizer   r;   r<   r(   r   r9   r)   r   r   r   r4   r*   r+   r,   r-   
ValueErrornp
frombufferintcsp
csr_matrixr   emptyint32r:   r   sort_indicestoarray)r   r=   r"   r   r0   r1   r#   r$   indptrr%   r?   r.   r/   r3   rE   result_matrix	map_indexnew_vals                     r   
_transformzDictVectorizer._transform   s   
 Sz""a' 	
N	
' 

ME //M$$E a)QCq*   #	(A	  51a%/0$..!#DLA6*qy#$L#Aw/Jq(4K#'L..% '%1 '% / 	 $1$q' ; cA3c7)#>@   +<u#<.1-.@l+%,,\:#u,u\':;djjm4A 5D MM#g,'G#	(J v;!:;;--rww7Vq#e*-Wf%U%

 tyy ]!3288DI'6 #
%*1X	'""a# *!Y,7M;;&&()113M"/D$Dr    c                 (    | j                  |d      S )a  Learn a list of feature name -> indices mappings and transform X.

        Like fit(X) followed by transform(X), but does not require
        materializing X in memory.

        Parameters
        ----------
        X : Mapping or iterable over Mappings
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

            .. versionchanged:: 0.24
               Accepts multiple string values for one categorical feature.

        y : (ignored)
            Ignored parameter.

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        Tr"   )rU   )r   r=   r>   s      r   fit_transformzDictVectorizer.fit_transform(  s    0 q$//r    c                    t        | d       t        |ddg      }|j                  d   }| j                  }t	        |      D cg c]	  } |        }}t        j                  |      r0t        |j                          D ]  \  }}|||f   ||   ||   <    |S t        |      D ]2  \  }}	t        ||ddf         D ]  \  }}
|
dk7  s|||f   |	||   <    4 |S c c}w )aN  Transform array or sparse matrix X back to feature mappings.

        X must have been produced by this DictVectorizer's transform or
        fit_transform method; it may only have passed through transformers
        that preserve the number of features and their order.

        In the case of one-hot/one-of-K coding, the constructed feature
        names and values are returned rather than the original ones.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Sample matrix.
        dict_type : type, default=dict
            Constructor for feature mappings. Must conform to the
            collections.Mapping API.

        Returns
        -------
        D : list of dict_type objects of shape (n_samples,)
            Feature mappings for the samples in X.
        r;   csrcsc)accept_sparser   N)
r   r   rE   r;   rangerK   issparsezipnonzeror:   )r   r=   r   	n_samplesnames_dictsr@   jdr/   s              r   inverse_transformz DictVectorizer.inverse_transformB  s    . 	./ %8GGAJ	##&+I&6777;;q>QYY[) -1%&q!tWaq"-  "%( .1%a1g. .DAqAv&'1g%(..
  8s   Cc                 D    t        | ddg       | j                  |d      S )a  Transform feature->value dicts to array or sparse matrix.

        Named features not encountered during fit or fit_transform will be
        silently ignored.

        Parameters
        ----------
        X : Mapping or iterable over Mappings of shape (n_samples,)
            Dict(s) or Mapping(s) from feature names (arbitrary Python
            objects) to feature values (strings or convertible to dtype).

        Returns
        -------
        Xa : {array, sparse matrix}
            Feature vectors; always 2-d.
        r;   r<   FrW   )r   rU   )r   r=   s     r   	transformzDictVectorizer.transformm  s'    " 	/?@q%00r    c                     t        | d       t        d | j                  D              r#| j                  D cg c]  }t        |       }}n| j                  }t	        j
                  |t              S c c}w )a^  Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        r;   c              3   >   K   | ]  }t        |t                 y wr   )r(   r)   ).0names     r   	<genexpr>z7DictVectorizer.get_feature_names_out.<locals>.<genexpr>  s     IT:dC((Is   rD   )r   anyr;   r)   rH   asarrayobject)r   input_featuresrm   r0   s       r   get_feature_names_outz$DictVectorizer.get_feature_names_out  sb     	./IT5H5HII373F3FG4SYGMG //Mzz-v66 Hs   A3c                 0   t        | d       |st        j                  |      d   }| j                  }i }|D ]  }t	        |      |||   <    || _        t        |j                         t        d            D cg c]  \  }}|	 c}}| _        | S c c}}w )a=  Restrict the features to those in support using feature selection.

        This function modifies the estimator in-place.

        Parameters
        ----------
        support : array-like
            Boolean mask or list of indices (as returned by the get_support
            member of feature selectors).
        indices : bool, default=False
            Whether support is a list of indices.

        Returns
        -------
        self : object
            DictVectorizer class instance.

        Examples
        --------
        >>> from sklearn.feature_extraction import DictVectorizer
        >>> from sklearn.feature_selection import SelectKBest, chi2
        >>> v = DictVectorizer()
        >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
        >>> X = v.fit_transform(D)
        >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])
        >>> v.get_feature_names_out()
        array(['bar', 'baz', 'foo'], ...)
        >>> v.restrict(support.get_support())
        DictVectorizer()
        >>> v.get_feature_names_out()
        array(['bar', 'foo'], ...)
        r;   r   r'   )key)	r   rH   wherer;   r,   r<   sortedr9   r   )r   supportr$   rb   	new_vocabr@   r.   s          r   restrictzDictVectorizer.restrict  s    B 	./hhw'*G##	 	1A"%i.IeAh	1 % !2
1F
!QA
 	
s   ;Bc                 h    t         |          }d|j                  _        d|j                  _        |S )NTF)super__sklearn_tags__
input_tagsdicttwo_d_array)r   tags	__class__s     r   r}   zDictVectorizer.__sklearn_tags__  s-    w')#&+#r    r   )F)__name__
__module____qualname____doc__r   UNUSED4_DictVectorizer__metadata_request__inverse_transformr)   r   r   __annotations__rH   float64r   r4   r   rA   rU   rX   rg   ri   rs   rz   r}   __classcell__)r   s   @r   r   r      s    JZ .9:J:Q:Q,R) !U+	$D  !#

c$T  .> 53 63jaF 50 602 .2 )V1(7(0d r    r   )r   collections.abcr   r   numbersr   operatorr   numpyrH   scipy.sparser   rK   sklearn.utilsr   baser
   r   r   utilsr   utils.validationr   r    r    r   <module>r      s9     -     * @ @  .x%} xr    