
    sg<.                         d dl mZ d dlmZ d dlmZ d dlZddlm	Z	m
Z
mZmZmZ ddlmZ ddd	d
ZddZ G d de      Zd Z G d de      Zd Zd ZdddZddZ G d de      Zd Zy)    )Counter)suppress)
NamedTupleN   )_isin_searchsorted
_setdiff1ddeviceget_namespace)is_scalar_nanFreturn_inversereturn_countsc                `    | j                   t        k(  rt        | ||      S t        | ||      S )a  Helper function to find unique values with support for python objects.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : ndarray
        Values to check for unknowns.

    return_inverse : bool, default=False
        If True, also return the indices of the unique values.

    return_counts : bool, default=False
        If True, also return the number of times each unique item appears in
        values.

    Returns
    -------
    unique : ndarray
        The sorted unique values.

    unique_inverse : ndarray
        The indices to reconstruct the original array from the unique array.
        Only provided if `return_inverse` is True.

    unique_counts : ndarray
        The number of times each of the unique values comes up in the original
        array. Only provided if `return_counts` is True.
    r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      H/var/www/html/venv/lib/python3.12/site-packages/sklearn/utils/_encode.py_uniquer      s:    > ||v>
 	
 ~]     c                    t        |       \  }}d\  }}|r|r|j                  |       \  }}}}n?|r|j                  |       \  }}n(|r|j                  |       \  }}n|j	                  |       }|j
                  rYt        |d         rKt        ||j                  |      }|d|dz    }|r||||kD  <   |r|j                  ||d       ||<   |d|dz    }|f}	|r|	|fz  }	|r|	|fz  }	t        |	      dk(  r|	d   S |	S )zHelper function to find unique values for numpy arrays that correctly
    accounts for nans. See `_unique` documentation for details.)NNxpNr   r   )r   
unique_allunique_inverseunique_countsunique_valuessizer   r   nansumlen)
r   r   r   r   _inversecountsuniquesnan_idxrets
             r   r   r   =   s(    &!EB OGV-&(mmF&;#GV	,,V4	**62""6* ||gbk2B7-GaK()0GGg%& ffVGH%56F7OMgk*F*CzyX]3q6++r   c                   ,    e Zd ZU dZeed<   eed<   d Zy)MissingValuesz'Data class for missing data informationr"   nonec                     g }| j                   r|j                  d       | j                  r|j                  t        j                         |S )z3Convert tuple to a list where None is always first.N)r-   appendr"   np)selfoutputs     r   to_listzMissingValues.to_listj   s6    99MM$88MM"&&!r   N)__name__
__module____qualname____doc__bool__annotations__r3    r   r   r,   r,   d   s    1	I
Jr   r,   c                     | D ch c]  }|t        |      s| }}|s| t        dd      fS d|v r*t        |      dk(  rt        dd      }nt        dd      }nt        dd      }| |z
  }||fS c c}w )a.  Extract missing values from `values`.

    Parameters
    ----------
    values: set
        Set of values to extract missing from.

    Returns
    -------
    output: set
        Set with missing values extracted.

    missing_values: MissingValues
        Object with missing value information.
    NF)r"   r-   r   T)r   r,   r$   )r   valuemissing_values_setoutput_missing_valuesr2   s        r   _extract_missingr?   t   s    " "U]mE6J  }U;;;!!!"a'$1e$$G! %2d$F! -$U C ((F((('s
   A3A3c                   (     e Zd ZdZ fdZd Z xZS )_nandictz!Dictionary with support for nans.c                 |    t         |   |       |j                         D ]  \  }}t        |      s|| _         y  y N)super__init__itemsr   	nan_value)r1   mappingkeyr<   	__class__s       r   rE   z_nandict.__init__   s;    !!--/ 	JCS!!&	r   c                 ^    t        | d      rt        |      r| j                  S t        |      )NrG   )hasattrr   rG   KeyErrorr1   rI   s     r   __missing__z_nandict.__missing__   '    4%-*<>>!smr   )r4   r5   r6   r7   rE   rO   __classcell__rJ   s   @r   rA   rA      s    +r   rA   c                     t        | |      \  }}t        t        |      D ci c]  \  }}||
 c}}      }|j                  | D cg c]  }||   	 c}t	        |             S c c}}w c c}w )z,Map values based on its position in uniques.)r
   )r   rA   	enumerateasarrayr
   )r   r(   r   r%   ivaltablevs           r   _map_to_integerrZ      sb    &'*EB9W+=>Cc1f>?E::0AuQx0:HH ?0s   A%
A+c                   	 t        |       }t        |      \  }}t        |      }|j                  |j	                                t        j                  || j                        }|f}|r|t        | |      fz  }|r|t        | |      fz  }t        |      dk(  r|d   S |S # t        $ r1 t        d t        d | D              D              }t        d|       w xY w)Nr   c              3   4   K   | ]  }|j                     y wrC   )r6   ).0ts     r   	<genexpr>z!_unique_python.<locals>.<genexpr>   s     L!q~~Ls   c              3   2   K   | ]  }t        |        y wrC   )type)r^   rY   s     r   r`   z!_unique_python.<locals>.<genexpr>   s     2Kq472Ks   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setr?   sortedextendr3   r0   arrayr   	TypeErrorrZ   _get_countsr$   )r   r   r   uniques_setmissing_valuesr(   typesr*   s           r   r   r      s    
&k&6{&C#^%~--/0((7&,,7 *C022FG,..X]3q6++  
Ls2KF2K/KLL'',g/
 	

s   A$B" ":CT)check_unknownc                ,   t        | |      \  }}|j                  | j                  d      s	 t        | |      S |r%t        | |      }|rt        dt        |             t        || |      S # t        $ r}t        dt        |             d}~ww xY w)a  Helper function to encode values into [0, n_uniques - 1].

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.
    The numpy method has the limitation that the `uniques` need to
    be sorted. Importantly, this is not checked but assumed to already be
    the case. The calling method needs to ensure this for all non-object
    values.

    Parameters
    ----------
    values : ndarray
        Values to encode.
    uniques : ndarray
        The unique values in `values`. If the dtype is not object, then
        `uniques` needs to be sorted.
    check_unknown : bool, default=True
        If True, check for values in `values` that are not in `unique`
        and raise an error. This is ignored for object dtype, and treated as
        True in this case. This parameter is useful for
        _BaseEncoder._transform() to avoid calling _check_unknown()
        twice.

    Returns
    -------
    encoded : ndarray
        Encoded values
    numericz%y contains previously unseen labels: Nr   )	r   isdtyper   rZ   rM   
ValueErrorstr_check_unknownr   )r   r(   rl   r   r%   ediffs          r   _encoderu      s    : &'*EB::fllI.	O"6733 !&'2D #HT!TUUWf44  	ODSVHMNN	Os   A. .	B7BBc                 <   t        | |      \  }}d}|j                  | j                  d      st        |       }t	        |      \  }}t        |      t	              \  |z
  }|j
                  xr j
                   }	|j                  xr j                   }
fd}|rR|s|	s|
r&|j                  | D cg c]
  } ||       c}      }n&|j                  t        |       |j                        }t        |      }|
r|j                  d       |	r|j                  t        j
                         n|j                  |       }t        |||d      }|r@|j                   rt#        | ||      }n&|j                  t        |       |j                        }|j%                  |j'                  |            rL|j'                  |      }|j%                  |      r*|j                   r|r|j'                  |       }d||<   ||    }t        |      }|r||fS |S c c}w )a  
    Helper function to check for unknowns in values to be encoded.

    Uses pure python method for object dtype, and numpy method for
    all other dtypes.

    Parameters
    ----------
    values : array
        Values to check for unknowns.
    known_values : array
        Known values. Must be unique.
    return_mask : bool, default=False
        If True, return a mask of the same shape as `values` indicating
        the valid values.

    Returns
    -------
    diff : list
        The unique values present in `values` and not in `know_values`.
    valid_mask : boolean array
        Additionally returned if ``return_mask=True``.

    Nrn   c                 j    | v xs- j                   xr | d u xs j                  xr t        |       S rC   )r-   r"   r   )r<   missing_in_uniquesri   s    r   is_validz _check_unknown.<locals>.is_valid  sG    $ )%** "TM) &)) )!%(r   r\   Tassume_uniquer   )r   ro   r   rc   r?   r"   r-   rf   onesr$   r8   listr/   r0   r    r	   r!   r   anyisnan)r   known_valuesreturn_maskr   r%   
valid_mask
values_setmissing_in_valuesrt   nan_in_diffnone_in_diffry   r<   r    diff_is_nanis_nanrx   ri   s                   @@r   rr   rr      s   2 &,/EBJ::fllI.[
(8(D%
%,'*:;*G''K''++J4F4J4J0J(--M6H6M6M2M	 {lXXF&K5x&KL
WWS[W@
DzKKKK((0-rNyy"6<<
WWS[W@
 66"((<()((4.Kvvk"99XXf-F)*Jv& [L)DzZKC 'Ls   ;Hc                   .     e Zd ZdZ fdZd Zd Z xZS )_NaNCounterz$Counter with support for nan values.c                 B    t         |   | j                  |             y rC   )rD   rE   _generate_items)r1   rF   rJ   s     r   rE   z_NaNCounter.__init__O  s    --e45r   c              #      K   |D ]:  }t        |      s| t        | d      sd| _        | xj                  dz  c_        < yw)z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r   rL   r   )r1   rF   items      r   r   z_NaNCounter._generate_itemsR  sD      	 D &
4-!"NNaN	 s   AAc                 ^    t        | d      rt        |      r| j                  S t        |      )Nr   )rL   r   r   rM   rN   s     r   rO   z_NaNCounter.__missing__\  rP   r   )r4   r5   r6   r7   rE   r   rO   rQ   rR   s   @r   r   r   L  s    .6 r   r   c                 p   | j                   j                  dv rnt        |       }t        j                  t        |      t        j                        }t        |      D ]%  \  }}t        t              5  ||   ||<   ddd       ' |S t        | d      \  }}t        j                  ||d      }t        j                  |d         rt        j                  |d         rd|d<   t        j                  |||         }	t        j                  |t        j                        }||	   ||<   |S # 1 sw Y   xY w)zGet the count of each of the `uniques` in `values`.

    The counts will use the order passed in by `uniques`. For non-object dtypes,
    `uniques` is assumed to be sorted and `np.nan` is at the end.
    OUr\   NT)r   rz   r   )r   kindr   r0   zerosr$   int64rT   r   rM   r   isinr   searchsorted
zeros_like)
r   r(   counterr2   rV   r   r    r'   uniques_in_valuesunique_valid_indicess
             r   rh   rh   b  s    ||D f%#g,bhh7 ) 	*GAt(# *#DMq	* *	* &vTBM6 dK	xxb!"rxx'< $"??='BS:TU]]7"((3F &'; <FM* *s   2	D,,D5	)FF)F)collectionsr   
contextlibr   typingr   numpyr0   
_array_apir   r   r	   r
   r   _missingr   r   r   r,   r?   dictrA   rZ   r   ru   rr   r   rh   r:   r   r   <module>r      s          $ ',5 &R$,NJ  #)Lt  I,4 /3 (5VSl' ,r   