
    Ki-                         S SK Jr  S SKJr  S SKJr  S SKrS SKJ	r	J
r
JrJr  S SKJr  SSS.S	 jrSS
 jr " S S\5      rS r " S S\5      rS rS rSS.S jrSS jr " S S\5      rS rg)    )Counter)suppress)
NamedTupleN)_isindeviceget_namespacexpx)is_scalar_nanFreturn_inversereturn_countsc                R    U R                   [        :X  a
  [        XUS9$ [        XUS9$ )a)  Helper function to find unique values with support for python objects.

Uses pure python method for object dtype, and numpy method for
all other dtypes.

Parameters
----------
values : ndarray
    Values to check for unknowns.

return_inverse : bool, default=False
    If True, also return the indices of the unique values.

return_counts : bool, default=False
    If True, also return the number of times each unique item appears in
    values.

Returns
-------
unique : ndarray
    The sorted unique values.

unique_inverse : ndarray
    The indices to reconstruct the original array from the unique array.
    Only provided if `return_inverse` is True.

unique_counts : ndarray
    The number of times each of the unique values comes up in the original
    array. Only provided if `return_counts` is True.
r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      W/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/sklearn/utils/_encode.py_uniquer      s6    > ||v
 	
 ]     c                 J   [        U 5      u  p4Su  pVU(       a  U(       a  UR                  U 5      u  ptpVOGU(       a  UR                  U 5      u  puO,U(       a  UR                  U 5      u  pvOUR	                  U 5      nUR
                  (       ag  [        US   5      (       aT  UR                  XsR                  5      nUSUS-    nU(       a  XXX:  '   U(       a  UR                  XhS 5      Xh'   USUS-    nU4n	U(       a  X4-  n	U(       a  X4-  n	[        U	5      S:X  a  U	S   $ U	$ )zHelper function to find unique values for numpy arrays that correctly
accounts for nans. See `_unique` documentation for details.)NNN   r   )r   
unique_allunique_inverseunique_countsunique_valuessizer
   searchsortednansumlen)
r   r   r   xp_inversecountsuniquesnan_idxrets
             r   r   r   7   s    &!EB OG-&(mmF&;#GV	,,V4	**62""6* ||gbk22//'662-GaK()0G%& ffVH%56FOMgk*F*CzyX]3q6++r   c                   4    \ rS rSr% Sr\\S'   \\S'   S rSrg)MissingValues^   z'Data class for missing data informationr    nonec                     / nU R                   (       a  UR                  S5        U R                  (       a  UR                  [        R                  5        U$ )z3Convert tuple to a list where None is always first.N)r-   appendr    np)selfoutputs     r   to_listMissingValues.to_listd   s6    99MM$88MM"&&!r    N)	__name__
__module____qualname____firstlineno____doc__bool__annotations__r3   __static_attributes__r5   r   r   r+   r+   ^   s    1	I
Jr   r+   c                     U  Vs1 s H  ob  [        U5      (       d  M  UiM     nnU(       d  U [        SSS94$ SU;   a%  [        U5      S:X  a  [        SSS9nO[        SSS9nO
[        SSS9nX-
  nXC4$ s  snf )a  Extract missing values from `values`.

Parameters
----------
values: set
    Set of values to extract missing from.

Returns
-------
output: set
    Set with missing values extracted.

missing_values: MissingValues
    Object with missing value information.
NF)r    r-   r   T)r
   r+   r"   )r   valuemissing_values_setoutput_missing_valuesr2   s        r   _extract_missingrB   n   s    " "!%]mE6J6   }U;;;!!!"a'$1e$$G! %2d$F! -$U C (F(('s
   A6A6c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )_nandict   z!Dictionary with support for nans.c                    > [         TU ]  U5        UR                  5        H  u  p#[        U5      (       d  M  X0l          g    g N)super__init__itemsr
   	nan_value)r1   mappingkeyr?   	__class__s       r   rI   _nandict.__init__   s5    !!--/JCS!!!& *r   c                 r    [        U S5      (       a  [        U5      (       a  U R                  $ [        U5      e)NrK   )hasattrr
   rK   KeyErrorr1   rM   s     r   __missing___nandict.__missing__   -    4%%-*<*<>>!smr   )rK   )	r6   r7   r8   r9   r:   rI   rT   r=   __classcell__rN   s   @r   rD   rD      s    + r   rD   c                     [        X5      u  p#[        [        U5       VVs0 s H  u  pEXT_M	     snn5      nUR                  U  Vs/ s H  ovU   PM	     sn[	        U 5      S9$ s  snnf s  snf )z,Map values based on its position in uniques.)r   )r   rD   	enumerateasarrayr   )r   r'   r#   r$   ivaltablevs           r   _map_to_integerr`      sb    &*EB9W+=>+=cf+=>?E::0AQx0:HH ?0s   A#
A)c                    [        U 5      n[        U5      u  p4[        U5      nUR                  UR	                  5       5        [
        R                  " XPR                  S9nU4nU(       a  U[        X5      4-  nU(       a  U[        X5      4-  n[        U5      S:X  a  US   $ U$ ! [         a1    [        S [        S U  5       5       5       5      n[        SU 35      ef = f)Nr   c              3   8   #    U  H  oR                   v   M     g 7frG   )r8   ).0ts     r   	<genexpr>!_unique_python.<locals>.<genexpr>   s     L/K!~~/K   c              3   8   #    U  H  n[        U5      v   M     g 7frG   )type)rd   r_   s     r   rf   rg      s     2KFq477Frh   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setrB   sortedextendr3   r0   arrayr   	TypeErrorr`   _get_countsr"   )r   r   r   uniques_setmissing_valuesr'   typesr)   s           r   r   r      s    
&k&6{&C#%~--/0((7,,7 *C022F,..X]3q6++  
Ls2KF2K/KLL'',g/
 	

s   A B' ';C"T)check_unknownc                &   [        X5      u  p4UR                  U R                  S5      (       d   [        X5      $ U(       a   [        X5      nU(       a  [        SU 35      eUR                  X5      $ ! [         a  n[        SU 35      eSnAff = f)ax  Helper function to encode values into [0, n_uniques - 1].

Uses pure python method for object dtype, and numpy method for
all other dtypes.
The numpy method has the limitation that the `uniques` need to
be sorted. Importantly, this is not checked but assumed to already be
the case. The calling method needs to ensure this for all non-object
values.

Parameters
----------
values : ndarray
    Values to encode.
uniques : ndarray
    The unique values in `values`. If the dtype is not object, then
    `uniques` needs to be sorted.
check_unknown : bool, default=True
    If True, check for values in `values` that are not in `unique`
    and raise an error. This is ignored for object dtype, and treated as
    True in this case. This parameter is useful for
    _BaseEncoder._transform() to avoid calling _check_unknown()
    twice.

Returns
-------
encoded : ndarray
    Encoded values
numericz%y contains previously unseen labels: N)r   isdtyper   r`   rR   
ValueError_check_unknownr   )r   r'   rt   r#   r$   ediffs          r   _encoder|      s    : &*EB::fllI..	J"633 !&2D #H!OPPw//  	JDQCHII	Js   
A3 3
B=BBc                   ^^ [        X5      u  p4SnUR                  U R                  S5      (       Gd6  [        U 5      n[	        U5      u  pg[        U5      m[	        T5      u  mmUT-
  nUR
                  =(       a    TR
                  (       + n	UR                  =(       a    TR                  (       + n
UU4S jnU(       a`  U(       d  U	(       d  U
(       a(  UR                  U  Vs/ s H
  o" U5      PM     sn5      nO#UR                  [        U 5      UR                  S9n[        U5      nU
(       a  UR                  S5        U	(       a  UR                  [        R
                  5        OUR                  U 5      n[        R                   " XSUS9nU(       aA  UR"                  (       a  [%        XU5      nO#UR                  [        U 5      UR                  S9nUR'                  UR)                  U5      5      (       aY  UR)                  U5      nUR'                  U5      (       a2  UR"                  (       a  U(       a  UR)                  U 5      nSX_'   X)    n[        U5      nU(       a  X4$ U$ s  snf )a=  
Helper function to check for unknowns in values to be encoded.

Uses pure python method for object dtype, and numpy method for
all other dtypes.

Parameters
----------
values : array
    Values to check for unknowns.
known_values : array
    Known values. Must be unique.
return_mask : bool, default=False
    If True, return a mask of the same shape as `values` indicating
    the valid values.

Returns
-------
diff : list
    The unique values present in `values` and not in `know_values`.
valid_mask : boolean array
    Additionally returned if ``return_mask=True``.

Nrv   c                    > U T;   =(       d<    TR                   =(       a    U S L =(       d    TR                  =(       a    [        U 5      $ rG   )r-   r    r
   )r?   missing_in_uniquesrq   s    r   is_valid _check_unknown.<locals>.is_valid  sA    $ E&++=E&**C}U/Cr   rb   T)assume_uniquer#   r   )r   rw   r   rk   rB   r    r-   rn   onesr"   r;   listr/   r0   r   r	   	setdiff1dr   r   anyisnan)r   known_valuesreturn_maskr#   r$   
valid_mask
values_setmissing_in_valuesr{   nan_in_diffnone_in_diffr   r?   r   diff_is_nanis_nanr   rq   s                   @@r   ry   ry      s   2 &/EBJ::fllI..[
(8(D%
,'*:;*G''K''++J4F4J4J0J(--M6H6M6M2M	 {lXXF&KF5xF&KL
WWS[W@
DzKKKK((0}}]QSTyy"6<
WWS[W@
 66"((<())((4.Kvvk""99XXf-F)*J& L)DzKC 'Ls   $I.c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )_NaNCounteriD  z$Counter with support for nan values.c                 B   > [         TU ]  U R                  U5      5        g rG   )rH   rI   _generate_items)r1   rJ   rN   s     r   rI   _NaNCounter.__init__G  s    --e45r   c              #      #    U HF  n[        U5      (       d  Uv   M  [        U S5      (       d  SU l        U =R                  S-  sl        MH     g7f)z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r
   rQ   r   )r1   rJ   items      r   r   _NaNCounter._generate_itemsJ  sD     D &&
4--!"NNaN s   AAc                 r    [        U S5      (       a  [        U5      (       a  U R                  $ [        U5      e)Nr   )rQ   r
   r   rR   rS   s     r   rT   _NaNCounter.__missing__T  rV   r   )r   )
r6   r7   r8   r9   r:   rI   r   rT   r=   rW   rX   s   @r   r   r   D  s    .6  r   r   c                 |   U R                   R                  S;   ak  [        U 5      n[        R                  " [        U5      [        R                  S9n[        U5       H#  u  pE[        [        5         X%   X4'   SSS5        M%     U$ [        U SS9u  pg[        R                  " XSS9n[        R                  " US   5      (       a#  [        R                  " US   5      (       a  SUS'   [        R                  " XaU   5      n	[        R                  " U[        R                  S9nXy   X8'   U$ ! , (       d  f       M  = f)zGet the count of each of the `uniques` in `values`.

The counts will use the order passed in by `uniques`. For non-object dtypes,
`uniques` is assumed to be sorted and `np.nan` is at the end.
OUrb   NT)r   )r   r   )r   kindr   r0   zerosr"   int64rZ   r   rR   r   isinr   r   
zeros_like)
r   r'   counterr2   r\   r   r   r&   uniques_in_valuesunique_valid_indicess
             r   rp   rp   Z  s     ||D f%#g,bhh7 )GA(##M	 $# * &vTBM dK	xxb!""rxx'<'< $"??=BS:TU]]7"((3F & <FM $#s   1D,,
D;	)FF)F)collectionsr   
contextlibr   typingr   numpyr0   sklearn.utils._array_apir   r   r   r	   sklearn.utils._missingr
   r   r   r+   rB   dictrD   r`   r   r|   ry   r   rp   r5   r   r   <module>r      s{         F F 0 ',5 &R$,NJ  #)Lt  I,4 /3 (0VQh' ,r   