
    KipO              	          S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	  SSK
JrJrJr  SSKJr  SSKJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJ r   SSK!J"r"J#r#  \" SSS9u  r$r%\" \$\%SS9u  r$r%\" 5       RM                  \$5      r$/ SQr'S1\RP                  " 5        V Vs1 s H
  u  pUS   iM     snn -  r)SRS jr*\RV                  RY                  S\5      S 5       r-S r.\RV                  RY                  S/ \#Q\"Q5      S 5       r/S r0\RV                  RY                  S\'5      \RV                  RY                  S \5      S! 5       5       r1S" r2\RV                  RY                  S#S$5      S% 5       r3S& r4S' r5S( r6S) r7\RV                  RY                  S*S+S,/5      S- 5       r8\RV                  RY                  S.\#5      S/ 5       r9\RV                  RY                  S0\'5      S1 5       r:S2 r;S3 r<\RV                  RY                  S4S S50\Rz                  " S6\R|                  /\R|                  S6//5      4S S50S6S7/S7S6//40 S6S7/S8S9//4/5      S: 5       r?\RV                  RY                  S.\#5      S; 5       r@\RV                  RY                  S.\#5      S< 5       rAS= rBS> rCS? rD\RV                  RY                  S@SASB/5      \RV                  RY                  SCSSD/5      SE 5       5       rESF rF\RV                  RY                  SGSHSI/5      SJ 5       rG\RV                  RY                  SKSLSM/5      SN 5       rH\RV                  RY                  SOS+S,/5      SP 5       rISQ rJgs  snn f )SzF
Tests for HDBSCAN clustering algorithm
Based on the DBSCAN test code
    N)stats)distance)HDBSCAN)CONDENSED_dtype_condense_tree_do_labelling)_OUTLIER_ENCODING)
make_blobs)fowlkes_mallows_score)_VALID_METRICSeuclidean_distances)BallTreeKDTree)StandardScaler)shuffle)assert_allcloseassert_array_equal)CSC_CONTAINERSCSR_CONTAINERS   
   )	n_samplesrandom_state   )r   )kd_tree	ball_treebruteautolabelc                 v    [        [        U 5      [        -
  5      nUS:X  d   e[        U [        5      U:  d   eg )N   )lensetOUTLIER_SETr   y)labels	threshold
n_clusterss      d/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/sklearn/cluster/tests/test_hdbscan.pycheck_label_qualityr+   )   s6    S[;./J?? +i777    outlier_typec                    [         R                  [         R                  S.U    nS S S.U    n[        U    S   n[        U    S   n[        R                  5       nUS/US'   X/US'   [        S	S
9R                  U5      nUR                  U:H  R                  5       u  n[        USS/5        U" UR                  U5      R                  5       u  n[        USS/5        [        [        SS5      5      [        [        SS5      5      -   n	[        S	S
9R                  XY   5      n
[        U
R                  UR                  U	   5        g)G
Tests if np.inf and np.nan data are each treated as special outliers.
)infinitemissingc                 
    X:H  $ N xr&   s     r*   <lambda>#test_outlier_data.<locals>.<lambda>9   s    r,   c                 .    [         R                  " U 5      $ r3   )npisnanr5   s     r*   r7   r8   :   s    r,   r    prob   r      Fcopy   r   N)r:   infnanr	   Xr@   r   fitlabels_nonzeror   probabilities_listrange)r-   outlier
prob_checkr    r<   	X_outliermodelmissing_labels_idxmissing_probs_idxclean_indicesclean_models              r*   test_outlier_datarS   /   s>    FF66 G
 (+ J l+G4E\*62DIQ<IaL%IaL##I.E"]]e3<<>)Aq62&u';';TBKKM(1a&1q!%U1c](;;Mu%)))*BCK{**EMM-,HIr,   c                     [        [        5      n U R                  5       n[        SSS9R	                  U 5      n[        X5        [        U5        Sn[        R                  " [        US9   [        SSS9R	                  [        5        SSS5        SnSU S	'   S
U S'   [        R                  " [        US9   [        SSS9R	                  U 5        SSS5        g! , (       d  f       NV= f! , (       d  f       g= f)zm
Tests that HDBSCAN works with precomputed distance matrices, and throws the
appropriate errors when needed.
precomputedTmetricr@   z*The precomputed distance matrix.*has shapematchNz'The precomputed distance matrix.*valuesr   )r   r=   r=   )r=   r   F)
r   rD   r@   r   fit_predictr   r+   pytestraises
ValueError)D
D_originalr'   msgs       r*   test_hdbscan_distance_matrixra   O   s    
 	AAJM5AA!DFA"
7C	z	-}40<<Q? 
. 5CAdGAdG	z	-}51==a@ 
.	- 
.	- 
.	-s   *C5C)
C&)
C7sparse_constructorc                 b   [         R                  " [         R                  " [        5      5      nU[        R
                  " U5      -  n[        R                  " UR                  5       S5      nSXU:  '   U " U5      nUR                  5         [        SSS9R                  U5      n[        U5        g)z9
Tests that HDBSCAN works with sparse distance matrices.
2           rU   FrV   N)r   
squareformpdistrD   r:   maxr   scoreatpercentileflatteneliminate_zerosr   rZ   r+   )rb   r^   r(   r'   s       r*   #test_hdbscan_sparse_distance_matrixrl   g   s    
 	HNN1-.ANA''		R8IA9n1AM6BB1EFr,   c                  R    [        SS9R                  [        5      n [        U 5        g)z
Tests that HDBSCAN works with feature array, including an arbitrary
goodness of fit check. Note that the check is a simple heuristic.
Fr?   N)r   rZ   rD   r+   r'   s    r*   test_hdbscan_feature_arrayro   y   s#    
 % ,,Q/F r,   algorW   c                 4   [        U SS9R                  [        5      n[        U5        U S;   a  g[        [
        S.nS[        R                  " [        R                  S   5      0S[        R                  " [        R                  S   5      0SS	0S	[        R                  " [        R                  S   5      S
.S.R                  US5      n[        U UUSS9nXU    R                  ;  a9  [        R                  " [        5         UR                  [        5        SSS5        gUS:X  a9  [        R                   " ["        5         UR                  [        5        SSS5        gUR                  [        5        g! , (       d  f       g= f! , (       d  f       g= f)zs
Tests that HDBSCAN works with the expected combinations of algorithms and
metrics, or raises the expected errors.
F)	algorithmr@   )r   r   N)r   r   Vr=   p   )rt   w)mahalanobis
seuclidean	minkowski
wminkowski)rr   rW   metric_paramsr@   rz   )r   rZ   rD   r+   r   r   r:   eyeshapeonesgetvalid_metricsr[   r\   r]   rE   warnsFutureWarning)rp   rW   r'   ALGOS_TREESr{   hdbs         r*   test_hdbscan_algorithmsr      s<    t%0<<Q?F    K
 RVVAGGAJ/0BGGAGGAJ/01XBGGAGGAJ$78	
 
c&$  #	C &444]]:&GGAJ '&	<	\\-(GGAJ )( 	
 '& )(s   E8F	8
F	
Fc                  r    [        SS9R                  [        5      n U R                  S5      n[	        USS9  g)z
Tests that HDBSCAN can generate a sufficiently accurate dbscan clustering.
This test is more of a sanity check than a rigorous evaluation.
Fr?   333333?gq=
ףp?)r(   N)r   rE   rD   dbscan_clusteringr+   )	clustererr'   s     r*   test_dbscan_clusteringr      s5    
 U#''*I((-F $/r,   cut_distance)皙?      ?r=   c                    [         S   S   n[         S   S   n[        R                  5       n[        R                  S/US'   S[        R
                  /US'   [        R                  [        R
                  /US'   [        SS	9R                  U5      nUR                  U S
9n[        R                  " XQ:H  5      n[        USS/5        [        R                  " XR:H  5      n[        US/5        [        [        [        S5      5      [        Xg-   5      -
  5      n[        SS	9R                  X8   5      n	U	R                  U S
9n
[        XU   5        g)r/   r1   r    r0   r=   r   ru   r>   Fr?   )r   r   N)r	   rD   r@   r:   rB   rC   r   rE   r   flatnonzeror   rI   r$   rJ   )r   missing_labelinfinite_labelrM   rN   r'   rO   infinite_labels_idx	clean_idxrR   clean_labelss              r*   #test_dbscan_clustering_outlier_datar      s%   
 &i09M&z27;NIFFA;IaLrvv;IaLFFBFF#IaL##I.E$$,$?F(?@)Aq62..)AB*QC0Ss_s+=+S'TTUIu%)))*>?K00l0KL|I%67r,   c                      [        SS[        R                  " [        R                  S   5      0SS9R                  [        5      n [        U 5        g)z,
Tests that HDBSCAN using `BallTree` works.
rx   rs   r=   F)rW   r{   r@   N)r   r:   r~   rD   r}   rZ   r+   rn   s    r*   !test_hdbscan_best_balltree_metricr      sA     C1D+EEk!n  r,   c                      [        [        [        5      S-
  SS9R                  [        5      n [	        U 5      R                  [        5      (       d   eg)zw
Tests that HDBSCAN correctly does not generate a valid cluster when the
`min_cluster_size` is too large for the data.
r=   Fmin_cluster_sizer@   N)r   r#   rD   rZ   r$   issubsetr%   rn   s    r*   test_hdbscan_no_clustersr      s>    
 c!fqju=II!LFv;,,,,r,   c                  >   [        S[        [        5      S5       Hz  n [        U SS9R	                  [        5      nU Vs/ s H  o"S:w  d  M
  UPM     nn[        U5      S:w  d  MJ  [
        R                  " [
        R                  " U5      5      U :  a  Mz   e   gs  snf )zV
Test that the smallest non-noise cluster has at least `min_cluster_size`
many points
ru   r=   Fr   r   r   N)rJ   r#   rD   r   rZ   r:   minbincount)r   r'   r    true_labelss       r*   test_hdbscan_min_cluster_sizer      s~    
 "!SVQ/*:GSSTUV*0@&RKu&@{q 66"++k237GGGG	 0@s   	BBc                  t    [         R                  n [        U SS9R                  [        5      n[        U5        g)z9
Tests that HDBSCAN works when passed a callable metric.
FrV   N)r   	euclideanr   rZ   rD   r+   )rW   r'   s     r*   test_hdbscan_callable_metricr      s.     FF/;;A>Fr,   treer   r   c                     [        SU SS9nSn[        R                  " [        US9   UR	                  [
        5        SSS5        g! , (       d  f       g= f)zu
Tests that HDBSCAN correctly raises an error when passing precomputed data
while requesting a tree-based algorithm.
rU   FrW   rr   r@   z%precomputed is not a valid metric forrX   N)r   r[   r\   r]   rE   rD   )r   r   r`   s      r*   "test_hdbscan_precomputed_non_bruter      s<     $U
CC
1C	z	-
 
.	-	-s   A
Acsr_containerc                 &   [        SS9R                  [        5      R                  n[	        U5        U " [        5      nUR                  5       n[        SS9R                  U5      R                  n[        X5        [        R                  S4[        R                  S44 H  u  pV[        R                  5       nXWS'   [        SS9R                  U5      R                  n[	        U5        US   [        U   S   :X  d   eUR                  5       nXSS'   [        SS9R                  U5      R                  n[        X5        M     Sn[        R                  " [        US	9   [        S
SSS9R                  U5        SSS5        g! , (       d  f       g= f)z
Tests that HDBSCAN works correctly when passing sparse feature data.
Evaluates correctness by comparing against the same data passed as a dense
array.
Fr?   r0   r1   r   r   r   r    z4Sparse data matrices only support algorithm `brute`.rX   r   r   r   N)r   rE   rD   rF   r+   r@   r   r:   rB   rC   r	   r[   r\   r]   )	r   dense_labels	_X_sparseX_sparsesparse_labelsoutlier_valr-   X_denser`   s	            r*   test_hdbscan_sparser     sH    &**1-55L%a I~~H'++H5==M|3 (*vvz&:RVVY<O%P!&&(#E*..w7??L)A"3L"A'"JJJJ>>#$U+//9AA<7 &Q AC	z	-{kFJJ8T 
.	-	-s   F
Frr   c                    SS/n[        SSUSS9u  p#[        SSS	9R                  U5      n[        XR                  UR
                  5       H  u  pVn[        XVS
SS9  [        XWS
SS9  M     [        U S[        R                  S   SS9R                  [        5      nUR                  R                  S   S:X  d   eUR
                  R                  S   S:X  d   eg)z^
Tests that HDBSCAN centers are calculated and stored properly, and are
accurate to the data.
)re   re   )      @r   i  r   r   )r   r   centerscluster_stdbothF)store_centersr@   r=   g?)rtolatol)rr   r   r   r@   N)	r
   r   rE   zip
centroids_medoids_r   rD   r}   )rr   r   H_r   centercentroidmedoids           r*   test_hdbscan_centersr   .  s     :&G1gSVWDA
U
3
7
7
:C$'$N &qt<QT: %O
 	
 
c!f  >>"a'''<<a A%%%r,   c            	         [         R                  R                  S5      n U R                  SS5      n[	        SSSSSS	9R                  U5      n[         R                  " USS
9u  p4[        U5      S:X  d   eXCS:H     S:  d   e[	        SSSSSSS9R                  U5      n[         R                  " USS
9u  p4[        U5      S:X  d   eXCS:H     S:X  d   eg)zK
Tests that HDBSCAN single-cluster selection with epsilon works correctly.
r      ru   r>   re   eomTF)r   cluster_selection_epsiloncluster_selection_methodallow_single_clusterr@   )return_countsr      g
ףp=
?r   )r   r   r   r   rr   r@   N)r:   randomRandomStaterandr   rZ   uniquer#   )rngno_structurer'   unique_labelscountss        r*   .test_hdbscan_allow_single_cluster_with_epsilonr   G  s     ))


"C88C#L"%!&! k,  IIfDAM}""" 2%&+++ "&!&! k,  IIfDAM}"""2%&!+++r,   c                      SS/SS/SS/SS//n [        SU / SQSS9u  p[        S	S
9R                  U5      R                  n[	        [        U5      5      [        SU;   5      -
  nUS:X  d   e[        X25      S:    g)z
Validate that HDBSCAN can properly cluster this difficult synthetic
dataset. Note that DBSCAN fails on this (see HDBSCAN plotting
example)
g333333g333333?r"   i  )皙?gffffff?皙?r   r   )r   r   r   r   Fr?   r      Gz?N)r
   r   rE   rF   r#   r$   intr   )r   rD   r&   r'   r)   s        r*   test_hdbscan_better_than_dbscanr   j  s     u~t}q!fq"g>G+	DA % $$Q'//FS[!Cf$55J??&$t+r,   z	kwargs, XrU   r=   ru   r"   r   c                 >    [        SSSS.UD6R                  U 5        g)zc
Tests that HDBSCAN works correctly for array-likes and precomputed inputs
with non-finite points.
r=   Fmin_samplesr@   Nr4   )r   rE   )rD   kwargss     r*   test_hdbscan_usable_inputsr   ~  s!     00044Q7r,   c                     U " [         R                  " S5      5      nSn[        R                  " [        US9   [        SSS9R                  U5        SSS5        g! , (       d  f       g= f)zX
Tests that HDBSCAN raises the correct error when there are too few
non-zero distances.
)r   r   z#There exists points with fewer thanrX   rU   FrV   N)r:   zerosr[   r\   r]   r   rE   r   rD   r`   s      r*   -test_hdbscan_sparse_distances_too_few_nonzeror     sK     	bhhx()A
/C	z	-}5155a8 
.	-	-s   A
A)c                 $   [         R                  " S5      nSUSS2SS24'   SUSS2SS24'   XR                  -   nU " U5      nSn[        R                  " [
        US9   [        SS	S
9R                  U5        SSS5        g! , (       d  f       g= f)zi
Tests that HDBSCAN raises the correct error when the distance matrix
has multiple connected components.
)   r   r=   Nr>      z3HDBSCAN cannot be performed on a disconnected graphrX   rU   FrV   )r:   r   Tr[   r\   r]   r   rE   r   s      r*   0test_hdbscan_sparse_distances_disconnected_graphr     s     	AAbqb"1"fIAab"#gJ	CCAaA
?C	z	-}5155a8 
.	-	-s   B
Bc                     S n Sn[         R                  " [        US9   [        SU SS9R	                  [
        5        SSS5        [         R                  " [        US9   [        SU SS9R	                  [
        5        SSS5        [        [        [        R                  5      [        [        R                  5      -
  5      n[        U5      S	:  aD  [         R                  " [        US9   [        SUS	   SS9R	                  [
        5        SSS5        gg! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g= f)
zJ
Tests that HDBSCAN correctly raises an error for invalid metric choices.
c                     U $ r3   r4   )r6   s    r*   r7   2test_hdbscan_tree_invalid_metric.<locals>.<lambda>  s    r,   zV.* is not a valid metric for a .*-based algorithm\. Please select a different metric\.rX   r   F)rr   rW   r@   Nr   r   )r[   r\   r]   r   rE   rD   rI   r$   r   r   r   r#   )metric_callabler`   metrics_not_kds      r*    test_hdbscan_tree_invalid_metricr     s     "O	  
z	-)O%HLLQO 
.	z	-+oEJNNqQ 
.
 #h445F<P<P8QQRN
>Q]]:S1iq0ANRRSTU 21  
.	-	-	- 21s#   DD'*"D8
D$'
D58
Ec                      [        [        [        5      S-   SS9n Sn[        R                  " [
        US9   U R                  [        5        SSS5        g! , (       d  f       g= f)zl
Tests that HDBSCAN correctly raises an error when setting `min_samples`
larger than the number of samples.
r=   Fr   z min_samples (.*) must be at mostrX   N)r   r#   rD   r[   r\   r]   rE   )r   r`   s     r*   !test_hdbscan_too_many_min_samplesr     sB    
 c!fqju
5C
-C	z	-
 
.	-	-s   A
A#c                      [         R                  5       n [        R                  U S'   Sn[	        SSS9n[
        R                  " [        US9   UR                  U 5        SSS5        g! , (       d  f       g= f)zi
Tests that HDBSCAN correctly raises an error when providing precomputed
distances with `np.nan` values.
r   z(np.nan values found in precomputed-denserU   FrV   rX   N)	rD   r@   r:   rC   r   r[   r\   r]   rE   )X_nanr`   r   s      r*   "test_hdbscan_precomputed_dense_nanr     sR    
 FFHE&&E$K
4C
U
3C	z	- 
.	-	-s   A((
A6r   TFepsilonr   c                 @   Sn[        UU SS/SS/SS//S9u  pE[        SS9R                  U5      n[        UR                  UR
                  S9nUS-   US	-   US
-   1nUS-   SUS	-   SUS
-   S0n	[        UUU	UUS9n
[        [        U5      5       Vs0 s H!  o[        R                  " X[:H  5      S   S   _M#     nn[        [        U5      5       Vs0 s H
  oXU      _M     nn[        R                  " UR                  5      " U5      n[        X5        gs  snf s  snf )zJ
Tests that the `_do_labelling` helper function correctly assigns labels.
0   r   r   )r   r   Fr?   r   ru   r"   r   r=   condensed_treeclusterscluster_label_mapr   r   N)r
   r   rE   r   _single_linkage_tree_r   r   rI   r$   r:   where	vectorizer   r   )global_random_seedr   r   r   rD   r&   estr   r   r   r'   _yfirst_with_labely_to_labelsaligned_targets                  r*   test_labelling_distinctr    sB    I' FGG
		DA u

!
!!
$C#!!C4H4HN Ay1}i!m<H"Q9q=!Y]AN%+1")F ?C3q6lKlBHHQW-a033lK>B3q6lKlvr233lKK\\+//215Nv. LKs   (DDc                  @   Sn Sn[         R                  " SSUS4SSSUS4SS/[        S	9n[        UU 1U SU S-   S0S
SS9nUS   S:  n[	        U5      [	        US:H  5      :X  d   e[        UU 1U SU S-   S0S
SS9nUS   U:  n[	        U5      [	        US:H  5      :X  d   eg)z
Tests that the `_do_labelling` helper function correctly thresholds the
incoming lambda values given various `cluster_selection_epsilon` values.
r>   g      ?ru   r=   )r>   r=   r   r=   r   )r>   r"   r   r=   )r>   r   r   r=   )dtypeTr   valuer   N)r:   arrayr   r   sum)r   
MAX_LAMBDAr   r'   	num_noises        r*   test_labelling_thresholdingr    s    
 IJXX:q!:q!	
 	N %$aQ:!"#F w'!+Iy>S2....%$aQ:!"#F w'*4Iy>S2....r,   r   r   r   c                    [         R                  R                  S5      nUR                  S5      n[        U5      nSn[        R
                  " [        US9   [        SU SS9R                  U5        SSS5        g! , (       d  f       g= f)	zCheck that we raise an error if the centers are requested together with
a precomputed input matrix.

Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/27893
r   d   ru   z>Cannot store centers when using a precomputed distance matrix.rX   rU   F)rW   r   r@   N)	r:   r   r   r   r[   r\   r]   r   rE   )r   r   rD   X_disterr_msgs        r*   0test_hdbscan_error_precomputed_and_store_centersr  +  sk     ))


"C

8A #FNG	z	1 '	
 #f+ 
2	1	1s   A;;
B	
valid_algor   r   c                 @    [        SU SS9R                  [        5        g)zTest that HDBSCAN works with the "cosine" metric when the algorithm is set
to "brute" or "auto".

Non-regression test for issue #28631
cosineFr   N)r   rZ   rD   )r  s    r*   *test_hdbscan_cosine_metric_valid_algorithmr  ?  s     8z>JJ1Mr,   invalid_algoc                     [        SU SS9n[        R                  " [        SS9   UR	                  [
        5        SSS5        g! , (       d  f       g= f)zxTest that HDBSCAN raises an informative error is raised when an unsupported
algorithm is used with the "cosine" metric.
r  Fr   zcosine is not a valid metricrX   N)r   r[   r\   r]   rZ   rD   )r  hdbscans     r*   ,test_hdbscan_cosine_metric_invalid_algorithmr  I  s<    
 XEJG	z)G	HA 
I	H	Hs   A
Ac                      [         R                  R                  S5      R                  S5      n Sn[        R                  " [
        US9   [        SS9nUR                  U 5        SSS5        g! , (       d  f       g= f)zP
Test that HDBSCAN raises a FutureWarning when the `copy`
parameter is not set.
r   r  zCThe default value of `copy` will change from False to True in 1.10.rX   r   r   N)r:   r   r   r[   r   r   r   rE   )rD   r`   r   s      r*   !test_hdbscan_default_copy_warningr  T  sW    
 			a ''1A
PC	m3	/r*
 
0	/	/s   
A..
A<)r   )K__doc__numpyr:   r[   scipyr   scipy.spatialr   sklearn.clusterr   sklearn.cluster._hdbscan._treer   r   r    sklearn.cluster._hdbscan.hdbscanr	   sklearn.datasetsr
   sklearn.metricsr   sklearn.metrics.pairwiser   r   sklearn.neighborsr   r   sklearn.preprocessingr   sklearn.utilsr   sklearn.utils._testingr   r   sklearn.utils.fixesr   r   rD   r&   fit_transform
ALGORITHMSitemsr%   r+   markparametrizerS   ra   rl   ro   r   r   r   r   r   r   r   r   r   r   r   r   r  rB   r   r   r   r   r   r   r  r  r  r  r  r  )r   outs   00r*   <module>r5     s  
    " # 
 ? ' 1 H . 0 ! F >Cb11q!!$1""1%
 d1B1H1H1JK1Jvqc'l1JKK8 ):;J <J>A0 -/Q/Q./QR  S "	  ,>2% 3 -%P
0 78 884 -	H  )[!9: ; .9U :UD j1& 2&0 ,F,( 
M	"BHHq"&&kBFFA;-G$HI
M	"aVaV$45	q!fq!f88 .9	9 :	9 .99 :9 V0
 /$?QH-!/ . @!/H&/R :x*@A B& '89N :N )[)AB C	] Ls   *M