
    sgN                        d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlZd dlmZmZmZ d dlmZmZmZmZ d dlmZ d	Zd
ZdZdZ dZ!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/ej`                  jc                  d      d        Z2d Z3d Z4d  Z5ej`                  jm                  d!e      d"        Z7ej`                  jm                  d!e      d#        Z8d$ Z9d% Z:d& Z;d' Z<d( Z=ej`                  jm                  d!e      d)        Z>ej`                  jm                  d*g d+      ej`                  jm                  d,d-d.g      ej`                  jm                  d/g d0      ej`                  jm                  d!e      d1                             Z?ej`                  jm                  d!e      d2        Z@d3 ZAej`                  jm                  d!e      d4        ZBd5 ZCy)6    N)BZ2File)	resources)BytesIO)NamedTemporaryFile)dump_svmlight_fileload_svmlight_fileload_svmlight_files)assert_allcloseassert_array_almost_equalassert_array_equalcreate_memmap_backed_data)CSR_CONTAINERSzsklearn.datasets.tests.datazsvmlight_classification.txtzsvmlight_multilabel.txtzsvmlight_invalid.txtzsvmlight_invalid_order.txtc                 :    t        j                  t              | z  S N)r   filesTEST_DATA_MODULE)filenames    ^/var/www/html/venv/lib/python3.12/site-packages/sklearn/datasets/tests/test_svmlight_format.py_svmlight_local_test_file_pathr      s    ??+,x77    c                 ~    t        |       }|j                  d      5 }t        |fi |cddd       S # 1 sw Y   yxY w)zG
    Helper to load resource `filename` with `importlib.resources`
    rbN)r   openr   )r   kwargs	data_pathfs       r   _load_svmlight_local_test_filer   "   s>     /x8I		 /!!.v./ / /s   3<c                     t        t              \  } }| j                  j                  d   dk(  sJ | j                  d   dk(  sJ | j                  d   dk(  sJ |j                  d   dk(  sJ dD ]  \  }}}| ||f   |k(  rJ  | d   dk(  sJ | d   dk(  sJ | d	   dk(  sJ | d
   dk(  sJ | d   dk(  sJ | dxx   dz  cc<   | d   dk(  sJ t	        |g d       y )Nr               )r      g      @r   
   g)r      g      ?r!            ?r!      )r$         )r      )r   r)   )r!      )r!      )r$      )r   r$   r$   r)   )r!   r$   r0      r!   r$   )r   datafileindptrshaper   Xyijvals        r   test_load_svmlight_filer>   +   s   )(3DAq 88>>!!!!771:??771:771:?? 	1c Aw#~~ T7a<<T7a<<T7a<<U8q==U8q== dGqLGT7a<< q,-r   c                     t        j                  t              t        z  } t	        |       } t        |       \  }}t        j                  | t        j                        }	 t        |      \  }}t        |j                  |j                         t        ||       t        j                  |       y # t        j                  |       w xY wr   )r   r   r   r5   strr   osr   O_RDONLYr   dataclose)r   X1y1fdX2y2s         r   test_load_svmlight_file_fdrJ   N   s      01H<III	*FB	BKK	(B#B'B!"''2773!"b)
s   :B/ /Cc                      t        t              } t        t        |             \  }}t        |       \  }}t	        |j
                  |j
                         t	        ||       y r   )r   r5   r   r@   r
   rC   )r   rE   rF   rH   rI   s        r   test_load_svmlight_pathlibrL   a   sH    .x8II/FB	*FBBGGRWW%Br   c                  >    t        t        d      \  } }|g dk(  sJ y )NT
multilabel))r   r!   )r$    )r!   r$   )r   	multifile)r9   r:   s     r   "test_load_svmlight_file_multilabelrR   k   s!    ))EDAq****r   c                  h   t        t              } t        t        |       gdz  t        j
                        \  }}}}t        |j                         |j                                t        ||       |j                  t        j
                  k(  sJ |j                  t        j
                  k(  sJ t        t        |       gdz  t        j                        \  }}}}}	}
|j                  |j                  k(  sJ |j                  |	j                  k(  sJ |	j                  t        j                  k(  sJ y )Nr$   )dtyper0   )r   r5   r	   r@   npfloat32r   toarrayr   rT   float64)r   X_trainy_trainX_testy_testrE   rF   rH   rI   X3y3s              r   test_load_svmlight_filesr_   p   s    .x8I':	Y1BJJ($GWff w(&..*:;gv.==BJJ&&&<<2::%%%0#i.1AA1ERZZXBBB88rxx88rxx88rzz!!!r   c                  d   t        t        d      \  } }| j                  j                  d   dk(  sJ | j                  d   dk(  sJ | j                  d   dk(  sJ dD ]  \  }}}| ||f   |k(  rJ  t	        j
                  t              5  t        t        d       d d d        y # 1 sw Y   y xY w)	N   )
n_featuresr   r   r    r!   )r#   r%   r(   r+   r.   )r   r5   r6   r7   pytestraises
ValueErrorr8   s        r   "test_load_svmlight_file_n_featuresrf      s    )(rBDAq 88>>!!!!771:??771: L 	1cAw#~~ 
z	" @&xB?@ @ @s   B&&B/c                     t        t              \  } }t        dd      5 }|j                          t	        t              j                  d      5 }t        j
                  |j                  d      5 }t        j                  ||       d d d        d d d        t        |j                        \  }}t        j                  |j                         d d d        t        | j                         j                                t        |       t        dd      5 }|j                          t	        t              j                  d      5 }t        |j                  d      5 }t        j                  ||       d d d        d d d        t        |j                        \  }}t        j                  |j                         d d d        t        | j                         j                                t        |       y # 1 sw Y   sxY w# 1 sw Y   xxY w# 1 sw Y   FxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   }xY w)Nzsklearn-testz.gz)prefixsuffixr   wbz.bz2)r   r5   r   rD   r   r   gzipnameshutilcopyfileobjr   rA   remover   rW   r   )	r9   r:   tmpr   fh_outXgzygzXbzybzs	            r   test_load_compressedrv      s   )(3DAq	>%	@ C		+H5::4@ 	.A388T* .f""1f-.	. &chh/S 			#(( aiik3;;=9a%	>&	A S		+H5::4@ 	.A4( .F""1f-.	. &chh/S 			#(( aiik3;;=9a%'. .	. 	. . .	. 	. sw   /H!H0G8H?H/H7H+H4H+<?H78H=HH	
HHH($H++H4	0H77I c                  ~    t        j                  t              5  t        t               d d d        y # 1 sw Y   y xY wr   )rc   rd   re   r   invalidfilerP   r   r   test_load_invalid_filery      s+    	z	" 4&{34 4 4   3<c                  ~    t        j                  t              5  t        t               d d d        y # 1 sw Y   y xY wr   )rc   rd   re   r   invalidfile2rP   r   r   test_load_invalid_order_filer}      s+    	z	" 5&|45 5 5rz   c                      t        d      } t        j                  t              5  t	        | d       d d d        y # 1 sw Y   y xY w)Ns   -1 4:1.
1 0:1
F
zero_based)r   rc   rd   re   r   )r   s    r   test_load_zero_basedr      s8    #$A	z	" 01/0 0 0s	   <Ac                      d} d}t        |       }t        |d      \  }}|j                  dk(  sJ t        |       }t        |      }t        ||gd      \  }}}}	|j                  dk(  sJ |j                  dk(  sJ y )Ns   -1 1:1 2:2 3:3
s   -1 0:0 1:1
autor   )r!   r0   )r!   r4   )r   r   r7   r	   )
data1data2f1r9   r:   f2rE   rF   rH   rI   s
             r   test_load_zero_based_autor      s    EE	BbV4DAq77f	B	B("bfENBB88v88vr   c                     d} t        t        |       d      \  }}t        |g d       t        |j                         ddgddgd	dgg       t	        t        |       gd
      }t        t        |       d
      }||fD ]E  \  }}}t        |g d       t        |g d       t        |j                         ddgddgd	dgg       G y )NsM   
    3 qid:1 1:0.53 2:0.12
    2 qid:1 1:0.13 2:0.1
    7 qid:2 1:0.87 2:0.12Fquery_id)r0   r$   r   g(\?gQ?gp=
ף?皙?gףp=
?T)r!   r!   r$   )r   r   r   rW   r	   )rC   r9   r:   res1res2qids         r   test_load_with_qidr      s    D gdme<DAqq)$qyy{dD\D#;t$MN>Dgdmd;DD\ S	1c1i(3	*199;$c{T4L(QRSr   zPtesting the overflow of 32 bit sparse indexing requires a large amount of memoryc                     dj                  d t        dd      D              } t        t        |       d      \  }}}t	        |dd g d	       t	        t        j                  |      t        j                  dd             y)
zU
    load large libsvm / svmlight file with qid attribute. Tests 64-bit query ID
       
c              3   Z   K   | ]#  }d j                  |      j                          % yw)z.3 qid:{0} 1:0.53 2:0.12
2 qid:{0} 1:0.13 2:0.1N)formatencode).0r;   s     r   	<genexpr>z&test_load_large_qid.<locals>.<genexpr>   s,      	
 >DDQGNNP	
s   )+r!   i ZbTr   N)r0   r$   r0   r$   )joinranger   r   r   rU   uniquearange)rC   r9   r:   r   s       r   test_load_large_qidr      sm     ::	
1./	
D #74=4@IAq#qv|,ryy~ryy4D'EFr   c                      t        j                  t              5  t        t              } t        t
              }t        t        |       t        |      t        |       g       d d d        y # 1 sw Y   y xY wr   )rc   rd   re   r   r5   rx   r	   r@   )r   invalid_paths     r   test_load_invalid_file2r      sW    	z	" Q28<	5kBS^S->IOPQ Q Qs   AA++A4c                  v    t        j                  t              5  t        d       d d d        y # 1 sw Y   y xY w)NgzG?)rc   rd   	TypeErrorr   rP   r   r   test_not_a_filenamer      s-     
y	! !4 ! ! !   /8c                  v    t        j                  t              5  t        d       d d d        y # 1 sw Y   y xY w)Nztrou pic nic douille)rc   rd   OSErrorr   rP   r   r   test_invalid_filenamer      s,    	w	 3123 3 3r   csr_containerc                 \   t        t              \  }}|j                         } | t        j                  |            }|t        j
                  |j                  d            }|t        j
                  |j                  d            }|||fD ]  }|||fD ]  }dD ]  }	t        j                  t        j                  t        j                  t        j                  fD ]  }
t               }t        j                  |      r|j                  d   dk(  r|j                  }|j                  |
      }t!        |||d|	       |j#                  d       |j%                         }t'        |d      }dt(        j*                  z  |v sJ |j%                         }t'        |d      }dd	g|	   d
z   |v sJ t-        ||
|	      \  }}|j.                  |
k(  sJ t1        |j3                         j4                  |j4                         |j                         }t        j                  |      r|j                         }n|}|
t        j                  k(  r-t7        ||d       t7        |j                  |
d      |d       t7        ||d       t7        |j                  |
d      |d        
   y )Nr   )TFr!   testcommentr   utf-8zscikit-learn %sonezeroz-based)rT   r   r4   F)copyr'   )r   r5   rW   rU   
atleast_2dr   r7   rV   rX   int32int64r   spissparseTastyper   seekreadliner@   sklearn__version__r   rT   r   sorted_indicesindicesr   )r   X_sparsey_denseX_densey_sparseX_slicedy_slicedr9   r:   r   rT   r   X_inputr   rH   rI   X2_denseX_input_denses                     r   	test_dumpr     sV   6x@Hg GR]]734H 		(.."345H		(.."345H* 8GX. 7	A+ 6
 jj"**bhhI 5E	A
 {{1~!''!*/ CC  hhuoG&Av* FF1IjjlG!'73G,w/B/BBgMMMjjlG!'73G!6?:6AWLLL/:VFB88u,,,&r'8'8':'B'BBJJO!zz|H{{7+(/(9(/

*1-1M1#NN5uN=r1
 2-2N1#NN5uN=r2g567	8r   c                 $   g dg dg dg}g dg dg dg} | |      }||fD ]k  }t               }t        |||d       |j                  d	       |j                         d
k(  sJ |j                         dk(  sJ |j                         dk(  rkJ  y )N)r!   r   r0   r   r)   r   r   r   r   r   )r   r)   r   r!   r   r   r!   r   )r!   r   r!   )r!   r!   r   TrN   r   s   1 0:1 2:3 4:5
s   0,2 
s   0,1 1:5 3:1
)r   r   r   r   )r   r9   r   r   r:   r   s         r   test_dump_multilabelr   H  s    	/?;A)Y/GW%Hx  0I1at4	q	zz|1111zz|y(((zz|////0r   c                     d} d}d}d}d}| ||||gg dg dg dg dg}| ||||g}t               }t        |||       |j                  d       |j                         d	k(  sJ |j                         d
k(  sJ |j                         dk(  sJ |j                         dk(  sJ |j                         dk(  sJ |j                  d       t	        |      \  }}	t        ||j                                t        ||	       y )Nr!   g @gGz@g     ?r*   )g    eAg NgmCgkcEr   r   r   r   s+   1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1
s!   2.1 0:1000000000 1:2e+18 2:3e+27
s   3.01 
s   1.000000000000001 
s   1 
)r   r   r   r   r   r   rW   )
r   twothreeexactalmostr9   r:   r   rH   rI   s
             r   test_dump_conciser   W  s   
C
CEEF	c5%(	A 
c5%(A	Aq!QFF1I::<JJJJ::<@@@@::<:%%%::<2222::<7"""FF1I"FBa.a$r   c                     t        t              \  } }| j                         } t               }d}t	        | |||d       |j                  d       t        |d      \  }}t        | |j                                t        ||       d}t               }t        j                  t              5  t	        | |||       d d d        |j                  d      }t               }t	        | |||d       |j                  d       t        |d      \  }}t        | |j                                t        ||       t               }t        j                  t              5  t	        | ||d	       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)
Nz*This is a comment
spanning multiple lines.Fr   r   r   s   It is true that
½² = ¼)r   r   zI've got a  .)r   r5   rW   r   r   r   r   r   rc   rd   UnicodeDecodeErrordecodere   )r9   r:   r   ascii_commentrH   rI   utf8_commentunicode_comments           r   test_dump_commentr   v  sE   )(3DAq			A	AAMq!Q%HFF1Ie4FBa.a$ CL	A	)	* :1aL9: #))'2O	Aq!QEJFF1Ie4FBa.a$	A	z	" >1a,<=> >: :> >s   +E1E=1E:=Fc                  H   t        t              \  } }t               }|g}t        j                  t
              5  t        | ||       d d d        t               }t        j                  t
              5  t        | |d d |       d d d        y # 1 sw Y   FxY w# 1 sw Y   y xY w)N)r   r5   r   rc   rd   re   r   )r9   r:   r   y2ds       r   test_dump_invalidr     s    )(3DAq	A#C	z	" &1c1%& 		A	z	" )1afa() )	& &) )s   B2BBB!c                  p   t        t              \  } }| j                         } t        j                  | j
                  d         dz  }t               }t        | |||d       |j                  d       t        |dd      \  }}}t        | |j                                t        ||       t        ||       y )Nr   r$   Tr   r   )r   r5   rW   rU   r   r7   r   r   r   r   r   )r9   r:   r   r   rE   rF   	query_id1s          r   test_dump_query_idr     s    )(3DAq			Ayy$)H	Aq!QdCFF1I*1tMBIa.a$h	2r   c                  :   d} t        t        |       d      \  }}}g dg dg dg dg}g d}g d}t        ||       t        |j                         |       t        ||       t               }t	        ||||d       |j                  d	       t        |dd      \  }}}t        ||       t        |j                         |       t        ||       |j                  d	       t        |d
d      \  }}t        ||       t        |j                         |       y )Ns   
    1 qid:0 0:1 1:2 2:3
    0 qid:72048431380967004 0:1440446648 1:72048431380967004 2:236784985
    0 qid:-9223372036854775807 0:1440446648 1:72048431380967004 2:236784985
    3 qid:9223372036854775807  0:1440446648 1:72048431380967004 2:236784985Tr   )r!   r$   r0   )ixU   \.,N^iY)r!   r   r   r0   )r   r   l l    r   r   F)r   r   r   rW   r   r   )rC   r9   r:   r   true_Xtrue_ytrueQIDr   s           r   test_load_with_long_qidr     s    OD
 #74=4@IAq# 	222	F FOGq&!qyy{F+sG$	Aq!Q>FF1I"1tEIAq#q&!qyy{F+sG$FF1Ia%DADAqq&!qyy{F+r   c                 R   t               } | t        j                  d            }t        j                  g d      }t	        |||       dD ]X  }|j                  d       t        |d|      \  }}t        ||       t        |j                         |j                                Z y )N)r0   r4   r7   r   )r   TFr   r4   )rb   r   )	r   rU   zerosarrayr   r   r   r   rW   )r   r   r   r   r   r9   r:   s          r   test_load_zerosr     s    	A288&12FXXi Fvvq)+ A
	q	!!jI1!!V,!!))+v~~/?@	Ar   sparsity)r   r   g      ?gGz?r!   	n_samples   e   rb   )r$   r   )   c                    t         j                  j                  d      }|j                  dd||f      }| rd||| k  <    ||      }|j	                  dd|      }t               }t        |||       |j                  d       t        |j                               }d}	|dz  }
|
|	z
  }d|z  dz  }||
z
  }t        |||	|	      \  }}t        |||
|	      \  }}t        |||
      \  }}t        j                  |||g      }t        j                  |||g      }t        ||       t        |j                         |j                                y )Nr           r*   lowhighsizer$   r0   r4   r)   )rb   offsetlength)rb   r   )rU   randomRandomStateuniformrandintr   r   r   lengetvaluer   concatenater   vstackr   rW   )r   r   rb   r   rngr9   r:   r   r   mark_0mark_1length_0mark_2length_1X_0y_0X_1y_1X_2y_2y_concatX_concats                         r   test_load_with_offsetsr    sM   
 ))


"C#Y
,CDA!h,aA	2A	Aq!QFF1Iqzz|D FQYFHX]FH "	jHC "	jHC "!
6JHC~~sCo.Hyy#sC)Ha*aiik8+;+;+=>r   c           
      >   t         j                  j                  d      }t        j                  g dg dg dg dg dg dg dg      } | |      }|j                  \  }}|j                  dd|      }t        j                  |      dz  }t               }t        ||||	       |j                  d       t        |j                               }t        |      D ]  }	|j                  d       t        ||d
d|	      \  }
}}t        ||d
|	d      \  }}}t        j                  ||g      }t        j                  ||g      }t        j                   |
|g      }t#        ||       t%        ||       t#        |j'                         |j'                                 y )Nr   )r   r   r   r   r   r   )r!   r$   r0   r4   r   r    )r!   r   r0   r   r   r   )r   r   r   r   r   r!   )r!   r   r   r   r   r   r$   r   r   T)rb   r   r   r   r   )rU   r   r   r   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rW   )r   r  r9   r   rb   r:   r   r   r   markr  r  q_0r	  r
  q_1q_concatr  r  s                      r   "test_load_offset_exhaustive_splitsr    sn   
))


"C
	

	A 	aAGGIz	2Ayy#q(H	Aq!Q2FF1Iqzz|D d C	q	**tAd
S# +*tD
S# >>3*->>3*-99c3Z(!!X.8X.!!))+x/?/?/ABCr   c                      t        j                  t        d      5  t        t        dd       d d d        y # 1 sw Y   y xY w)Nzn_features is required)matchr0   )r   r   )rc   rd   re   r   r5   rP   r   r   test_load_with_offsets_errorr  8  s6    	z)A	B E&x!DE E Es	   8Ac                    t        | dz        }t        j                  j                  d      }|j	                  dd      j                  t        j                        }t        j                  g d      }t        j                  g d      }t        j                  g d      } ||||fd	      }t        |||d
       t        |d
      \  }	}
g d}|
|k(  sJ y)z
    Ensure that if y contains explicit zeros (i.e. elements of y.data equal to
    0) then those explicit zeros are not encoded.
    svm_explicit_zero*   r0   r)   )r   r$   r0   r    )r   r$   r$   r   r!   r$   )r   r!   r!   r!   r!   r   )r0   r0   r   TrN   )g       @r  )r   r*   N)
r@   rU   r   r   randnr   rX   r   r   r   )tmp_pathr   	save_pathr  r9   r6   r   rC   r:   _y_loady_trues               r    test_multilabel_y_explicit_zerosr#  =  s     H223I
))


#C		!Qrzz*AXXl#Fhh)*G88&'DtWf-V<A q!Y48"9>IAv)FVr   c                     t         j                  j                  d      }|j                  dd      }|j                  d      }t	        ||g      \  }}t        | dz        }t        |||       y)zEnsure that there is no ValueError when dumping a read-only `X`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28026
    r  r)   r$   svm_read_onlyN)rU   r   r   r  r   r@   r   )r  r  r9   r:   r  s        r   test_dump_read_onlyr&  W  sc     ))


#C		!QA		!A %aV,DAqH./Iq!Y'r   )Drk   rA   rm   bz2r   	importlibr   ior   tempfiler   numpyrU   rc   scipy.sparsesparser   r   sklearn.datasetsr   r   r	   sklearn.utils._testingr
   r   r   r   sklearn.utils.fixesr   r   r5   rQ   rx   r|   r   r   r>   rJ   rL   rR   r_   rf   rv   ry   r}   r   r   r   r  skipr   r   r   r   parametrizer   r   r   r   r   r   r   r   r  r  r  r#  r&  rP   r   r   <module>r3     s3    	     '     X X  /0 (%	$+8/ .F&+
" @"&84
5
0S" VGGQ!3
 .9B :BJ .90 :0%>>B
)3!,H .9
A :
A %;<r3i0z2.9!? : 3 1 =!?H .9'C :'CTE
 .9 :2(r   