From c686edd77a2ddc2ae37da9da331133aa895f167c Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Tue, 17 Dec 2024 10:07:34 -0800
Subject: [PATCH 1/6] feature: add new TSNE tests to expand test coverage

---
 sklearnex/manifold/tests/test_tsne.py | 91 ++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index 2ba8c64cdc..cbb63be7b2 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -16,7 +16,12 @@
 
 import numpy as np
 from numpy.testing import assert_allclose
-
+import pytest
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
 def test_sklearnex_import():
     from sklearnex.manifold import TSNE
@@ -24,3 +29,87 @@ def test_sklearnex_import():
     X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
     tsne = TSNE(n_components=2, perplexity=2.0).fit(X)
     assert "daal4py" in tsne.__module__
+
+from sklearnex.manifold import TSNE
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_tsne_import(dataframe, queue):
+    """Test TSNE compatibility with different backends and queues, and validate sklearnex module."""
+    X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    tsne = TSNE(n_components=2, perplexity=2.0).fit(X_df)
+    assert "daal4py" in tsne.__module__
+    assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute."
+    assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect."
+
+def test_valid_tsne_functionality():
+    """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases."""
+    # Test basic functionality
+    X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
+    tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic)
+    assert tsne.embedding_.shape == (4, 2)
+    
+    # Test with random data
+    np.random.seed(42)
+    X_random = np.random.rand(100, 10)
+    tsne_random = TSNE(n_components=2, perplexity=30.0).fit(X_random)
+    assert tsne_random.embedding_.shape == (100, 2)
+
+    # Test reproducibility
+    X_repro = np.random.rand(50, 10)
+    tsne_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro)
+    tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro)
+    assert_allclose(tsne_1, tsne_2, rtol=1e-5)
+
+    # Test perplexity edge case (close to dataset size)
+    X_perplexity = np.random.rand(10, 5)
+    tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity)
+    assert tsne_perplexity.embedding_.shape == (10, 2)
+
+    # Test large data
+    X_large = np.random.rand(1000, 50)
+    tsne_large = TSNE(n_components=2, perplexity=50.0).fit(X_large)
+    assert tsne_large.embedding_.shape == (1000, 2)
+
+    # Test valid minimal data
+    X_valid = np.array([[0, 0], [1, 1], [2, 2]])
+    tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid)
+    assert tsne_valid.embedding_.shape == (3, 2)
+
+def test_tsne_edge_cases_and_errors():
+    """Test TSNE with invalid, constant, and edge-case data."""
+    # Edge case: constant data
+    X_constant = np.ones((10, 10))
+    with pytest.raises(ValueError) as excinfo:
+        TSNE(n_components=2, perplexity=20).fit(X_constant)
+    assert "perplexity must be less than n_samples" in str(excinfo.value)
+
+    # Edge case: empty data
+    X_empty = np.empty((0, 10))
+    with pytest.raises(ValueError):
+        TSNE(n_components=2).fit(X_empty)
+
+    # Edge case: data with NaN or infinite values
+    X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]])
+    with pytest.raises(ValueError):
+        TSNE(n_components=2).fit(X_invalid)
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("num_blocks", [1, 2, 4])
+def test_tsne_full_fit_with_blocks(dataframe, queue, dtype, num_blocks):
+    """Test TSNE fitted on the full dataset, after splitting into blocks."""
+    np.random.seed(42)
+    X = np.random.rand(100, 20).astype(dtype)  # 100 samples, 20 features
+    X_blocks = np.array_split(X, num_blocks)   # Split into `num_blocks`
+
+    # Combine blocks back into a single dataset
+    X_combined = np.vstack(X_blocks)
+    X_df = _convert_to_dataframe(X_combined, sycl_queue=queue, target_df=dataframe)
+
+    # Fit TSNE on the combined dataset
+    tsne = TSNE(n_components=2, perplexity=30.0, random_state=42).fit(X_df)
+
+    # Validate embedding shape
+    assert tsne.embedding_.shape == (100, 2)
+

From f3f52233cdbcbf781ed05e288826db39b87c39ef Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Tue, 17 Dec 2024 16:08:26 -0800
Subject: [PATCH 2/6] test: add GPU validation and complex-dataset embedding
 tests for TSNE

---
 sklearnex/manifold/tests/test_tsne.py | 111 +++++++++++++++++++++-----
 1 file changed, 93 insertions(+), 18 deletions(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index cbb63be7b2..5482149a2b 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -17,6 +17,7 @@
 import numpy as np
 from numpy.testing import assert_allclose
 import pytest
+#Note: n_componets must be 2 for now
 from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
@@ -42,7 +43,7 @@ def test_sklearnex_tsne_import(dataframe, queue):
     assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute."
     assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect."
 
-def test_valid_tsne_functionality():
+def test_basic_tsne_functionality():
     """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases."""
     # Test basic functionality
     X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
@@ -61,7 +62,7 @@ def test_valid_tsne_functionality():
     tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro)
     assert_allclose(tsne_1, tsne_2, rtol=1e-5)
 
-    # Test perplexity edge case (close to dataset size)
+    # Test perplexity close to dataset size
     X_perplexity = np.random.rand(10, 5)
     tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity)
     assert tsne_perplexity.embedding_.shape == (10, 2)
@@ -76,8 +77,6 @@ def test_valid_tsne_functionality():
     tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid)
     assert tsne_valid.embedding_.shape == (3, 2)
 
-def test_tsne_edge_cases_and_errors():
-    """Test TSNE with invalid, constant, and edge-case data."""
     # Edge case: constant data
     X_constant = np.ones((10, 10))
     with pytest.raises(ValueError) as excinfo:
@@ -93,23 +92,99 @@ def test_tsne_edge_cases_and_errors():
     X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]])
     with pytest.raises(ValueError):
         TSNE(n_components=2).fit(X_invalid)
+    
+     # Edge Case: Perplexity Larger Than n_samples
+    X_small = np.random.rand(5, 2)  # 5 samples
+    with pytest.raises(ValueError) as excinfo:
+        TSNE(n_components=2, perplexity=10).fit(X_small)
+    assert "perplexity must be less than n_samples" in str(excinfo.value), \
+        "Large perplexity did not trigger expected ValueError."
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("dtype", [np.float32, np.float64])
-@pytest.mark.parametrize("num_blocks", [1, 2, 4])
-def test_tsne_full_fit_with_blocks(dataframe, queue, dtype, num_blocks):
-    """Test TSNE fitted on the full dataset, after splitting into blocks."""
+    # Edge Case: Sparse-Like High-Dimensional Data
     np.random.seed(42)
-    X = np.random.rand(100, 20).astype(dtype)  # 100 samples, 20 features
-    X_blocks = np.array_split(X, num_blocks)   # Split into `num_blocks`
+    X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99)
+    try:
+        tsne = TSNE(n_components=2, perplexity=30.0)
+        tsne.fit(X_sparse_like)
+    except Exception as e:
+        pytest.fail(f"TSNE failed on sparse-like high-dimensional data: {e}")
 
-    # Combine blocks back into a single dataset
-    X_combined = np.vstack(X_blocks)
-    X_df = _convert_to_dataframe(X_combined, sycl_queue=queue, target_df=dataframe)
+    # Edge Case: Extremely Low Perplexity
+    X = np.random.rand(10, 5)
+    try:
+        tsne_low_perplexity = TSNE(n_components=2, perplexity=0.5)
+        tsne_low_perplexity.fit(X)
+    except Exception as e:
+        pytest.fail(f"TSNE failed with low perplexity: {e}")
 
-    # Fit TSNE on the combined dataset
-    tsne = TSNE(n_components=2, perplexity=30.0, random_state=42).fit(X_df)
 
-    # Validate embedding shape
-    assert tsne.embedding_.shape == (100, 2)
 
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype):
+    """Test TSNE with a specific, highly diverse dataset."""
+    complex_array = np.array([
+        [0, 0, 0, 0],                    
+        [1, 1, 1, 1],                   
+        [-1e-9, 1e-9, -1e-9, 1e-9],      
+        [-1e9, 1e9, -1e9, 1e9],          
+        [1e-3, 1e3, -1e3, -1e-3],        
+        [0, 1e9, -1e-9, 1],              
+        [1, -1, 1, -1],                  
+        [42, 42, 42, 42],                
+        [0, 0, 1, -1],                   
+        [-1e5, 0, 1e5, -1], 
+        [2e9, 2e-9, -2e9, -2e-9],        
+        [3, -3, 3e3, -3e-3],             
+        [5e-5, 5e5, -5e-5, -5e5],        
+        [1, 0, -1e8, 1e8],               
+        [9e-7, -9e7, 9e-7, -9e7],        
+        [4e-4, 4e4, -4e-4, -4e4],        
+        [6e-6, -6e6, 6e6, -6e-6],        
+        [8, -8, 8e8, -8e-8],             
+    ], dtype=dtype)
+
+    complex_array_df = _convert_to_dataframe(complex_array, sycl_queue=queue, target_df=dataframe)
+
+    try:
+        tsne = TSNE(n_components=2, perplexity=5.0, random_state=42)
+        embedding = tsne.fit_transform(complex_array_df)
+        assert embedding.shape == (complex_array.shape[0], 2), "TSNE embedding shape is incorrect."
+    except Exception as e:
+        pytest.fail(f"TSNE failed on the specific complex dataset: {e}")
+
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues(device_filter_="gpu"))
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_tsne_gpu_validation(dataframe, queue, dtype):
+    """
+    GPU validation test for TSNE with a specific complex dataset.
+    """
+    # Complex dataset for testing
+    gpu_validation_array = np.array([
+        [0, 0, 0, 0],
+        [1, 1, 1, 1],
+        [-1e9, 1e9, -1e9, 1e9],
+        [1e-3, 1e3, -1e3, -1e-3],
+        [1, -1, 1, -1],
+        [0, 1e9, -1e-9, 1],
+        [-7e11, 7e11, -7e-11, 7e-11],
+        [4e-4, 4e4, -4e-4, -4e4],
+        [6e-6, -6e6, 6e6, -6e-6],
+        [0, 0, 0, 0],
+        [1, 1, 1, 1],
+    ], dtype=dtype)
+
+    expected_shape = (gpu_validation_array.shape[0], 2)
+    gpu_array_df = _convert_to_dataframe(
+        gpu_validation_array, sycl_queue=queue, target_df=dataframe
+    )
+    try:
+        tsne = TSNE(n_components=2, perplexity=3.0, random_state=42)
+        embedding = tsne.fit_transform(gpu_array_df)
+        assert embedding.shape == expected_shape, f"Incorrect embedding shape on GPU: {embedding.shape}."
+        assert np.all(np.isfinite(embedding)), "Embedding contains NaN or infinite values on GPU."
+        assert np.any(embedding != 0), "GPU embedding contains only zeros, which is invalid."
+
+    except Exception as e:
+        pytest.fail(f"TSNE failed on GPU validation test: {e}")
\ No newline at end of file

From 10da764bc017110065fe6b59d15a4453d2fd01d3 Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Wed, 18 Dec 2024 08:10:50 -0800
Subject: [PATCH 3/6] fix: format test_tsne.py with black and isort

---
 sklearnex/manifold/tests/test_tsne.py | 122 +++++++++++++++-----------
 1 file changed, 73 insertions(+), 49 deletions(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index 5482149a2b..90506aac03 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -15,15 +15,17 @@
 # ===============================================================================
 
 import numpy as np
-from numpy.testing import assert_allclose
 import pytest
-#Note: n_componets must be 2 for now
+from numpy.testing import assert_allclose
+
+# Note: n_components must be 2 for now
 from onedal.tests.utils._dataframes_support import (
     _as_numpy,
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
 
+
 def test_sklearnex_import():
     from sklearnex.manifold import TSNE
 
@@ -31,8 +33,10 @@ def test_sklearnex_import():
     tsne = TSNE(n_components=2, perplexity=2.0).fit(X)
     assert "daal4py" in tsne.__module__
 
+
 from sklearnex.manifold import TSNE
 
+
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 def test_sklearnex_tsne_import(dataframe, queue):
     """Test TSNE compatibility with different backends and queues, and validate sklearnex module."""
@@ -43,13 +47,14 @@ def test_sklearnex_tsne_import(dataframe, queue):
     assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute."
     assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect."
 
+
 def test_basic_tsne_functionality():
     """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases."""
     # Test basic functionality
     X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
     tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic)
     assert tsne.embedding_.shape == (4, 2)
-    
+
     # Test with random data
     np.random.seed(42)
     X_random = np.random.rand(100, 10)
@@ -92,13 +97,14 @@ def test_basic_tsne_functionality():
     X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]])
     with pytest.raises(ValueError):
         TSNE(n_components=2).fit(X_invalid)
-    
-     # Edge Case: Perplexity Larger Than n_samples
+
+    # Edge Case: Perplexity Larger Than n_samples
     X_small = np.random.rand(5, 2)  # 5 samples
     with pytest.raises(ValueError) as excinfo:
         TSNE(n_components=2, perplexity=10).fit(X_small)
-    assert "perplexity must be less than n_samples" in str(excinfo.value), \
-        "Large perplexity did not trigger expected ValueError."
+    assert "perplexity must be less than n_samples" in str(
+        excinfo.value
+    ), "Large perplexity did not trigger expected ValueError."
 
     # Edge Case: Sparse-Like High-Dimensional Data
     np.random.seed(42)
@@ -118,62 +124,74 @@ def test_basic_tsne_functionality():
         pytest.fail(f"TSNE failed with low perplexity: {e}")
 
 
-
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype):
     """Test TSNE with a specific, highly diverse dataset."""
-    complex_array = np.array([
-        [0, 0, 0, 0],                    
-        [1, 1, 1, 1],                   
-        [-1e-9, 1e-9, -1e-9, 1e-9],      
-        [-1e9, 1e9, -1e9, 1e9],          
-        [1e-3, 1e3, -1e3, -1e-3],        
-        [0, 1e9, -1e-9, 1],              
-        [1, -1, 1, -1],                  
-        [42, 42, 42, 42],                
-        [0, 0, 1, -1],                   
-        [-1e5, 0, 1e5, -1], 
-        [2e9, 2e-9, -2e9, -2e-9],        
-        [3, -3, 3e3, -3e-3],             
-        [5e-5, 5e5, -5e-5, -5e5],        
-        [1, 0, -1e8, 1e8],               
-        [9e-7, -9e7, 9e-7, -9e7],        
-        [4e-4, 4e4, -4e-4, -4e4],        
-        [6e-6, -6e6, 6e6, -6e-6],        
-        [8, -8, 8e8, -8e-8],             
-    ], dtype=dtype)
-
-    complex_array_df = _convert_to_dataframe(complex_array, sycl_queue=queue, target_df=dataframe)
+    complex_array = np.array(
+        [
+            [0, 0, 0, 0],
+            [1, 1, 1, 1],
+            [-1e-9, 1e-9, -1e-9, 1e-9],
+            [-1e9, 1e9, -1e9, 1e9],
+            [1e-3, 1e3, -1e3, -1e-3],
+            [0, 1e9, -1e-9, 1],
+            [1, -1, 1, -1],
+            [42, 42, 42, 42],
+            [0, 0, 1, -1],
+            [-1e5, 0, 1e5, -1],
+            [2e9, 2e-9, -2e9, -2e-9],
+            [3, -3, 3e3, -3e-3],
+            [5e-5, 5e5, -5e-5, -5e5],
+            [1, 0, -1e8, 1e8],
+            [9e-7, -9e7, 9e-7, -9e7],
+            [4e-4, 4e4, -4e-4, -4e4],
+            [6e-6, -6e6, 6e6, -6e-6],
+            [8, -8, 8e8, -8e-8],
+        ],
+        dtype=dtype,
+    )
+
+    complex_array_df = _convert_to_dataframe(
+        complex_array, sycl_queue=queue, target_df=dataframe
+    )
 
     try:
         tsne = TSNE(n_components=2, perplexity=5.0, random_state=42)
         embedding = tsne.fit_transform(complex_array_df)
-        assert embedding.shape == (complex_array.shape[0], 2), "TSNE embedding shape is incorrect."
+        assert embedding.shape == (
+            complex_array.shape[0],
+            2,
+        ), "TSNE embedding shape is incorrect."
     except Exception as e:
         pytest.fail(f"TSNE failed on the specific complex dataset: {e}")
 
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues(device_filter_="gpu"))
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="gpu")
+)
 @pytest.mark.parametrize("dtype", [np.float32, np.float64])
 def test_tsne_gpu_validation(dataframe, queue, dtype):
     """
     GPU validation test for TSNE with a specific complex dataset.
     """
     # Complex dataset for testing
-    gpu_validation_array = np.array([
-        [0, 0, 0, 0],
-        [1, 1, 1, 1],
-        [-1e9, 1e9, -1e9, 1e9],
-        [1e-3, 1e3, -1e3, -1e-3],
-        [1, -1, 1, -1],
-        [0, 1e9, -1e-9, 1],
-        [-7e11, 7e11, -7e-11, 7e-11],
-        [4e-4, 4e4, -4e-4, -4e4],
-        [6e-6, -6e6, 6e6, -6e-6],
-        [0, 0, 0, 0],
-        [1, 1, 1, 1],
-    ], dtype=dtype)
+    gpu_validation_array = np.array(
+        [
+            [0, 0, 0, 0],
+            [1, 1, 1, 1],
+            [-1e9, 1e9, -1e9, 1e9],
+            [1e-3, 1e3, -1e3, -1e-3],
+            [1, -1, 1, -1],
+            [0, 1e9, -1e-9, 1],
+            [-7e11, 7e11, -7e-11, 7e-11],
+            [4e-4, 4e4, -4e-4, -4e4],
+            [6e-6, -6e6, 6e6, -6e-6],
+            [0, 0, 0, 0],
+            [1, 1, 1, 1],
+        ],
+        dtype=dtype,
+    )
 
     expected_shape = (gpu_validation_array.shape[0], 2)
     gpu_array_df = _convert_to_dataframe(
@@ -182,9 +200,15 @@ def test_tsne_gpu_validation(dataframe, queue, dtype):
     try:
         tsne = TSNE(n_components=2, perplexity=3.0, random_state=42)
         embedding = tsne.fit_transform(gpu_array_df)
-        assert embedding.shape == expected_shape, f"Incorrect embedding shape on GPU: {embedding.shape}."
-        assert np.all(np.isfinite(embedding)), "Embedding contains NaN or infinite values on GPU."
-        assert np.any(embedding != 0), "GPU embedding contains only zeros, which is invalid."
+        assert (
+            embedding.shape == expected_shape
+        ), f"Incorrect embedding shape on GPU: {embedding.shape}."
+        assert np.all(
+            np.isfinite(embedding)
+        ), "Embedding contains NaN or infinite values on GPU."
+        assert np.any(
+            embedding != 0
+        ), "GPU embedding contains only zeros, which is invalid."
 
     except Exception as e:
-        pytest.fail(f"TSNE failed on GPU validation test: {e}")
\ No newline at end of file
+        pytest.fail(f"TSNE failed on GPU validation test: {e}")

From 2f3e9fae2c06dd8886a69fa968a6b69ec517632f Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Wed, 18 Dec 2024 14:11:20 -0800
Subject: [PATCH 4/6] fix: constant-data test checks embedding shape instead of error message

---
 sklearnex/manifold/tests/test_tsne.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index 90506aac03..0c0f7930db 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -84,9 +84,9 @@ def test_basic_tsne_functionality():
 
     # Edge case: constant data
     X_constant = np.ones((10, 10))
-    with pytest.raises(ValueError) as excinfo:
-        TSNE(n_components=2, perplexity=20).fit(X_constant)
-    assert "perplexity must be less than n_samples" in str(excinfo.value)
+    tsne = TSNE(n_components=2, perplexity=5, random_state=42)
+    embedding = tsne.fit(X_constant).embedding_
+    assert embedding.shape == (10, 2), f"Unexpected embedding shape: {embedding.shape}"
 
     # Edge case: empty data
     X_empty = np.empty((0, 10))

From 739a90c22b9310bda57563d6a53d1cd382469915 Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Wed, 18 Dec 2024 14:52:25 -0800
Subject: [PATCH 5/6] fix: remove the perplexity ValueError test case

---
 sklearnex/manifold/tests/test_tsne.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index 0c0f7930db..73a654d7e5 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -98,14 +98,6 @@ def test_basic_tsne_functionality():
     with pytest.raises(ValueError):
         TSNE(n_components=2).fit(X_invalid)
 
-    # Edge Case: Perplexity Larger Than n_samples
-    X_small = np.random.rand(5, 2)  # 5 samples
-    with pytest.raises(ValueError) as excinfo:
-        TSNE(n_components=2, perplexity=10).fit(X_small)
-    assert "perplexity must be less than n_samples" in str(
-        excinfo.value
-    ), "Large perplexity did not trigger expected ValueError."
-
     # Edge Case: Sparse-Like High-Dimensional Data
     np.random.seed(42)
     X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99)

From 822e614a5ddc56bfd5b6d930e102730b31e11636 Mon Sep 17 00:00:00 2001
From: "yue.jiao" <yuejiao@smtp.intel.com>
Date: Thu, 19 Dec 2024 08:37:41 -0800
Subject: [PATCH 6/6] fix: rework tests based on review comments

---
 sklearnex/manifold/tests/test_tsne.py | 143 +++++++++++---------------
 1 file changed, 60 insertions(+), 83 deletions(-)

diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py
index 73a654d7e5..4803ab2be1 100755
--- a/sklearnex/manifold/tests/test_tsne.py
+++ b/sklearnex/manifold/tests/test_tsne.py
@@ -48,70 +48,94 @@ def test_sklearnex_tsne_import(dataframe, queue):
     assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect."
 
 
-def test_basic_tsne_functionality():
-    """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases."""
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_tsne_functionality_and_edge_cases(dataframe, queue, dtype):
+    """
+    TSNE test covering basic functionality and edge cases using get_dataframes_and_queues.
+    """
     # Test basic functionality
-    X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
-    tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic)
-    assert tsne.embedding_.shape == (4, 2)
+    X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=dtype)
+    X_basic_df = _convert_to_dataframe(X_basic, sycl_queue=queue, target_df=dataframe)
+    tsne_basic = TSNE(n_components=2, perplexity=2.0, random_state=42)
+    embedding_basic = tsne_basic.fit_transform(X_basic_df)
+    assert embedding_basic.shape == (4, 2)
 
     # Test with random data
-    np.random.seed(42)
-    X_random = np.random.rand(100, 10)
-    tsne_random = TSNE(n_components=2, perplexity=30.0).fit(X_random)
-    assert tsne_random.embedding_.shape == (100, 2)
+    X_random = np.random.rand(100, 10).astype(dtype)
+    X_random_df = _convert_to_dataframe(X_random, sycl_queue=queue, target_df=dataframe)
+    tsne_random = TSNE(n_components=2, perplexity=30.0, random_state=42)
+    embedding_random = tsne_random.fit_transform(X_random_df)
+    assert embedding_random.shape == (100, 2)
 
     # Test reproducibility
-    X_repro = np.random.rand(50, 10)
-    tsne_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro)
-    tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro)
-    assert_allclose(tsne_1, tsne_2, rtol=1e-5)
-
-    # Test perplexity close to dataset size
-    X_perplexity = np.random.rand(10, 5)
-    tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity)
-    assert tsne_perplexity.embedding_.shape == (10, 2)
+    X_repro = np.random.rand(50, 10).astype(dtype)
+    X_repro_df = _convert_to_dataframe(X_repro, sycl_queue=queue, target_df=dataframe)
+    tsne_repro_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro_df)
+    tsne_repro_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro_df)
+    tsne_repro_1_np = _as_numpy(tsne_repro_1)
+    tsne_repro_2_np = _as_numpy(tsne_repro_2)
+    assert_allclose(tsne_repro_1_np, tsne_repro_2_np, rtol=1e-5)
 
     # Test large data
-    X_large = np.random.rand(1000, 50)
-    tsne_large = TSNE(n_components=2, perplexity=50.0).fit(X_large)
-    assert tsne_large.embedding_.shape == (1000, 2)
+    X_large = np.random.rand(1000, 50).astype(dtype)
+    X_large_df = _convert_to_dataframe(X_large, sycl_queue=queue, target_df=dataframe)
+    tsne_large = TSNE(n_components=2, perplexity=50.0, random_state=42)
+    embedding_large = tsne_large.fit_transform(X_large_df)
+    assert embedding_large.shape == (1000, 2)
 
     # Test valid minimal data
-    X_valid = np.array([[0, 0], [1, 1], [2, 2]])
-    tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid)
-    assert tsne_valid.embedding_.shape == (3, 2)
+    X_valid = np.array([[0, 0], [1, 1], [2, 2]], dtype=dtype)
+    X_valid_df = _convert_to_dataframe(X_valid, sycl_queue=queue, target_df=dataframe)
+    tsne_valid = TSNE(n_components=2, perplexity=2, random_state=42)
+    embedding_valid = tsne_valid.fit_transform(X_valid_df)
+    assert embedding_valid.shape == (3, 2)
 
     # Edge case: constant data
-    X_constant = np.ones((10, 10))
-    tsne = TSNE(n_components=2, perplexity=5, random_state=42)
-    embedding = tsne.fit(X_constant).embedding_
-    assert embedding.shape == (10, 2), f"Unexpected embedding shape: {embedding.shape}"
+    X_constant = np.ones((10, 10), dtype=dtype)
+    X_constant_df = _convert_to_dataframe(
+        X_constant, sycl_queue=queue, target_df=dataframe
+    )
+    tsne_constant = TSNE(n_components=2, perplexity=5, random_state=42)
+    embedding_constant = tsne_constant.fit(X_constant_df).embedding_
+    assert embedding_constant.shape == (10, 2)
 
     # Edge case: empty data
-    X_empty = np.empty((0, 10))
+    X_empty = np.empty((0, 10), dtype=dtype)
     with pytest.raises(ValueError):
-        TSNE(n_components=2).fit(X_empty)
+        TSNE(n_components=2).fit(
+            _convert_to_dataframe(X_empty, sycl_queue=queue, target_df=dataframe)
+        )
 
     # Edge case: data with NaN or infinite values
-    X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]])
+    X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]], dtype=dtype)
     with pytest.raises(ValueError):
-        TSNE(n_components=2).fit(X_invalid)
+        TSNE(n_components=2).fit(
+            _convert_to_dataframe(X_invalid, sycl_queue=queue, target_df=dataframe)
+        )
 
     # Edge Case: Sparse-Like High-Dimensional Data
     np.random.seed(42)
-    X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99)
+    X_sparse_like = np.random.rand(50, 500).astype(dtype) * (
+        np.random.rand(50, 500) > 0.99
+    )
+    X_sparse_like_df = _convert_to_dataframe(
+        X_sparse_like, sycl_queue=queue, target_df=dataframe
+    )
     try:
         tsne = TSNE(n_components=2, perplexity=30.0)
-        tsne.fit(X_sparse_like)
+        tsne.fit(X_sparse_like_df)
     except Exception as e:
         pytest.fail(f"TSNE failed on sparse-like high-dimensional data: {e}")
 
     # Edge Case: Extremely Low Perplexity
-    X = np.random.rand(10, 5)
+    X_low_perplexity = np.random.rand(10, 5).astype(dtype)
+    X_low_perplexity_df = _convert_to_dataframe(
+        X_low_perplexity, sycl_queue=queue, target_df=dataframe
+    )
     try:
         tsne_low_perplexity = TSNE(n_components=2, perplexity=0.5)
-        tsne_low_perplexity.fit(X)
+        tsne_low_perplexity.fit(X_low_perplexity_df)
     except Exception as e:
         pytest.fail(f"TSNE failed with low perplexity: {e}")
 
@@ -157,50 +181,3 @@ def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype):
         ), "TSNE embedding shape is incorrect."
     except Exception as e:
         pytest.fail(f"TSNE failed on the specific complex dataset: {e}")
-
-
-@pytest.mark.parametrize(
-    "dataframe,queue", get_dataframes_and_queues(device_filter_="gpu")
-)
-@pytest.mark.parametrize("dtype", [np.float32, np.float64])
-def test_tsne_gpu_validation(dataframe, queue, dtype):
-    """
-    GPU validation test for TSNE with a specific complex dataset.
-    """
-    # Complex dataset for testing
-    gpu_validation_array = np.array(
-        [
-            [0, 0, 0, 0],
-            [1, 1, 1, 1],
-            [-1e9, 1e9, -1e9, 1e9],
-            [1e-3, 1e3, -1e3, -1e-3],
-            [1, -1, 1, -1],
-            [0, 1e9, -1e-9, 1],
-            [-7e11, 7e11, -7e-11, 7e-11],
-            [4e-4, 4e4, -4e-4, -4e4],
-            [6e-6, -6e6, 6e6, -6e-6],
-            [0, 0, 0, 0],
-            [1, 1, 1, 1],
-        ],
-        dtype=dtype,
-    )
-
-    expected_shape = (gpu_validation_array.shape[0], 2)
-    gpu_array_df = _convert_to_dataframe(
-        gpu_validation_array, sycl_queue=queue, target_df=dataframe
-    )
-    try:
-        tsne = TSNE(n_components=2, perplexity=3.0, random_state=42)
-        embedding = tsne.fit_transform(gpu_array_df)
-        assert (
-            embedding.shape == expected_shape
-        ), f"Incorrect embedding shape on GPU: {embedding.shape}."
-        assert np.all(
-            np.isfinite(embedding)
-        ), "Embedding contains NaN or infinite values on GPU."
-        assert np.any(
-            embedding != 0
-        ), "GPU embedding contains only zeros, which is invalid."
-
-    except Exception as e:
-        pytest.fail(f"TSNE failed on GPU validation test: {e}")