From c686edd77a2ddc2ae37da9da331133aa895f167c Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Tue, 17 Dec 2024 10:07:34 -0800 Subject: [PATCH 1/6] feature: new tests added for tsne to expand test coverage --- sklearnex/manifold/tests/test_tsne.py | 91 ++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 2ba8c64cdc..cbb63be7b2 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -16,7 +16,12 @@ import numpy as np from numpy.testing import assert_allclose - +import pytest +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) def test_sklearnex_import(): from sklearnex.manifold import TSNE @@ -24,3 +29,87 @@ def test_sklearnex_import(): X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) tsne = TSNE(n_components=2, perplexity=2.0).fit(X) assert "daal4py" in tsne.__module__ + +from sklearnex.manifold import TSNE + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_tsne_import(dataframe, queue): + """Test TSNE compatibility with different backends and queues, and validate sklearnex module.""" + X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) + X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + tsne = TSNE(n_components=2, perplexity=2.0).fit(X_df) + assert "daal4py" in tsne.__module__ + assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute." + assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect." + +def test_valid_tsne_functionality(): + """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases.""" + # Test basic functionality + X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) + tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic) + assert tsne.embedding_.shape == (4, 2) + + # Test with random data + np.random.seed(42) + X_random = np.random.rand(100, 10) + tsne_random = TSNE(n_components=2, perplexity=30.0).fit(X_random) + assert tsne_random.embedding_.shape == (100, 2) + + # Test reproducibility + X_repro = np.random.rand(50, 10) + tsne_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro) + tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro) + assert_allclose(tsne_1, tsne_2, rtol=1e-5) + + # Test perplexity edge case (close to dataset size) + X_perplexity = np.random.rand(10, 5) + tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity) + assert tsne_perplexity.embedding_.shape == (10, 2) + + # Test large data + X_large = np.random.rand(1000, 50) + tsne_large = TSNE(n_components=2, perplexity=50.0).fit(X_large) + assert tsne_large.embedding_.shape == (1000, 2) + + # Test valid minimal data + X_valid = np.array([[0, 0], [1, 1], [2, 2]]) + tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid) + assert tsne_valid.embedding_.shape == (3, 2) + +def test_tsne_edge_cases_and_errors(): + """Test TSNE with invalid, constant, and edge-case data.""" + # Edge case: constant data + X_constant = np.ones((10, 10)) + with pytest.raises(ValueError) as excinfo: + TSNE(n_components=2, perplexity=20).fit(X_constant) + assert "perplexity must be less than n_samples" in str(excinfo.value) + + # Edge case: empty data + X_empty = np.empty((0, 10)) + with pytest.raises(ValueError): + TSNE(n_components=2).fit(X_empty) + + # Edge case: data with NaN or infinite values + X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]]) + with pytest.raises(ValueError): + TSNE(n_components=2).fit(X_invalid) + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("num_blocks", [1, 2, 4]) +def test_tsne_full_fit_with_blocks(dataframe, queue, dtype, num_blocks): + """Test TSNE fitted on the full dataset, after splitting into blocks.""" + np.random.seed(42) + X = np.random.rand(100, 20).astype(dtype) # 100 samples, 20 features + X_blocks = np.array_split(X, num_blocks) # Split into `num_blocks` + + # Combine blocks back into a single dataset + X_combined = np.vstack(X_blocks) + X_df = _convert_to_dataframe(X_combined, sycl_queue=queue, target_df=dataframe) + + # Fit TSNE on the combined dataset + tsne = TSNE(n_components=2, perplexity=30.0, random_state=42).fit(X_df) + + # Validate embedding shape + assert tsne.embedding_.shape == (100, 2) + From f3f52233cdbcbf781ed05e288826db39b87c39ef Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Tue, 17 Dec 2024 16:08:26 -0800 Subject: [PATCH 2/6] test: additional test for gpu and golden data embedding test for tsne --- sklearnex/manifold/tests/test_tsne.py | 111 +++++++++++++++++++++----- 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index cbb63be7b2..5482149a2b 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -17,6 +17,7 @@ import numpy as np from numpy.testing import assert_allclose import pytest +#Note: n_componets must be 2 for now from onedal.tests.utils._dataframes_support import ( _as_numpy, _convert_to_dataframe, @@ -42,7 +43,7 @@ def test_sklearnex_tsne_import(dataframe, queue): assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute." assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect." -def test_valid_tsne_functionality(): +def test_basic_tsne_functionality(): """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases.""" # Test basic functionality X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) @@ -61,7 +62,7 @@ def test_valid_tsne_functionality(): tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro) assert_allclose(tsne_1, tsne_2, rtol=1e-5) - # Test perplexity edge case (close to dataset size) + # Test perplexity close to dataset size X_perplexity = np.random.rand(10, 5) tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity) assert tsne_perplexity.embedding_.shape == (10, 2) @@ -76,8 +77,6 @@ def test_valid_tsne_functionality(): tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid) assert tsne_valid.embedding_.shape == (3, 2) -def test_tsne_edge_cases_and_errors(): - """Test TSNE with invalid, constant, and edge-case data.""" # Edge case: constant data X_constant = np.ones((10, 10)) with pytest.raises(ValueError) as excinfo: @@ -93,23 +92,99 @@ def test_tsne_edge_cases_and_errors(): X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]]) with pytest.raises(ValueError): TSNE(n_components=2).fit(X_invalid) + + # Edge Case: Perplexity Larger Than n_samples + X_small = np.random.rand(5, 2) # 5 samples + with pytest.raises(ValueError) as excinfo: + TSNE(n_components=2, perplexity=10).fit(X_small) + assert "perplexity must be less than n_samples" in str(excinfo.value), \ + "Large perplexity did not trigger expected ValueError." -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("num_blocks", [1, 2, 4]) -def test_tsne_full_fit_with_blocks(dataframe, queue, dtype, num_blocks): - """Test TSNE fitted on the full dataset, after splitting into blocks.""" + # Edge Case: Sparse-Like High-Dimensional Data np.random.seed(42) - X = np.random.rand(100, 20).astype(dtype) # 100 samples, 20 features - X_blocks = np.array_split(X, num_blocks) # Split into `num_blocks` + X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99) + try: + tsne = TSNE(n_components=2, perplexity=30.0) + tsne.fit(X_sparse_like) + except Exception as e: + pytest.fail(f"TSNE failed on sparse-like high-dimensional data: {e}") - # Combine blocks back into a single dataset - X_combined = np.vstack(X_blocks) - X_df = _convert_to_dataframe(X_combined, sycl_queue=queue, target_df=dataframe) + # Edge Case: Extremely Low Perplexity + X = np.random.rand(10, 5) + try: + tsne_low_perplexity = TSNE(n_components=2, perplexity=0.5) + tsne_low_perplexity.fit(X) + except Exception as e: + pytest.fail(f"TSNE failed with low perplexity: {e}") - # Fit TSNE on the combined dataset - tsne = TSNE(n_components=2, perplexity=30.0, random_state=42).fit(X_df) - # Validate embedding shape - assert tsne.embedding_.shape == (100, 2) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype): + """Test TSNE with a specific, highly diverse dataset.""" + complex_array = np.array([ + [0, 0, 0, 0], + [1, 1, 1, 1], + [-1e-9, 1e-9, -1e-9, 1e-9], + [-1e9, 1e9, -1e9, 1e9], + [1e-3, 1e3, -1e3, -1e-3], + [0, 1e9, -1e-9, 1], + [1, -1, 1, -1], + [42, 42, 42, 42], + [0, 0, 1, -1], + [-1e5, 0, 1e5, -1], + [2e9, 2e-9, -2e9, -2e-9], + [3, -3, 3e3, -3e-3], + [5e-5, 5e5, -5e-5, -5e5], + [1, 0, -1e8, 1e8], + [9e-7, -9e7, 9e-7, -9e7], + [4e-4, 4e4, -4e-4, -4e4], + [6e-6, -6e6, 6e6, -6e-6], + [8, -8, 8e8, -8e-8], + ], dtype=dtype) + + complex_array_df = _convert_to_dataframe(complex_array, sycl_queue=queue, target_df=dataframe) + + try: + tsne = TSNE(n_components=2, perplexity=5.0, random_state=42) + embedding = tsne.fit_transform(complex_array_df) + assert embedding.shape == (complex_array.shape[0], 2), "TSNE embedding shape is incorrect." + except Exception as e: + pytest.fail(f"TSNE failed on the specific complex dataset: {e}") + + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues(device_filter_="gpu")) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_gpu_validation(dataframe, queue, dtype): + """ + GPU validation test for TSNE with a specific complex dataset. + """ + # Complex dataset for testing + gpu_validation_array = np.array([ + [0, 0, 0, 0], + [1, 1, 1, 1], + [-1e9, 1e9, -1e9, 1e9], + [1e-3, 1e3, -1e3, -1e-3], + [1, -1, 1, -1], + [0, 1e9, -1e-9, 1], + [-7e11, 7e11, -7e-11, 7e-11], + [4e-4, 4e4, -4e-4, -4e4], + [6e-6, -6e6, 6e6, -6e-6], + [0, 0, 0, 0], + [1, 1, 1, 1], + ], dtype=dtype) + + expected_shape = (gpu_validation_array.shape[0], 2) + gpu_array_df = _convert_to_dataframe( + gpu_validation_array, sycl_queue=queue, target_df=dataframe + ) + try: + tsne = TSNE(n_components=2, perplexity=3.0, random_state=42) + embedding = tsne.fit_transform(gpu_array_df) + assert embedding.shape == expected_shape, f"Incorrect embedding shape on GPU: {embedding.shape}." + assert np.all(np.isfinite(embedding)), "Embedding contains NaN or infinite values on GPU." + assert np.any(embedding != 0), "GPU embedding contains only zeros, which is invalid." + + except Exception as e: + pytest.fail(f"TSNE failed on GPU validation test: {e}") \ No newline at end of file From 10da764bc017110065fe6b59d15a4453d2fd01d3 Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Wed, 18 Dec 2024 08:10:50 -0800 Subject: [PATCH 3/6] fix: fix format by running black and isort test_tsne.py --- sklearnex/manifold/tests/test_tsne.py | 122 +++++++++++++++----------- 1 file changed, 73 insertions(+), 49 deletions(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 5482149a2b..90506aac03 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -15,15 +15,17 @@ # =============================================================================== import numpy as np -from numpy.testing import assert_allclose import pytest -#Note: n_componets must be 2 for now +from numpy.testing import assert_allclose + +# Note: n_components must be 2 for now from onedal.tests.utils._dataframes_support import ( _as_numpy, _convert_to_dataframe, get_dataframes_and_queues, ) + def test_sklearnex_import(): from sklearnex.manifold import TSNE @@ -31,8 +33,10 @@ def test_sklearnex_import(): tsne = TSNE(n_components=2, perplexity=2.0).fit(X) assert "daal4py" in tsne.__module__ + from sklearnex.manifold import TSNE + @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) def test_sklearnex_tsne_import(dataframe, queue): """Test TSNE compatibility with different backends and queues, and validate sklearnex module.""" @@ -43,13 +47,14 @@ def test_sklearnex_tsne_import(dataframe, queue): assert hasattr(tsne, "n_components"), "TSNE missing 'n_components' attribute." assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect." + def test_basic_tsne_functionality(): """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases.""" # Test basic functionality X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic) assert tsne.embedding_.shape == (4, 2) - + # Test with random data np.random.seed(42) X_random = np.random.rand(100, 10) @@ -92,13 +97,14 @@ def test_basic_tsne_functionality(): X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]]) with pytest.raises(ValueError): TSNE(n_components=2).fit(X_invalid) - - # Edge Case: Perplexity Larger Than n_samples + + # Edge Case: Perplexity Larger Than n_samples X_small = np.random.rand(5, 2) # 5 samples with pytest.raises(ValueError) as excinfo: TSNE(n_components=2, perplexity=10).fit(X_small) - assert "perplexity must be less than n_samples" in str(excinfo.value), \ - "Large perplexity did not trigger expected ValueError." + assert "perplexity must be less than n_samples" in str( + excinfo.value + ), "Large perplexity did not trigger expected ValueError." # Edge Case: Sparse-Like High-Dimensional Data np.random.seed(42) @@ -118,62 +124,74 @@ def test_basic_tsne_functionality(): pytest.fail(f"TSNE failed with low perplexity: {e}") - @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype): """Test TSNE with a specific, highly diverse dataset.""" - complex_array = np.array([ - [0, 0, 0, 0], - [1, 1, 1, 1], - [-1e-9, 1e-9, -1e-9, 1e-9], - [-1e9, 1e9, -1e9, 1e9], - [1e-3, 1e3, -1e3, -1e-3], - [0, 1e9, -1e-9, 1], - [1, -1, 1, -1], - [42, 42, 42, 42], - [0, 0, 1, -1], - [-1e5, 0, 1e5, -1], - [2e9, 2e-9, -2e9, -2e-9], - [3, -3, 3e3, -3e-3], - [5e-5, 5e5, -5e-5, -5e5], - [1, 0, -1e8, 1e8], - [9e-7, -9e7, 9e-7, -9e7], - [4e-4, 4e4, -4e-4, -4e4], - [6e-6, -6e6, 6e6, -6e-6], - [8, -8, 8e8, -8e-8], - ], dtype=dtype) - - complex_array_df = _convert_to_dataframe(complex_array, sycl_queue=queue, target_df=dataframe) + complex_array = np.array( + [ + [0, 0, 0, 0], + [1, 1, 1, 1], + [-1e-9, 1e-9, -1e-9, 1e-9], + [-1e9, 1e9, -1e9, 1e9], + [1e-3, 1e3, -1e3, -1e-3], + [0, 1e9, -1e-9, 1], + [1, -1, 1, -1], + [42, 42, 42, 42], + [0, 0, 1, -1], + [-1e5, 0, 1e5, -1], + [2e9, 2e-9, -2e9, -2e-9], + [3, -3, 3e3, -3e-3], + [5e-5, 5e5, -5e-5, -5e5], + [1, 0, -1e8, 1e8], + [9e-7, -9e7, 9e-7, -9e7], + [4e-4, 4e4, -4e-4, -4e4], + [6e-6, -6e6, 6e6, -6e-6], + [8, -8, 8e8, -8e-8], + ], + dtype=dtype, + ) + + complex_array_df = _convert_to_dataframe( + complex_array, sycl_queue=queue, target_df=dataframe + ) try: tsne = TSNE(n_components=2, perplexity=5.0, random_state=42) embedding = tsne.fit_transform(complex_array_df) - assert embedding.shape == (complex_array.shape[0], 2), "TSNE embedding shape is incorrect." + assert embedding.shape == ( + complex_array.shape[0], + 2, + ), "TSNE embedding shape is incorrect." except Exception as e: pytest.fail(f"TSNE failed on the specific complex dataset: {e}") -@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues(device_filter_="gpu")) +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="gpu") +) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_tsne_gpu_validation(dataframe, queue, dtype): """ GPU validation test for TSNE with a specific complex dataset. """ # Complex dataset for testing - gpu_validation_array = np.array([ - [0, 0, 0, 0], - [1, 1, 1, 1], - [-1e9, 1e9, -1e9, 1e9], - [1e-3, 1e3, -1e3, -1e-3], - [1, -1, 1, -1], - [0, 1e9, -1e-9, 1], - [-7e11, 7e11, -7e-11, 7e-11], - [4e-4, 4e4, -4e-4, -4e4], - [6e-6, -6e6, 6e6, -6e-6], - [0, 0, 0, 0], - [1, 1, 1, 1], - ], dtype=dtype) + gpu_validation_array = np.array( + [ + [0, 0, 0, 0], + [1, 1, 1, 1], + [-1e9, 1e9, -1e9, 1e9], + [1e-3, 1e3, -1e3, -1e-3], + [1, -1, 1, -1], + [0, 1e9, -1e-9, 1], + [-7e11, 7e11, -7e-11, 7e-11], + [4e-4, 4e4, -4e-4, -4e4], + [6e-6, -6e6, 6e6, -6e-6], + [0, 0, 0, 0], + [1, 1, 1, 1], + ], + dtype=dtype, + ) expected_shape = (gpu_validation_array.shape[0], 2) gpu_array_df = _convert_to_dataframe( @@ -182,9 +200,15 @@ def test_tsne_gpu_validation(dataframe, queue, dtype): try: tsne = TSNE(n_components=2, perplexity=3.0, random_state=42) embedding = tsne.fit_transform(gpu_array_df) - assert embedding.shape == expected_shape, f"Incorrect embedding shape on GPU: {embedding.shape}." - assert np.all(np.isfinite(embedding)), "Embedding contains NaN or infinite values on GPU." - assert np.any(embedding != 0), "GPU embedding contains only zeros, which is invalid." + assert ( + embedding.shape == expected_shape + ), f"Incorrect embedding shape on GPU: {embedding.shape}." + assert np.all( + np.isfinite(embedding) + ), "Embedding contains NaN or infinite values on GPU." + assert np.any( + embedding != 0 + ), "GPU embedding contains only zeros, which is invalid." except Exception as e: - pytest.fail(f"TSNE failed on GPU validation test: {e}") \ No newline at end of file + pytest.fail(f"TSNE failed on GPU validation test: {e}") From 2f3e9fae2c06dd8886a69fa968a6b69ec517632f Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Wed, 18 Dec 2024 14:11:20 -0800 Subject: [PATCH 4/6] fix: const test check shape instead of str output --- sklearnex/manifold/tests/test_tsne.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 90506aac03..0c0f7930db 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -84,9 +84,9 @@ def test_basic_tsne_functionality(): # Edge case: constant data X_constant = np.ones((10, 10)) - with pytest.raises(ValueError) as excinfo: - TSNE(n_components=2, perplexity=20).fit(X_constant) - assert "perplexity must be less than n_samples" in str(excinfo.value) + tsne = TSNE(n_components=2, perplexity=5, random_state=42) + embedding = tsne.fit(X_constant).embedding_ + assert embedding.shape == (10, 2), f"Unexpected embedding shape: {embedding.shape}" # Edge case: empty data X_empty = np.empty((0, 10)) From 739a90c22b9310bda57563d6a53d1cd382469915 Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Wed, 18 Dec 2024 14:52:25 -0800 Subject: [PATCH 5/6] fix: test removing raise error test --- sklearnex/manifold/tests/test_tsne.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 0c0f7930db..73a654d7e5 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -98,14 +98,6 @@ def test_basic_tsne_functionality(): with pytest.raises(ValueError): TSNE(n_components=2).fit(X_invalid) - # Edge Case: Perplexity Larger Than n_samples - X_small = np.random.rand(5, 2) # 5 samples - with pytest.raises(ValueError) as excinfo: - TSNE(n_components=2, perplexity=10).fit(X_small) - assert "perplexity must be less than n_samples" in str( - excinfo.value - ), "Large perplexity did not trigger expected ValueError." - # Edge Case: Sparse-Like High-Dimensional Data np.random.seed(42) X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99) From 822e614a5ddc56bfd5b6d930e102730b31e11636 Mon Sep 17 00:00:00 2001 From: "yue.jiao" Date: Thu, 19 Dec 2024 08:37:41 -0800 Subject: [PATCH 6/6] fix: fix test based on comments --- sklearnex/manifold/tests/test_tsne.py | 143 +++++++++++--------------- 1 file changed, 60 insertions(+), 83 deletions(-) diff --git a/sklearnex/manifold/tests/test_tsne.py b/sklearnex/manifold/tests/test_tsne.py index 73a654d7e5..4803ab2be1 100755 --- a/sklearnex/manifold/tests/test_tsne.py +++ b/sklearnex/manifold/tests/test_tsne.py @@ -48,70 +48,94 @@ def test_sklearnex_tsne_import(dataframe, queue): assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect." -def test_basic_tsne_functionality(): - """Test TSNE with valid data: basic functionality, random data, reproducibility, and edge cases.""" +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_tsne_functionality_and_edge_cases(dataframe, queue, dtype): + """ + TSNE test covering basic functionality and edge cases using get_dataframes_and_queues. + """ # Test basic functionality - X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]) - tsne = TSNE(n_components=2, perplexity=2.0).fit(X_basic) - assert tsne.embedding_.shape == (4, 2) + X_basic = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=dtype) + X_basic_df = _convert_to_dataframe(X_basic, sycl_queue=queue, target_df=dataframe) + tsne_basic = TSNE(n_components=2, perplexity=2.0, random_state=42) + embedding_basic = tsne_basic.fit_transform(X_basic_df) + assert embedding_basic.shape == (4, 2) # Test with random data - np.random.seed(42) - X_random = np.random.rand(100, 10) - tsne_random = TSNE(n_components=2, perplexity=30.0).fit(X_random) - assert tsne_random.embedding_.shape == (100, 2) + X_random = np.random.rand(100, 10).astype(dtype) + X_random_df = _convert_to_dataframe(X_random, sycl_queue=queue, target_df=dataframe) + tsne_random = TSNE(n_components=2, perplexity=30.0, random_state=42) + embedding_random = tsne_random.fit_transform(X_random_df) + assert embedding_random.shape == (100, 2) # Test reproducibility - X_repro = np.random.rand(50, 10) - tsne_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro) - tsne_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro) - assert_allclose(tsne_1, tsne_2, rtol=1e-5) - - # Test perplexity close to dataset size - X_perplexity = np.random.rand(10, 5) - tsne_perplexity = TSNE(n_components=2, perplexity=9).fit(X_perplexity) - assert tsne_perplexity.embedding_.shape == (10, 2) + X_repro = np.random.rand(50, 10).astype(dtype) + X_repro_df = _convert_to_dataframe(X_repro, sycl_queue=queue, target_df=dataframe) + tsne_repro_1 = TSNE(n_components=2, random_state=42).fit_transform(X_repro_df) + tsne_repro_2 = TSNE(n_components=2, random_state=42).fit_transform(X_repro_df) + tsne_repro_1_np = _as_numpy(tsne_repro_1) + tsne_repro_2_np = _as_numpy(tsne_repro_2) + assert_allclose(tsne_repro_1_np, tsne_repro_2_np, rtol=1e-5) # Test large data - X_large = np.random.rand(1000, 50) - tsne_large = TSNE(n_components=2, perplexity=50.0).fit(X_large) - assert tsne_large.embedding_.shape == (1000, 2) + X_large = np.random.rand(1000, 50).astype(dtype) + X_large_df = _convert_to_dataframe(X_large, sycl_queue=queue, target_df=dataframe) + tsne_large = TSNE(n_components=2, perplexity=50.0, random_state=42) + embedding_large = tsne_large.fit_transform(X_large_df) + assert embedding_large.shape == (1000, 2) # Test valid minimal data - X_valid = np.array([[0, 0], [1, 1], [2, 2]]) - tsne_valid = TSNE(n_components=2, perplexity=2).fit(X_valid) - assert tsne_valid.embedding_.shape == (3, 2) + X_valid = np.array([[0, 0], [1, 1], [2, 2]], dtype=dtype) + X_valid_df = _convert_to_dataframe(X_valid, sycl_queue=queue, target_df=dataframe) + tsne_valid = TSNE(n_components=2, perplexity=2, random_state=42) + embedding_valid = tsne_valid.fit_transform(X_valid_df) + assert embedding_valid.shape == (3, 2) # Edge case: constant data - X_constant = np.ones((10, 10)) - tsne = TSNE(n_components=2, perplexity=5, random_state=42) - embedding = tsne.fit(X_constant).embedding_ - assert embedding.shape == (10, 2), f"Unexpected embedding shape: {embedding.shape}" + X_constant = np.ones((10, 10), dtype=dtype) + X_constant_df = _convert_to_dataframe( + X_constant, sycl_queue=queue, target_df=dataframe + ) + tsne_constant = TSNE(n_components=2, perplexity=5, random_state=42) + embedding_constant = tsne_constant.fit(X_constant_df).embedding_ + assert embedding_constant.shape == (10, 2) # Edge case: empty data - X_empty = np.empty((0, 10)) + X_empty = np.empty((0, 10), dtype=dtype) with pytest.raises(ValueError): - TSNE(n_components=2).fit(X_empty) + TSNE(n_components=2).fit( + _convert_to_dataframe(X_empty, sycl_queue=queue, target_df=dataframe) + ) # Edge case: data with NaN or infinite values - X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]]) + X_invalid = np.array([[0, 0], [1, np.nan], [2, np.inf]], dtype=dtype) with pytest.raises(ValueError): - TSNE(n_components=2).fit(X_invalid) + TSNE(n_components=2).fit( + _convert_to_dataframe(X_invalid, sycl_queue=queue, target_df=dataframe) + ) # Edge Case: Sparse-Like High-Dimensional Data np.random.seed(42) - X_sparse_like = np.random.rand(50, 10000) * (np.random.rand(50, 10000) > 0.99) + X_sparse_like = np.random.rand(50, 500).astype(dtype) * ( + np.random.rand(50, 500) > 0.99 + ) + X_sparse_like_df = _convert_to_dataframe( + X_sparse_like, sycl_queue=queue, target_df=dataframe + ) try: tsne = TSNE(n_components=2, perplexity=30.0) - tsne.fit(X_sparse_like) + tsne.fit(X_sparse_like_df) except Exception as e: pytest.fail(f"TSNE failed on sparse-like high-dimensional data: {e}") # Edge Case: Extremely Low Perplexity - X = np.random.rand(10, 5) + X_low_perplexity = np.random.rand(10, 5).astype(dtype) + X_low_perplexity_df = _convert_to_dataframe( + X_low_perplexity, sycl_queue=queue, target_df=dataframe + ) try: tsne_low_perplexity = TSNE(n_components=2, perplexity=0.5) - tsne_low_perplexity.fit(X) + tsne_low_perplexity.fit(X_low_perplexity_df) except Exception as e: pytest.fail(f"TSNE failed with low perplexity: {e}") @@ -157,50 +181,3 @@ def test_tsne_with_specific_complex_dataset(dataframe, queue, dtype): ), "TSNE embedding shape is incorrect." except Exception as e: pytest.fail(f"TSNE failed on the specific complex dataset: {e}") - - -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues(device_filter_="gpu") -) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_tsne_gpu_validation(dataframe, queue, dtype): - """ - GPU validation test for TSNE with a specific complex dataset. - """ - # Complex dataset for testing - gpu_validation_array = np.array( - [ - [0, 0, 0, 0], - [1, 1, 1, 1], - [-1e9, 1e9, -1e9, 1e9], - [1e-3, 1e3, -1e3, -1e-3], - [1, -1, 1, -1], - [0, 1e9, -1e-9, 1], - [-7e11, 7e11, -7e-11, 7e-11], - [4e-4, 4e4, -4e-4, -4e4], - [6e-6, -6e6, 6e6, -6e-6], - [0, 0, 0, 0], - [1, 1, 1, 1], - ], - dtype=dtype, - ) - - expected_shape = (gpu_validation_array.shape[0], 2) - gpu_array_df = _convert_to_dataframe( - gpu_validation_array, sycl_queue=queue, target_df=dataframe - ) - try: - tsne = TSNE(n_components=2, perplexity=3.0, random_state=42) - embedding = tsne.fit_transform(gpu_array_df) - assert ( - embedding.shape == expected_shape - ), f"Incorrect embedding shape on GPU: {embedding.shape}." - assert np.all( - np.isfinite(embedding) - ), "Embedding contains NaN or infinite values on GPU." - assert np.any( - embedding != 0 - ), "GPU embedding contains only zeros, which is invalid." - - except Exception as e: - pytest.fail(f"TSNE failed on GPU validation test: {e}")