Test windows
fealho committed Aug 21, 2024
1 parent db98bd6 commit 20b25a4
Showing 11 changed files with 86 additions and 199 deletions.
31 changes: 0 additions & 31 deletions .github/workflows/dependency_checker.yml

This file was deleted.

31 changes: 0 additions & 31 deletions .github/workflows/integration.yml

This file was deleted.

22 changes: 0 additions & 22 deletions .github/workflows/lint.yml

This file was deleted.

37 changes: 0 additions & 37 deletions .github/workflows/minimum.yml

This file was deleted.

35 changes: 0 additions & 35 deletions .github/workflows/static_code_analysis.yml

This file was deleted.

34 changes: 0 additions & 34 deletions .github/workflows/unit.yml

This file was deleted.

15 changes: 7 additions & 8 deletions .github/workflows/readme.yml → .github/workflows/windows.yml
@@ -1,17 +1,17 @@
-name: Test README
+name: Windows Test

 on:
   push:
   pull_request:
     types: [opened, reopened]

 jobs:
-  readme:
+  windows:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
-        os: [ubuntu-latest, macos-latest] # skip windows bc rundoc fails
+        python-version: ['3.12']
+        os: [windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -21,7 +21,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install invoke rundoc .
-          python -m pip install tomli
-      - name: Run the README.md
-        run: invoke readme
+          python -m pip install invoke .[test]
+      - name: Run Windows tests
+        run: invoke windows
24 changes: 24 additions & 0 deletions issue.py
@@ -0,0 +1,24 @@
from sdgym.benchmark import benchmark_single_table
from sdgym.synthesizers.generate import create_single_table_synthesizer
from sdv.metadata.single_table import SingleTableMetadata
from sdv.single_table.copulas import GaussianCopulaSynthesizer

def get_trained_synth(data, metadata):
    metadata = SingleTableMetadata.load_from_dict(metadata)
    synthesizer = GaussianCopulaSynthesizer(metadata)
    synthesizer.fit(data)
    return synthesizer

def sample_synth(trained_synthesizer, num_samples):
    return trained_synthesizer.sample(num_samples)

custom_synthesizer = create_single_table_synthesizer('SimpleGaussianCopula', get_trained_synth, sample_synth)

output = benchmark_single_table(
    synthesizers=[],
    sdv_datasets=['fake_hotel_guests'],
    timeout=120,
    sdmetrics=[],
    custom_synthesizers=[custom_synthesizer],
)
print(output)
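
Because the benchmark change below forces the 'spawn' start method, a worker process re-imports the script that launched it. The following is a hedged sketch, not part of the commit, of how issue.py could guard its entry point so that re-import does not re-trigger the benchmark; the logic is otherwise identical to the file above.

# Hedged variant of issue.py (not part of the commit): same logic, but the
# top-level calls sit behind a __main__ guard so that worker processes started
# with the 'spawn' method can re-import this module without re-running the
# benchmark.
from sdgym.benchmark import benchmark_single_table
from sdgym.synthesizers.generate import create_single_table_synthesizer
from sdv.metadata.single_table import SingleTableMetadata
from sdv.single_table.copulas import GaussianCopulaSynthesizer


def get_trained_synth(data, metadata):
    metadata = SingleTableMetadata.load_from_dict(metadata)
    synthesizer = GaussianCopulaSynthesizer(metadata)
    synthesizer.fit(data)
    return synthesizer


def sample_synth(trained_synthesizer, num_samples):
    return trained_synthesizer.sample(num_samples)


if __name__ == '__main__':
    custom_synthesizer = create_single_table_synthesizer(
        'SimpleGaussianCopula', get_trained_synth, sample_synth
    )
    output = benchmark_single_table(
        synthesizers=[],
        sdv_datasets=['fake_hotel_guests'],
        timeout=120,
        sdmetrics=[],
        custom_synthesizers=[custom_synthesizer],
    )
    print(output)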
8 changes: 7 additions & 1 deletion sdgym/benchmark.py
@@ -2,7 +2,6 @@

 import concurrent
 import logging
-import multiprocessing
 import os
 import pickle
 import tracemalloc
@@ -43,6 +42,13 @@
     get_synthesizers,
     used_memory,
 )
+import multiprocessing
+import cloudpickle
+import dill
+
+multiprocessing.set_start_method('spawn', force=True)
+multiprocessing.reduction.ForkingPickler.dumps = cloudpickle.dumps
+multiprocessing.reduction.ForkingPickler.loads = cloudpickle.loads

 LOGGER = logging.getLogger(__name__)
 DEFAULT_SYNTHESIZERS = [GaussianCopulaSynthesizer, CTGANSynthesizer]
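
The added lines route multiprocessing's ForkingPickler through cloudpickle after forcing the 'spawn' start method, which lets callables that plain pickle cannot serialize by reference (such as locally defined synthesizer functions) reach worker processes by value. Below is a minimal self-contained sketch of that mechanism, independent of sdgym; the call helper and the lambda are illustrative only, not part of the library.

# Self-contained sketch of the mechanism used above: with 'spawn', task
# arguments are pickled before reaching the worker, and the standard pickler
# rejects callables it cannot import by name. Pointing ForkingPickler's hooks
# at cloudpickle serializes such callables by value instead.
import multiprocessing

import cloudpickle

multiprocessing.set_start_method('spawn', force=True)
multiprocessing.reduction.ForkingPickler.dumps = cloudpickle.dumps
multiprocessing.reduction.ForkingPickler.loads = cloudpickle.loads


def call(fn, value):
    # Runs in the worker process; fn arrives serialized by cloudpickle.
    return fn(value)


if __name__ == '__main__':
    double = lambda x: x * 2  # plain pickle cannot serialize a lambda
    with multiprocessing.Pool(processes=1) as pool:
        print(pool.apply(call, (double, 21)))  # prints 42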
5 changes: 5 additions & 0 deletions tasks.py
@@ -28,6 +28,11 @@ def unit(c):
     c.run('python -m pytest ./tests/unit --cov=sdgym --cov-report=xml')


+@task
+def windows(c):
+    c.run('python issue.py')
+
+
 @task
 def integration(c):
     c.run('python -m pytest ./tests/integration')
43 changes: 43 additions & 0 deletions tests/integration/test_benchmark.py
@@ -471,3 +471,46 @@ def test_benchmark_single_table_limit_dataset_size():
         .between(0, 1000)
         .all()
     )
+
+
+def test_benchmark_single_table_custom_synthesizer_with_timeout():
+    """Test it works with custom synthesizers and timeout."""
+
+    # Setup
+    def get_trained_synth(data, metadata):
+        metadata = SingleTableMetadata.load_from_dict(metadata)
+        synthesizer = GaussianCopulaSynthesizer(metadata)
+        synthesizer.fit(data)
+        return synthesizer
+
+    def get_trained_synth_v2(data, metadata):
+        metadata = SingleTableMetadata.load_from_dict(metadata)
+        synthesizer = GaussianCopulaSynthesizer(metadata)
+        synthesizer.fit(data)
+        return synthesizer
+
+    def sample_synth(trained_synthesizer, num_samples):
+        return trained_synthesizer.sample(num_samples)
+
+    # Run
+    custom_synthesizer = create_single_table_synthesizer(
+        'SimpleGaussianCopula', get_trained_synth, sample_synth
+    )
+    custom_synthesizer_v2 = create_single_table_synthesizer(
+        'SimpleGaussianCopulaV2', get_trained_synth_v2, sample_synth
+    )
+    output = benchmark_single_table(
+        synthesizers=[],
+        sdv_datasets=['fake_hotel_guests'],
+        timeout=120,
+        sdmetrics=[],
+        custom_synthesizers=[custom_synthesizer, custom_synthesizer_v2],
+    )
+
+    # Assert
+    assert output['Synthesizer'][0] == 'Custom:SimpleGaussianCopula'
+    assert output['Synthesizer'][1] == 'Custom:SimpleGaussianCopulaV2'
+    output.to_csv('output.csv')
+
+    output = output.drop('Quality_Score', axis=1)
+    assert not output.isna().to_numpy().any()
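
The trainer and sampler here are defined inside the test function, so if the benchmark enforces the timeout by running synthesizers in a separate process (which the multiprocessing changes above suggest), they must be serializable by value rather than by import path. The snippet below is an illustrative, self-contained check of that property, not part of the test.

# Illustrative check that a function defined inside another function cannot be
# pickled by reference but does survive a cloudpickle round trip, which is the
# property the spawn-based setup relies on.
import pickle

import cloudpickle


def make_trainer():
    def trainer(data, metadata):
        return (data, metadata)

    return trainer


trainer = make_trainer()

try:
    pickle.dumps(trainer)
except (AttributeError, pickle.PicklingError) as error:
    print(f'plain pickle fails: {error}')

restored = cloudpickle.loads(cloudpickle.dumps(trainer))
print(restored('data', 'metadata'))  # ('data', 'metadata')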