Test windows
fealho committed Aug 21, 2024
1 parent db98bd6 commit 20b25a4
Showing 11 changed files with 86 additions and 199 deletions.
31 changes: 0 additions & 31 deletions .github/workflows/dependency_checker.yml

This file was deleted.

31 changes: 0 additions & 31 deletions .github/workflows/integration.yml

This file was deleted.

22 changes: 0 additions & 22 deletions .github/workflows/lint.yml

This file was deleted.

37 changes: 0 additions & 37 deletions .github/workflows/minimum.yml

This file was deleted.

35 changes: 0 additions & 35 deletions .github/workflows/static_code_analysis.yml

This file was deleted.

34 changes: 0 additions & 34 deletions .github/workflows/unit.yml

This file was deleted.

15 changes: 7 additions & 8 deletions .github/workflows/readme.yml → .github/workflows/windows.yml
@@ -1,17 +1,17 @@
-name: Test README
+name: Windows Test

 on:
   push:
   pull_request:
     types: [opened, reopened]

 jobs:
-  readme:
+  windows:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
-        os: [ubuntu-latest, macos-latest] # skip windows bc rundoc fails
+        python-version: ['3.12']
+        os: [windows-latest]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
@@ -21,7 +21,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install invoke rundoc .
-          python -m pip install tomli
-      - name: Run the README.md
-        run: invoke readme
+          python -m pip install invoke .[test]
+      - name: Run Windows tests
+        run: invoke windows
24 changes: 24 additions & 0 deletions issue.py
@@ -0,0 +1,24 @@
from sdgym.benchmark import benchmark_single_table
from sdgym.synthesizers.generate import create_single_table_synthesizer
from sdv.metadata.single_table import SingleTableMetadata
from sdv.single_table.copulas import GaussianCopulaSynthesizer

def get_trained_synth(data, metadata):
    metadata = SingleTableMetadata.load_from_dict(metadata)
    synthesizer = GaussianCopulaSynthesizer(metadata)
    synthesizer.fit(data)
    return synthesizer

def sample_synth(trained_synthesizer, num_samples):
    return trained_synthesizer.sample(num_samples)

custom_synthesizer = create_single_table_synthesizer('SimpleGaussianCopula', get_trained_synth, sample_synth)

output = benchmark_single_table(
    synthesizers=[],
    sdv_datasets=['fake_hotel_guests'],
    timeout=120,
    sdmetrics=[],
    custom_synthesizers=[custom_synthesizer],
)
print(output)
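
Because the benchmark change below forces the 'spawn' start method, a worker process re-imports the script that launched it. The following is a hedged sketch, not part of the commit, of how issue.py could guard its entry point so that re-import does not re-trigger the benchmark; the logic is otherwise identical to the file above.

# Hedged variant of issue.py (not part of the commit): same logic, but the
# top-level calls sit behind a __main__ guard so that worker processes started
# with the 'spawn' method can re-import this module without re-running the
# benchmark.
from sdgym.benchmark import benchmark_single_table
from sdgym.synthesizers.generate import create_single_table_synthesizer
from sdv.metadata.single_table import SingleTableMetadata
from sdv.single_table.copulas import GaussianCopulaSynthesizer


def get_trained_synth(data, metadata):
    metadata = SingleTableMetadata.load_from_dict(metadata)
    synthesizer = GaussianCopulaSynthesizer(metadata)
    synthesizer.fit(data)
    return synthesizer


def sample_synth(trained_synthesizer, num_samples):
    return trained_synthesizer.sample(num_samples)


if __name__ == '__main__':
    custom_synthesizer = create_single_table_synthesizer(
        'SimpleGaussianCopula', get_trained_synth, sample_synth
    )
    output = benchmark_single_table(
        synthesizers=[],
        sdv_datasets=['fake_hotel_guests'],
        timeout=120,
        sdmetrics=[],
        custom_synthesizers=[custom_synthesizer],
    )
    print(output)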
8 changes: 7 additions & 1 deletion sdgym/benchmark.py
@@ -2,7 +2,6 @@

 import concurrent
 import logging
-import multiprocessing
 import os
 import pickle
 import tracemalloc
@@ -43,6 +42,13 @@
     get_synthesizers,
     used_memory,
 )
+import multiprocessing
+import cloudpickle
+import dill
+
+multiprocessing.set_start_method('spawn', force=True)
+multiprocessing.reduction.ForkingPickler.dumps = cloudpickle.dumps
+multiprocessing.reduction.ForkingPickler.loads = cloudpickle.loads

 LOGGER = logging.getLogger(__name__)
 DEFAULT_SYNTHESIZERS = [GaussianCopulaSynthesizer, CTGANSynthesizer]
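
The added lines route multiprocessing's ForkingPickler through cloudpickle after forcing the 'spawn' start method, which lets callables that plain pickle cannot serialize by reference (such as locally defined synthesizer functions) reach worker processes by value. Below is a minimal self-contained sketch of that mechanism, independent of sdgym; the call helper and the lambda are illustrative only, not part of the library.

# Self-contained sketch of the mechanism used above: with 'spawn', task
# arguments are pickled before reaching the worker, and the standard pickler
# rejects callables it cannot import by name. Pointing ForkingPickler's hooks
# at cloudpickle serializes such callables by value instead.
import multiprocessing

import cloudpickle

multiprocessing.set_start_method('spawn', force=True)
multiprocessing.reduction.ForkingPickler.dumps = cloudpickle.dumps
multiprocessing.reduction.ForkingPickler.loads = cloudpickle.loads


def call(fn, value):
    # Runs in the worker process; fn arrives serialized by cloudpickle.
    return fn(value)


if __name__ == '__main__':
    double = lambda x: x * 2  # plain pickle cannot serialize a lambda
    with multiprocessing.Pool(processes=1) as pool:
        print(pool.apply(call, (double, 21)))  # prints 42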
5 changes: 5 additions & 0 deletions tasks.py
@@ -28,6 +28,11 @@ def unit(c):
     c.run('python -m pytest ./tests/unit --cov=sdgym --cov-report=xml')


+@task
+def windows(c):
+    c.run('python issue.py')
+
+
 @task
 def integration(c):
     c.run('python -m pytest ./tests/integration')
43 changes: 43 additions & 0 deletions tests/integration/test_benchmark.py
@@ -471,3 +471,46 @@ def test_benchmark_single_table_limit_dataset_size():
         .between(0, 1000)
         .all()
     )
+
+
+def test_benchmark_single_table_custom_synthesizer_with_timeout():
+    """Test it works with custom synthesizers and timeout."""
+
+    # Setup
+    def get_trained_synth(data, metadata):
+        metadata = SingleTableMetadata.load_from_dict(metadata)
+        synthesizer = GaussianCopulaSynthesizer(metadata)
+        synthesizer.fit(data)
+        return synthesizer
+
+    def get_trained_synth_v2(data, metadata):
+        metadata = SingleTableMetadata.load_from_dict(metadata)
+        synthesizer = GaussianCopulaSynthesizer(metadata)
+        synthesizer.fit(data)
+        return synthesizer
+
+    def sample_synth(trained_synthesizer, num_samples):
+        return trained_synthesizer.sample(num_samples)
+
+    # Run
+    custom_synthesizer = create_single_table_synthesizer(
+        'SimpleGaussianCopula', get_trained_synth, sample_synth
+    )
+    custom_synthesizer_v2 = create_single_table_synthesizer(
+        'SimpleGaussianCopulaV2', get_trained_synth_v2, sample_synth
+    )
+    output = benchmark_single_table(
+        synthesizers=[],
+        sdv_datasets=['fake_hotel_guests'],
+        timeout=120,
+        sdmetrics=[],
+        custom_synthesizers=[custom_synthesizer, custom_synthesizer_v2],
+    )
+
+    # Assert
+    assert output['Synthesizer'][0] == 'Custom:SimpleGaussianCopula'
+    assert output['Synthesizer'][1] == 'Custom:SimpleGaussianCopulaV2'
+    output.to_csv('output.csv')
+
+    output = output.drop('Quality_Score', axis=1)
+    assert not output.isna().to_numpy().any()
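
The trainer and sampler here are defined inside the test function, so if the benchmark enforces the timeout by running synthesizers in a separate process (which the multiprocessing changes above suggest), they must be serializable by value rather than by import path. The snippet below is an illustrative, self-contained check of that property, not part of the test.

# Illustrative check that a function defined inside another function cannot be
# pickled by reference but does survive a cloudpickle round trip, which is the
# property the spawn-based setup relies on.
import pickle

import cloudpickle


def make_trainer():
    def trainer(data, metadata):
        return (data, metadata)

    return trainer


trainer = make_trainer()

try:
    pickle.dumps(trainer)
except (AttributeError, pickle.PicklingError) as error:
    print(f'plain pickle fails: {error}')

restored = cloudpickle.loads(cloudpickle.dumps(trainer))
print(restored('data', 'metadata'))  # ('data', 'metadata')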