From c499b048ddaca65e1f64b481498974e5c3804c0f Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 10:33:32 +0000 Subject: [PATCH 01/16] define pyproject.toml --- pyproject.toml | 209 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6086c4d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,209 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +authors = [{ name = "DataCebo, Inc.", email = "info@sdv.dev" }] +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: Free for non-commercial use", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +description = "Benchmark tabular synthetic data generators using a variety of datasets" +keywords = ["machine learning", "synthetic data generation", "benchmark", "generative models"] +name = "sdgym" +version = "0.7.1.dev0" +license = { text = "BSL-1.1" } +requires-python = '>=3.8,<3.12' +readme = "README.md" +urls = { "Homepage" = "https://github.com/sdv-dev/SDGym" } + +dependencies = [ + 'appdirs>=1.3,<2', + 'boto3>=1.15.0,<2', + 'botocore>=1.18,<2', + 'compress-pickle>=1.2.0,<3', + 'humanfriendly>=8.2,<11', + "numpy>=1.20.0,<2;python_version<'3.10'", + "numpy>=1.23.3,<2;python_version>='3.10'", + "pandas>=1.1.3;python_version<'3.10'", + "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", + "pandas>=1.5.0;python_version>='3.11'", + 'psutil>=5.7,<6', + "scikit-learn>=0.24,<2;python_version<'3.10'", + "scikit-learn>=1.1.3,<2;python_version>='3.10'", + "scipy>=1.5.4,<2;python_version<'3.10'", + "scipy>=1.9.2,<2;python_version>='3.10'", + 'tabulate>=0.8.3,<0.9', + "torch>=1.8.0;python_version<'3.10'", + "torch>=1.11.0;python_version>='3.10' and python_version<'3.11'", + "torch>=2.0.0;python_version>='3.11'", + 'tqdm>=4.15,<5', + 'XlsxWriter>=1.2.8,<4', + 'rdt>=1.6.1,<2.0', + 'sdmetrics>=0.11.0,<1.0', + 'sdv>=1.3.0,<2', +] + +[project.entry-points] +sdgym = { main = "sdgym.cli.__main__:main" } + + +[project.optional-dependencies] + +dask = ["dask", "distributed"] +test = [ + 'pytest>=3.4.2', + 'pytest-cov>=2.6.0', + 'jupyter>=1.0.0,<2', + 'rundoc>=0.4.3,<0.5', +] +dev = [ + # dask + "dask", + "distributed", + + # test + 'pytest>=3.4.2', + 'pytest-cov>=2.6.0', + 'jupyter>=1.0.0,<2', + 'rundoc>=0.4.3,<0.5', + + # general + 'bumpversion>=0.5.3,<0.6', + 'pip>=9.0.1', + 'watchdog>=0.8.3,<0.11', + + # style check + 'flake8>=3.7.7,<4', + 'flake8-absolute-import>=1.0,<2', + 'flake8-builtins>=1.5.3,<1.6', + 'flake8-comprehensions>=3.6.1,<3.7', + 'flake8-debugger>=4.0.0,<4.1', + 'flake8-docstrings>=1.5.0,<2', + 'flake8-eradicate>=1.1.0,<1.2', + 'flake8-fixme>=1.1.1,<1.2', + 'flake8-mock>=0.3,<0.4', + 'flake8-multiline-containers>=0.0.18,<0.1', + 'flake8-mutable>=1.2.0,<1.3', + 'flake8-expression-complexity>=0.0.9,<0.1', + 'flake8-print>=4.0.0,<4.1', + 'flake8-pytest-style>=1.5.0,<2', + 'flake8-quotes>=3.3.0,<4', + 'flake8-sfs>=0.0.3,<0.1', + 'flake8-variables-names>=0.0.4,<0.1', + 'dlint>=0.11.0,<0.12', + 'isort>=4.3.4,<5', + 'pandas-vet>=0.2.3,<0.3', + 'pep8-naming>=0.12.1,<0.13', + 'pydocstyle>=6.1.1,<6.2', + + # fix style issues + 'autoflake>=1.1,<2', + 'autopep8>=1.4.3,<2', + + # distribute on PyPI + 'twine>=1.10.0,<4', + 'wheel>=0.30.0', + + # Advanced testing + 'coverage>=4.5.1,<6', + 'tox>=2.9.1,<4', + 'importlib-metadata>=3.6', + + # Invoke + 'invoke', +] + +all = [ + # dask + "dask", + "distributed", + + # test + 'pytest>=3.4.2', + 'pytest-cov>=2.6.0', + 'jupyter>=1.0.0,<2', + 'rundoc>=0.4.3,<0.5', + + # general + 'bumpversion>=0.5.3,<0.6', + 'pip>=9.0.1', + 'watchdog>=0.8.3,<0.11', + + # style check + 'flake8>=3.7.7,<4', + 'flake8-absolute-import>=1.0,<2', + 'flake8-builtins>=1.5.3,<1.6', + 'flake8-comprehensions>=3.6.1,<3.7', + 'flake8-debugger>=4.0.0,<4.1', + 'flake8-docstrings>=1.5.0,<2', + 'flake8-eradicate>=1.1.0,<1.2', + 'flake8-fixme>=1.1.1,<1.2', + 'flake8-mock>=0.3,<0.4', + 'flake8-multiline-containers>=0.0.18,<0.1', + 'flake8-mutable>=1.2.0,<1.3', + 'flake8-expression-complexity>=0.0.9,<0.1', + 'flake8-print>=4.0.0,<4.1', + 'flake8-pytest-style>=1.5.0,<2', + 'flake8-quotes>=3.3.0,<4', + 'flake8-sfs>=0.0.3,<0.1', + 'flake8-variables-names>=0.0.4,<0.1', + 'dlint>=0.11.0,<0.12', + 'isort>=4.3.4,<5', + 'pandas-vet>=0.2.3,<0.3', + 'pep8-naming>=0.12.1,<0.13', + 'pydocstyle>=6.1.1,<6.2', + + # fix style issues + 'autoflake>=1.1,<2', + 'autopep8>=1.4.3,<2', + + # distribute on PyPI + 'twine>=1.10.0,<4', + 'wheel>=0.30.0', + + # Advanced testing + 'coverage>=4.5.1,<6', + 'tox>=2.9.1,<4', + 'importlib-metadata>=3.6', + + # Invoke + 'invoke', +] + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = ['sdgym', 'sdgym.*'] +namespaces = false + +[tool.flake8] +max-line-length = 99 +inline-quotes = "single" +exclude = ["docs", ".tox", ".git", "__pycache__", ".ipynb_checkpoints", "sdgym/cli"] +extend-ignore = [ + "D105", # Missing docstring in magic method + "D107", # Missing docstring in __init__ + "PD005", # Use arithmetic operator instead of method + "SFS3" # String literal formatting using f-string +] + +[tool.isort] +line_length = 99 +lines_between_types = 0 +multi_line_output = 4 +not_skip = ["__init__.py"] +use_parentheses = true + +[tool.pydocstyle] +convention = "google" +add-ignore = ["D107", "D407", "D417"] From 3ccfce1729d64244857bc533a85cbe0c6b3695ff Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 10:33:45 +0000 Subject: [PATCH 02/16] update setup.cfg --- setup.cfg | 43 ++++++++++--------------------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/setup.cfg b/setup.cfg index 3e30e01..3cb9e7a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,49 +4,26 @@ commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? serialize = - {major}.{minor}.{patch}.{release}{candidate} - {major}.{minor}.{patch} + {major}.{minor}.{patch}.{release}{candidate} + {major}.{minor}.{patch} [bumpversion:part:release] optional_value = release first_value = dev values = - dev - release + dev + release -[bumpversion:part:candidate] - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" [bumpversion:file:sdgym/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' +search = __version__ = "{current_version}" +replace = __version__ = "{new_version}" [bdist_wheel] universal = 1 -[flake8] -max-line-length = 99 -inline-quotes = single -exclude = docs, .tox, .git, __pycache__, .ipynb_checkpoints, sdgym/cli -extend-ignore = D105, # Missing docstring in magic method - D107, # Missing docstring in __init__ - PD005, # Use arithmetic operator instead of method - SFS3 # String literal formatting using f-string - -[isort] -line_length = 99 -lines_between_types = 0 -multi_line_output = 4 -not_skip = __init__.py -use_parentheses = True - [aliases] -test = pytest - -[pydocstyle] -convention = google -add-ignore = D107, D407, D417 - +test = pytest \ No newline at end of file From 0684101ef8fa64a5c7189a4fe9c5bcb2018e208e Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 10:33:57 +0000 Subject: [PATCH 03/16] update makefile --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f3331fc..a0bb432 100644 --- a/Makefile +++ b/Makefile @@ -123,8 +123,7 @@ coverage: ## check code coverage quickly with the default Python .PHONY: dist dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel + python -m build --wheel --sdist ls -l dist .PHONY: publish-confirm From 3a387726f5ad5ff1e0c33c8be0db238962b71daf Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 10:34:18 +0000 Subject: [PATCH 04/16] remove setup.py --- setup.py | 149 ------------------------------------------------------- 1 file changed, 149 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index 1a90960..0000000 --- a/setup.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""The setup script.""" - -from setuptools import find_packages, setup - -with open('README.md', encoding='utf-8') as readme_file: - readme = readme_file.read() - -with open('HISTORY.md', encoding='utf-8') as history_file: - history = history_file.read() - -install_requires = [ - 'appdirs>=1.3,<2', - 'boto3>=1.15.0,<2', - 'botocore>=1.18,<2', - 'compress-pickle>=1.2.0,<3', - 'humanfriendly>=8.2,<11', - "numpy>=1.20.0,<2;python_version<'3.10'", - "numpy>=1.23.3,<2;python_version>='3.10'", - "pandas>=1.1.3;python_version<'3.10'", - "pandas>=1.3.4;python_version>='3.10' and python_version<'3.11'", - "pandas>=1.5.0;python_version>='3.11'", - 'psutil>=5.7,<6', - "scikit-learn>=0.24,<2;python_version<'3.10'", - "scikit-learn>=1.1.3,<2;python_version>='3.10'", - "scipy>=1.5.4,<2;python_version<'3.10'", - "scipy>=1.9.2,<2;python_version>='3.10'", - 'tabulate>=0.8.3,<0.9', - "torch>=1.8.0;python_version<'3.10'", - "torch>=1.11.0;python_version>='3.10' and python_version<'3.11'", - "torch>=2.0.0;python_version>='3.11'", - 'tqdm>=4.15,<5', - 'XlsxWriter>=1.2.8,<4', - 'rdt>=1.6.1,<2.0', - 'sdmetrics>=0.11.0,<1.0', - 'sdv>=1.3.0,<2', -] - - -dask_requires = [ - 'dask', - 'distributed', -] - -setup_requires = [ - 'pytest-runner>=2.11.1', -] - -tests_require = [ - 'pytest>=3.4.2', - 'pytest-cov>=2.6.0', - 'jupyter>=1.0.0,<2', - 'rundoc>=0.4.3,<0.5', -] - -development_requires = [ - # general - 'bumpversion>=0.5.3,<0.6', - 'pip>=9.0.1', - 'watchdog>=0.8.3,<0.11', - - # style check - 'flake8>=3.7.7,<4', - 'flake8-absolute-import>=1.0,<2', - 'flake8-builtins>=1.5.3,<1.6', - 'flake8-comprehensions>=3.6.1,<3.7', - 'flake8-debugger>=4.0.0,<4.1', - 'flake8-docstrings>=1.5.0,<2', - 'flake8-eradicate>=1.1.0,<1.2', - 'flake8-fixme>=1.1.1,<1.2', - 'flake8-mock>=0.3,<0.4', - 'flake8-multiline-containers>=0.0.18,<0.1', - 'flake8-mutable>=1.2.0,<1.3', - 'flake8-expression-complexity>=0.0.9,<0.1', - 'flake8-print>=4.0.0,<4.1', - 'flake8-pytest-style>=1.5.0,<2', - 'flake8-quotes>=3.3.0,<4', - 'flake8-sfs>=0.0.3,<0.1', - 'flake8-variables-names>=0.0.4,<0.1', - 'dlint>=0.11.0,<0.12', - 'isort>=4.3.4,<5', - 'pandas-vet>=0.2.3,<0.3', - 'pep8-naming>=0.12.1,<0.13', - 'pydocstyle>=6.1.1,<6.2', - - # fix style issues - 'autoflake>=1.1,<2', - 'autopep8>=1.4.3,<2', - - # distribute on PyPI - 'twine>=1.10.0,<4', - 'wheel>=0.30.0', - - # Advanced testing - 'coverage>=4.5.1,<6', - 'tox>=2.9.1,<4', - 'importlib-metadata>=3.6', - - # Invoke - 'invoke', -] - -setup( - author='DataCebo, Inc.', - author_email='info@sdv.dev', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: Free for non-commercial use', - 'Natural Language :: English', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - ], - description=( - 'Benchmark tabular synthetic data generators using a variety of datasets' - ), - entry_points={ - 'console_scripts': [ - 'sdgym=sdgym.cli.__main__:main' - ], - }, - extras_require={ - 'all': development_requires + tests_require + dask_requires, - 'dev': development_requires + tests_require + dask_requires, - 'test': tests_require, - 'dask': dask_requires, - }, - include_package_data=True, - install_requires=install_requires, - license='BSL-1.1', - long_description=readme + '\n\n' + history, - long_description_content_type='text/markdown', - keywords='machine learning synthetic data generation benchmark generative models', - name='sdgym', - packages=find_packages(include=['sdgym', 'sdgym.*']), - python_requires='>=3.8,<3.12', - setup_requires=setup_requires, - test_suite='tests', - tests_require=tests_require, - url='https://github.com/sdv-dev/SDGym', - version='0.7.1.dev0', - zip_safe=False, -) From 5919e05887c2179fee67aa2e8b5b7d67636675ad Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 11:04:26 +0000 Subject: [PATCH 05/16] minimum version + lint --- pyproject.toml | 11 ----------- setup.cfg | 18 +++++++++++++++++- tasks.py | 37 +++++++++++++++---------------------- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6086c4d..55fd382 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -186,17 +186,6 @@ include-package-data = true include = ['sdgym', 'sdgym.*'] namespaces = false -[tool.flake8] -max-line-length = 99 -inline-quotes = "single" -exclude = ["docs", ".tox", ".git", "__pycache__", ".ipynb_checkpoints", "sdgym/cli"] -extend-ignore = [ - "D105", # Missing docstring in magic method - "D107", # Missing docstring in __init__ - "PD005", # Use arithmetic operator instead of method - "SFS3" # String literal formatting using f-string -] - [tool.isort] line_length = 99 lines_between_types = 0 diff --git a/setup.cfg b/setup.cfg index 3cb9e7a..e689ae9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,4 +26,20 @@ replace = __version__ = "{new_version}" universal = 1 [aliases] -test = pytest \ No newline at end of file +test = pytest + +[flake8] +max-line-length = 99 +inline-quotes = single +exclude = docs, .tox, .git, __pycache__, .ipynb_checkpoints, sdgym/cli +extend-ignore = D105, # Missing docstring in magic method + D107, # Missing docstring in __init__ + PD005, # Use arithmetic operator instead of method + SFS3 # String literal formatting using f-string + +[isort] +line_length = 99 +lines_between_types = 0 +multi_line_output = 4 +not_skip = __init__.py +use_parentheses = True \ No newline at end of file diff --git a/tasks.py b/tasks.py index 8f9aac6..2c9d712 100644 --- a/tasks.py +++ b/tasks.py @@ -10,6 +10,7 @@ from pathlib import Path from invoke import task +import toml COMPARISONS = { '>=': operator.ge, @@ -70,30 +71,22 @@ def _validate_python_version(line): @task def install_minimum(c): - with open('setup.py', 'r') as setup_py: - lines = setup_py.read().splitlines() + with open('pyproject.toml', 'r', encoding='utf-8') as pyproject_file: + pyproject_data = toml.load(pyproject_file) + dependencies = pyproject_data.get('project', {}).get('dependencies', []) versions = [] - started = False - for line in lines: - if started: - if line == ']': - started = False - continue - - line = line.strip() - if _validate_python_version(line): - requirement = re.match(r'[^>]*', line).group(0) - requirement = re.sub(r"""['",]""", '', requirement) - version = re.search(r'>=?(\d\.?)+\w*', line).group(0) - if version: - version = re.sub(r'>=?', '==', version) - version = re.sub(r"""['",]""", '', version) - requirement += version - versions.append(requirement) - - elif (line.startswith('install_requires = [')): - started = True + for line in dependencies: + line = line.strip() + if _validate_python_version(line): + requirement = re.match(r'[^>]*', line).group(0) + requirement = re.sub(r"""['",]""", '', requirement) + version = re.search(r'>=?(\d\.?)+\w*', line).group(0) + if version: + version = re.sub(r'>=?', '==', version) + version = re.sub(r"""['",]""", '', version) + requirement += version + versions.append(requirement) c.run(f'python -m pip install {" ".join(versions)}') From 0d3ab6ef0d5802c1c0f2910bdf516c8259af4f3d Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 11:06:02 +0000 Subject: [PATCH 06/16] docker --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d5e678f..281c347 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN mkdir /SDGym && \ mkdir /SDGym/sdgym && \ # Copy code -COPY setup.py README.md HISTORY.md MANIFEST.in LICENSE Makefile setup.cfg /SDGym/ +COPY pyproject.toml README.md HISTORY.md MANIFEST.in LICENSE Makefile setup.cfg /SDGym/ COPY /sdgym/ /SDGym/sdgym WORKDIR /SDGym From 7d775e5c7f17562f94f12f9f5b8c940b8151cf0e Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 11:35:01 +0000 Subject: [PATCH 07/16] update task.py --- tasks.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/tasks.py b/tasks.py index 2c9d712..406bfec 100644 --- a/tasks.py +++ b/tasks.py @@ -10,7 +10,6 @@ from pathlib import Path from invoke import task -import toml COMPARISONS = { '>=': operator.ge, @@ -71,22 +70,30 @@ def _validate_python_version(line): @task def install_minimum(c): - with open('pyproject.toml', 'r', encoding='utf-8') as pyproject_file: - pyproject_data = toml.load(pyproject_file) + with open('pyproject.toml', 'r') as pyproject: + lines = pyproject.read().splitlines() - dependencies = pyproject_data.get('project', {}).get('dependencies', []) versions = [] - for line in dependencies: - line = line.strip() - if _validate_python_version(line): - requirement = re.match(r'[^>]*', line).group(0) - requirement = re.sub(r"""['",]""", '', requirement) - version = re.search(r'>=?(\d\.?)+\w*', line).group(0) - if version: - version = re.sub(r'>=?', '==', version) - version = re.sub(r"""['",]""", '', version) - requirement += version - versions.append(requirement) + started = False + for line in lines: + if started: + if line == ']': + started = False + continue + + line = line.strip() + if _validate_python_version(line): + requirement = re.match(r'[^>]*', line).group(0) + requirement = re.sub(r"""['",]""", '', requirement) + version = re.search(r'>=?(\d\.?)+\w*', line).group(0) + if version: + version = re.sub(r'>=?', '==', version) + version = re.sub(r"""['",]""", '', version) + requirement += version + versions.append(requirement) + + elif (line.startswith('dependencies = [')): + started = True c.run(f'python -m pip install {" ".join(versions)}') From 2e7cfb7bc479ea0057800c41c6579b9b5a3d64db Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 11:53:16 +0000 Subject: [PATCH 08/16] formatting --- pyproject.toml | 3 ++- setup.cfg | 29 ++++++++++++----------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 55fd382..261b4ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,16 +55,17 @@ dependencies = [ [project.entry-points] sdgym = { main = "sdgym.cli.__main__:main" } - [project.optional-dependencies] dask = ["dask", "distributed"] + test = [ 'pytest>=3.4.2', 'pytest-cov>=2.6.0', 'jupyter>=1.0.0,<2', 'rundoc>=0.4.3,<0.5', ] + dev = [ # dask "dask", diff --git a/setup.cfg b/setup.cfg index e689ae9..c2e3618 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,30 +4,29 @@ commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? serialize = - {major}.{minor}.{patch}.{release}{candidate} - {major}.{minor}.{patch} + {major}.{minor}.{patch}.{release}{candidate} + {major}.{minor}.{patch} [bumpversion:part:release] optional_value = release first_value = dev values = - dev - release + dev + release + +[bumpversion:part:candidate] [bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" +search = version='{current_version}' +replace = version='{new_version}' [bumpversion:file:sdgym/__init__.py] -search = __version__ = "{current_version}" -replace = __version__ = "{new_version}" +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' [bdist_wheel] universal = 1 -[aliases] -test = pytest - [flake8] max-line-length = 99 inline-quotes = single @@ -37,9 +36,5 @@ extend-ignore = D105, # Missing docstring in magic method PD005, # Use arithmetic operator instead of method SFS3 # String literal formatting using f-string -[isort] -line_length = 99 -lines_between_types = 0 -multi_line_output = 4 -not_skip = __init__.py -use_parentheses = True \ No newline at end of file +[aliases] +test = pytest From e89c353319a0163303ea15ae317d56c12f112615 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 13:39:35 +0000 Subject: [PATCH 09/16] single quote --- pyproject.toml | 58 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 261b4ec..a407a2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,29 +1,29 @@ [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +requires = ['setuptools', 'wheel'] +build-backend = 'setuptools.build_meta' [project] -authors = [{ name = "DataCebo, Inc.", email = "info@sdv.dev" }] +authors = [{ name = 'DataCebo, Inc.', email = 'info@sdv.dev' }] classifiers = [ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: Free for non-commercial use", - "Natural Language :: English", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Topic :: Scientific/Engineering :: Artificial Intelligence", + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: Free for non-commercial use', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] -description = "Benchmark tabular synthetic data generators using a variety of datasets" -keywords = ["machine learning", "synthetic data generation", "benchmark", "generative models"] -name = "sdgym" -version = "0.7.1.dev0" -license = { text = "BSL-1.1" } +description = 'Benchmark tabular synthetic data generators using a variety of datasets' +keywords = ['machine learning', 'synthetic data generation', 'benchmark', 'generative models'] +name = 'sdgym' +version = '0.7.1.dev0' +license = { text = 'BSL-1.1' } requires-python = '>=3.8,<3.12' -readme = "README.md" -urls = { "Homepage" = "https://github.com/sdv-dev/SDGym" } +readme = 'README.md' +urls = { 'Homepage' = 'https://github.com/sdv-dev/SDGym' } dependencies = [ 'appdirs>=1.3,<2', @@ -53,11 +53,11 @@ dependencies = [ ] [project.entry-points] -sdgym = { main = "sdgym.cli.__main__:main" } +sdgym = { main = 'sdgym.cli.__main__:main' } [project.optional-dependencies] -dask = ["dask", "distributed"] +dask = ['dask', 'distributed'] test = [ 'pytest>=3.4.2', @@ -68,8 +68,8 @@ test = [ dev = [ # dask - "dask", - "distributed", + 'dask', + 'distributed', # test 'pytest>=3.4.2', @@ -125,8 +125,8 @@ dev = [ all = [ # dask - "dask", - "distributed", + 'dask', + 'distributed', # test 'pytest>=3.4.2', @@ -191,9 +191,9 @@ namespaces = false line_length = 99 lines_between_types = 0 multi_line_output = 4 -not_skip = ["__init__.py"] +not_skip = ['__init__.py'] use_parentheses = true [tool.pydocstyle] -convention = "google" -add-ignore = ["D107", "D407", "D417"] +convention = 'google' +add-ignore = ['D107', 'D407', 'D417'] From 7fac59d7ad872f32dc3b7307944cfe1980724f17 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 15:40:11 +0000 Subject: [PATCH 10/16] address comments in pyproject.toml --- pyproject.toml | 78 +++++++------------------------------------------- 1 file changed, 10 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a407a2c..cfa47c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,8 @@ requires = ['setuptools', 'wheel'] build-backend = 'setuptools.build_meta' [project] +name = 'sdgym' +description = 'Benchmark tabular synthetic data generators using a variety of datasets' authors = [{ name = 'DataCebo, Inc.', email = 'info@sdv.dev' }] classifiers = [ 'Development Status :: 2 - Pre-Alpha', @@ -16,15 +18,11 @@ classifiers = [ 'Programming Language :: Python :: 3.11', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] -description = 'Benchmark tabular synthetic data generators using a variety of datasets' keywords = ['machine learning', 'synthetic data generation', 'benchmark', 'generative models'] -name = 'sdgym' version = '0.7.1.dev0' license = { text = 'BSL-1.1' } requires-python = '>=3.8,<3.12' readme = 'README.md' -urls = { 'Homepage' = 'https://github.com/sdv-dev/SDGym' } - dependencies = [ 'appdirs>=1.3,<2', 'boto3>=1.15.0,<2', @@ -52,11 +50,16 @@ dependencies = [ 'sdv>=1.3.0,<2', ] +[project.urls] +"Source Code"= "https://github.com/sdv-dev/SDGym/" +"Issue Tracker" = "https://github.com/sdv-dev/SDGym/issues" +"Twitter" = "https://twitter.com/sdv_dev" +"Chat" = "https://bit.ly/sdv-slack-invite" + [project.entry-points] sdgym = { main = 'sdgym.cli.__main__:main' } [project.optional-dependencies] - dask = ['dask', 'distributed'] test = [ @@ -67,15 +70,7 @@ test = [ ] dev = [ - # dask - 'dask', - 'distributed', - - # test - 'pytest>=3.4.2', - 'pytest-cov>=2.6.0', - 'jupyter>=1.0.0,<2', - 'rundoc>=0.4.3,<0.5', + 'sdgym[dask, test]', # general 'bumpversion>=0.5.3,<0.6', @@ -124,60 +119,7 @@ dev = [ ] all = [ - # dask - 'dask', - 'distributed', - - # test - 'pytest>=3.4.2', - 'pytest-cov>=2.6.0', - 'jupyter>=1.0.0,<2', - 'rundoc>=0.4.3,<0.5', - - # general - 'bumpversion>=0.5.3,<0.6', - 'pip>=9.0.1', - 'watchdog>=0.8.3,<0.11', - - # style check - 'flake8>=3.7.7,<4', - 'flake8-absolute-import>=1.0,<2', - 'flake8-builtins>=1.5.3,<1.6', - 'flake8-comprehensions>=3.6.1,<3.7', - 'flake8-debugger>=4.0.0,<4.1', - 'flake8-docstrings>=1.5.0,<2', - 'flake8-eradicate>=1.1.0,<1.2', - 'flake8-fixme>=1.1.1,<1.2', - 'flake8-mock>=0.3,<0.4', - 'flake8-multiline-containers>=0.0.18,<0.1', - 'flake8-mutable>=1.2.0,<1.3', - 'flake8-expression-complexity>=0.0.9,<0.1', - 'flake8-print>=4.0.0,<4.1', - 'flake8-pytest-style>=1.5.0,<2', - 'flake8-quotes>=3.3.0,<4', - 'flake8-sfs>=0.0.3,<0.1', - 'flake8-variables-names>=0.0.4,<0.1', - 'dlint>=0.11.0,<0.12', - 'isort>=4.3.4,<5', - 'pandas-vet>=0.2.3,<0.3', - 'pep8-naming>=0.12.1,<0.13', - 'pydocstyle>=6.1.1,<6.2', - - # fix style issues - 'autoflake>=1.1,<2', - 'autopep8>=1.4.3,<2', - - # distribute on PyPI - 'twine>=1.10.0,<4', - 'wheel>=0.30.0', - - # Advanced testing - 'coverage>=4.5.1,<6', - 'tox>=2.9.1,<4', - 'importlib-metadata>=3.6', - - # Invoke - 'invoke', + 'sdgym[dask, test, dev]', ] [tool.setuptools] From 3ef342a60d307271038e824a25ea77a75876712f Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Wed, 28 Feb 2024 17:44:39 +0000 Subject: [PATCH 11/16] add toml dependency --- pyproject.toml | 1 + tasks.py | 42 ++++++++++++++++++------------------------ 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cfa47c2..f65d16b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ test = [ 'pytest-cov>=2.6.0', 'jupyter>=1.0.0,<2', 'rundoc>=0.4.3,<0.5', + 'toml>=0.10.2,<1', ] dev = [ diff --git a/tasks.py b/tasks.py index 406bfec..ba5bfc2 100644 --- a/tasks.py +++ b/tasks.py @@ -2,13 +2,14 @@ import inspect import operator import os -import re -import pkg_resources import platform +import re import shutil import stat from pathlib import Path +import pkg_resources +import toml from invoke import task COMPARISONS = { @@ -70,30 +71,23 @@ def _validate_python_version(line): @task def install_minimum(c): - with open('pyproject.toml', 'r') as pyproject: - lines = pyproject.read().splitlines() + with open('pyproject.toml', 'r', encoding='utf-8') as pyproject_file: + pyproject_data = toml.load(pyproject_file) + dependencies = pyproject_data.get('project', {}).get('dependencies', []) versions = [] - started = False - for line in lines: - if started: - if line == ']': - started = False - continue - - line = line.strip() - if _validate_python_version(line): - requirement = re.match(r'[^>]*', line).group(0) - requirement = re.sub(r"""['",]""", '', requirement) - version = re.search(r'>=?(\d\.?)+\w*', line).group(0) - if version: - version = re.sub(r'>=?', '==', version) - version = re.sub(r"""['",]""", '', version) - requirement += version - versions.append(requirement) - - elif (line.startswith('dependencies = [')): - started = True + for line in dependencies: + line = line.strip() + if _validate_python_version(line): + requirement = re.match(r'[^>]*', line).group(0) + requirement = re.sub(r"""['",]""", '', requirement) + version = re.search(r'>=?(\d\.?)+\w*', line).group(0) + if version: + version = re.sub(r'>=?', '==', version) + version = re.sub(r"""['",]""", '', version) + requirement += version + + versions.append(requirement) c.run(f'python -m pip install {" ".join(versions)}') From 1c527e41c1413c0d467bc154131aa6e13a37bb16 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 29 Feb 2024 16:46:55 +0000 Subject: [PATCH 12/16] update _get_minimum_versions --- .github/workflows/readme.yml | 1 + tasks.py | 64 +++++++++++++++++++----------------- tests/test_tasks.py | 37 +++++++++++++++++++++ 3 files changed, 72 insertions(+), 30 deletions(-) create mode 100644 tests/test_tasks.py diff --git a/.github/workflows/readme.yml b/.github/workflows/readme.yml index 14fddf4..6058d32 100644 --- a/.github/workflows/readme.yml +++ b/.github/workflows/readme.yml @@ -22,5 +22,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install invoke rundoc . + python -m pip install toml - name: Run the README.md run: invoke readme diff --git a/tasks.py b/tasks.py index ba5bfc2..51a0eb5 100644 --- a/tasks.py +++ b/tasks.py @@ -6,11 +6,14 @@ import re import shutil import stat +import sys from pathlib import Path import pkg_resources import toml from invoke import task +from packaging.requirements import Requirement +from packaging.version import Version COMPARISONS = { '>=': operator.ge, @@ -53,21 +56,32 @@ def readme(c): os.chdir(cwd) shutil.rmtree(test_path) - -def _validate_python_version(line): - is_valid = True - for python_version_match in re.finditer(r"python_version(<=?|>=?|==)\'(\d\.?)+\'", line): - python_version = python_version_match.group(0) - comparison = re.search(r'(>=?|<=?|==)', python_version).group(0) - version_number = python_version.split(comparison)[-1].replace("'", "") - comparison_function = COMPARISONS[comparison] - is_valid = is_valid and comparison_function( - pkg_resources.parse_version(platform.python_version()), - pkg_resources.parse_version(version_number), - ) - - return is_valid - +def _get_minimum_versions(dependencies, python_version): + min_versions = {} + for dependency in dependencies: + if '@' in dependency: + name, url = dependency.split(' @') + min_versions[name] = f'{name} @ {url}' + continue + + req = Requirement(dependency) + if ';' in dependency: + marker = req.marker + if marker and not marker.evaluate({'python_version': python_version}): + continue # Skip this dependency if the marker does not apply to the current Python version + + if req.name not in min_versions: + min_version = next((spec.version for spec in req.specifier if spec.operator in ('>=', '==')), None) + if min_version: + min_versions[req.name] = f'{req.name}=={min_version}' + + elif '@' not in min_versions[req.name]: + existing_version = Version(min_versions[req.name].split('==')[1]) + new_version = next((spec.version for spec in req.specifier if spec.operator in ('>=', '==')), existing_version) + if new_version > existing_version: + min_versions[req.name] = f'{req.name}=={new_version}' # Change when a valid newer version is found + + return list(min_versions.values()) @task def install_minimum(c): @@ -75,21 +89,11 @@ def install_minimum(c): pyproject_data = toml.load(pyproject_file) dependencies = pyproject_data.get('project', {}).get('dependencies', []) - versions = [] - for line in dependencies: - line = line.strip() - if _validate_python_version(line): - requirement = re.match(r'[^>]*', line).group(0) - requirement = re.sub(r"""['",]""", '', requirement) - version = re.search(r'>=?(\d\.?)+\w*', line).group(0) - if version: - version = re.sub(r'>=?', '==', version) - version = re.sub(r"""['",]""", '', version) - requirement += version - - versions.append(requirement) - - c.run(f'python -m pip install {" ".join(versions)}') + python_version = '.'.join(map(str, sys.version_info[:2])) + minimum_versions = _get_minimum_versions(dependencies, python_version) + + if minimum_versions: + c.run(f'python -m pip install {" ".join(minimum_versions)}') @task diff --git a/tests/test_tasks.py b/tests/test_tasks.py new file mode 100644 index 0000000..a4cf300 --- /dev/null +++ b/tests/test_tasks.py @@ -0,0 +1,37 @@ +from tasks import _get_minimum_versions + + +def test_get_minimum_versions(): + """Test the ``_get_minimum_versions`` method. + + The method should return the minimum versions of the dependencies for the given python version. + If a library is linked to an URL, the minimum version should be the URL. + """ + # Setup + dependencies = [ + "numpy>=1.20.0,<2;python_version<'3.10'", + "numpy>=1.23.3,<2;python_version>='3.10'", + "pandas>=1.2.0,<2;python_version<'3.10'", + "pandas>=1.3.0,<2;python_version>='3.10'", + 'humanfriendly>=8.2,<11', + 'pandas @ git+https://github.com/pandas-dev/pandas.git@master#egg=pandas' + ] + + # Run + minimum_versions_39 = _get_minimum_versions(dependencies, '3.9') + minimum_versions_310 = _get_minimum_versions(dependencies, '3.10') + + # Assert + expected_versions_39 = [ + 'numpy==1.20.0', + 'pandas @ git+https://github.com/pandas-dev/pandas.git@master#egg=pandas', + 'humanfriendly==8.2', + ] + expected_versions_310 = [ + 'numpy==1.23.3', + 'pandas @ git+https://github.com/pandas-dev/pandas.git@master#egg=pandas', + 'humanfriendly==8.2', + ] + + assert minimum_versions_39 == expected_versions_39 + assert minimum_versions_310 == expected_versions_310 From a1fd9954cb75b4d489fab04a8a112e7a997b797a Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 29 Feb 2024 16:55:15 +0000 Subject: [PATCH 13/16] pytest --- tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks.py b/tasks.py index 51a0eb5..68f7cb4 100644 --- a/tasks.py +++ b/tasks.py @@ -60,7 +60,7 @@ def _get_minimum_versions(dependencies, python_version): min_versions = {} for dependency in dependencies: if '@' in dependency: - name, url = dependency.split(' @') + name, url = dependency.split(' @ ') min_versions[name] = f'{name} @ {url}' continue From e8cf5f72eb955b60e1ea134912a7d7d67e28ff75 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 29 Feb 2024 17:02:37 +0000 Subject: [PATCH 14/16] blank lines --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f65d16b..3d5e16c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,6 @@ sdgym = { main = 'sdgym.cli.__main__:main' } [project.optional-dependencies] dask = ['dask', 'distributed'] - test = [ 'pytest>=3.4.2', 'pytest-cov>=2.6.0', @@ -69,7 +68,6 @@ test = [ 'rundoc>=0.4.3,<0.5', 'toml>=0.10.2,<1', ] - dev = [ 'sdgym[dask, test]', @@ -118,7 +116,6 @@ dev = [ # Invoke 'invoke', ] - all = [ 'sdgym[dask, test, dev]', ] From 76d2115f60d3485097554d940fc6715a13af0263 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 29 Feb 2024 17:20:59 +0000 Subject: [PATCH 15/16] use tomli --- .github/workflows/readme.yml | 2 +- tasks.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/readme.yml b/.github/workflows/readme.yml index 6058d32..fc21dac 100644 --- a/.github/workflows/readme.yml +++ b/.github/workflows/readme.yml @@ -22,6 +22,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install invoke rundoc . - python -m pip install toml + python -m pip install tomli - name: Run the README.md run: invoke readme diff --git a/tasks.py b/tasks.py index 68f7cb4..50ab8ec 100644 --- a/tasks.py +++ b/tasks.py @@ -10,7 +10,7 @@ from pathlib import Path import pkg_resources -import toml +import tomli from invoke import task from packaging.requirements import Requirement from packaging.version import Version @@ -56,6 +56,7 @@ def readme(c): os.chdir(cwd) shutil.rmtree(test_path) + def _get_minimum_versions(dependencies, python_version): min_versions = {} for dependency in dependencies: @@ -83,10 +84,11 @@ def _get_minimum_versions(dependencies, python_version): return list(min_versions.values()) + @task def install_minimum(c): - with open('pyproject.toml', 'r', encoding='utf-8') as pyproject_file: - pyproject_data = toml.load(pyproject_file) + with open('pyproject.toml', 'rb') as pyproject_file: + pyproject_data = tomli.load(pyproject_file) dependencies = pyproject_data.get('project', {}).get('dependencies', []) python_version = '.'.join(map(str, sys.version_info[:2])) From 03c2d8774eeb24624c9ae9831e6611d6fc896a47 Mon Sep 17 00:00:00 2001 From: R-Palazzo Date: Thu, 29 Feb 2024 17:28:04 +0000 Subject: [PATCH 16/16] tomli 2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3d5e16c..70e8947 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ test = [ 'pytest-cov>=2.6.0', 'jupyter>=1.0.0,<2', 'rundoc>=0.4.3,<0.5', - 'toml>=0.10.2,<1', + 'tomli>=2.0.0,<3', ] dev = [ 'sdgym[dask, test]',