From 82ba17aacdb91e7614687fbe6001bbc92ea38512 Mon Sep 17 00:00:00 2001 From: LouisCastricato Date: Tue, 8 Oct 2024 20:14:04 +0000 Subject: [PATCH] adds pyproject files and tests --- requirements/pyproject-apex-pip.toml | 14 +++++++ requirements/pyproject-comet.toml | 14 +++++++ requirements/pyproject-flashattention.toml | 14 +++++++ requirements/pyproject-mamba.toml | 16 +++++++ requirements/pyproject-neox-dev.toml | 21 ++++++++++ requirements/pyproject-onebitadam.toml | 14 +++++++ requirements/pyproject-s3.toml | 15 +++++++ requirements/pyproject-sparseattention.toml | 14 +++++++ requirements/pyproject-tensorboard.toml | 14 +++++++ requirements/pyproject-transformerengine.toml | 14 +++++++ requirements/pyproject-wandb.toml | 14 +++++++ requirements/pyproject.toml | 33 +++++++++++++++ tests/requirements/test_requirements.py | 42 +++++++++++++++++++ 13 files changed, 239 insertions(+) create mode 100644 requirements/pyproject-apex-pip.toml create mode 100644 requirements/pyproject-comet.toml create mode 100644 requirements/pyproject-flashattention.toml create mode 100644 requirements/pyproject-mamba.toml create mode 100644 requirements/pyproject-neox-dev.toml create mode 100644 requirements/pyproject-onebitadam.toml create mode 100644 requirements/pyproject-s3.toml create mode 100644 requirements/pyproject-sparseattention.toml create mode 100644 requirements/pyproject-tensorboard.toml create mode 100644 requirements/pyproject-transformerengine.toml create mode 100644 requirements/pyproject-wandb.toml create mode 100644 requirements/pyproject.toml create mode 100644 tests/requirements/test_requirements.py diff --git a/requirements/pyproject-apex-pip.toml b/requirements/pyproject-apex-pip.toml new file mode 100644 index 000000000..ed9435af3 --- /dev/null +++ b/requirements/pyproject-apex-pip.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-apex-pip" +version = "0.1.0" +description = "Apex pip requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = 
"Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +pip = "23.3.2" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-comet.toml b/requirements/pyproject-comet.toml new file mode 100644 index 000000000..3fc98576b --- /dev/null +++ b/requirements/pyproject-comet.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-comet" +version = "0.1.0" +description = "Comet ML requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +comet_ml = ">=3.45.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-flashattention.toml b/requirements/pyproject-flashattention.toml new file mode 100644 index 000000000..73102251a --- /dev/null +++ b/requirements/pyproject-flashattention.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-flashattention" +version = "0.1.0" +description = "Flash Attention requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +flash-attn = "2.5.6" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-mamba.toml b/requirements/pyproject-mamba.toml new file mode 100644 index 000000000..807d4d801 --- /dev/null +++ b/requirements/pyproject-mamba.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "gpt-neox-mamba" +version = "0.1.0" +description = "Mamba requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +causal_conv1d = ">=1.1.0" +einops = "*" +mamba_ssm = ">=1.2.0.post1" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git 
a/requirements/pyproject-neox-dev.toml b/requirements/pyproject-neox-dev.toml new file mode 100644 index 000000000..4a0058a3e --- /dev/null +++ b/requirements/pyproject-neox-dev.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "gpt-neox-dev" +version = "0.1.0" +description = "Development requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +autopep8 = ">=1.5.6" +clang-format = ">=13.0.1" +pre-commit = ">=2.17.0" +pytest = ">=6.2.3" +pytest-cov = ">=2.11.1" +pytest-forked = ">=1.3.0" +pytest-html = "4.1.1" +pytest-xdist = "*" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-onebitadam.toml b/requirements/pyproject-onebitadam.toml new file mode 100644 index 000000000..002391e93 --- /dev/null +++ b/requirements/pyproject-onebitadam.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-onebitadam" +version = "0.1.0" +description = "OneBitAdam requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +cupy-cuda111 = ">=8.6.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-s3.toml b/requirements/pyproject-s3.toml new file mode 100644 index 000000000..d90584660 --- /dev/null +++ b/requirements/pyproject-s3.toml @@ -0,0 +1,15 @@ +[tool.poetry] +name = "gpt-neox-s3" +version = "0.1.0" +description = "S3 requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +boto3 = "*" +hf-transfer = ">=0.1.3" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-sparseattention.toml b/requirements/pyproject-sparseattention.toml new file mode 100644 index 
000000000..ecd069c13 --- /dev/null +++ b/requirements/pyproject-sparseattention.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-sparseattention" +version = "0.1.0" +description = "Sparse Attention requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +triton = "2.1.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-tensorboard.toml b/requirements/pyproject-tensorboard.toml new file mode 100644 index 000000000..3f88cb644 --- /dev/null +++ b/requirements/pyproject-tensorboard.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-tensorboard" +version = "0.1.0" +description = "TensorBoard requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +tensorboard = "2.13.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-transformerengine.toml b/requirements/pyproject-transformerengine.toml new file mode 100644 index 000000000..07429f88a --- /dev/null +++ b/requirements/pyproject-transformerengine.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-transformerengine" +version = "0.1.0" +description = "Transformer Engine requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +transformer-engine = {git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "stable"} + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject-wandb.toml b/requirements/pyproject-wandb.toml new file mode 100644 index 000000000..e34fe8648 --- /dev/null +++ b/requirements/pyproject-wandb.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "gpt-neox-wandb" +version = 
"0.1.0" +description = "Weights & Biases requirements for GPT-NeoX" +authors = ["EleutherAI "] +license = "Apache-2.0" + +[tool.poetry.dependencies] +python = "^3.8" +wandb = ">=0.10.28" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/requirements/pyproject.toml b/requirements/pyproject.toml new file mode 100644 index 000000000..ccaf95e4e --- /dev/null +++ b/requirements/pyproject.toml @@ -0,0 +1,33 @@ +[tool.poetry] +name = "gpt-neox" +version = "2.0.0" +description = "An open-source library for training large-scale language models on GPUs" +authors = ["EleutherAI "] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://www.github.com/eleutherai/gpt-neox" +repository = "https://www.github.com/eleutherai/gpt-neox" +documentation = "https://www.github.com/eleutherai/gpt-neox" + +[tool.poetry.dependencies] +python = "^3.8" +deepspeed = {git = "https://github.com/EleutherAI/DeeperSpeed.git", rev = "02e2ebf7dee6aaab3d89094ed470a4609763c742"} +ftfy = "^6.0.1" +huggingface_hub = "^0.11.0" +jinja2 = "3.1.4" +lm_dataformat = {git = "https://github.com/EleutherAI/lm_dataformat.git", rev = "4eec05349977071bf67fc072290b95e31c8dd836"} +lm_eval = ">=0.4.0,<=0.4.1" +mpi4py = "^3.0.3" +numpy = "<2.0" +pybind11 = "^2.6.2" +regex = "*" +sentencepiece = "*" +six = "*" +tiktoken = "^0.1.2" +tokenizers = "^0.12.1" +transformers = "4.38.0" +toml = "*" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/tests/requirements/test_requirements.py b/tests/requirements/test_requirements.py new file mode 100644 index 000000000..7f7f7bd4b --- /dev/null +++ b/tests/requirements/test_requirements.py @@ -0,0 +1,42 @@ +import pytest +import toml +from pathlib import Path + +def parse_requirements(file_path): + with open(file_path, 'r') as f: + return [line.strip() for line in f if line.strip() and not 
def _strip_comment(line):
    """Return *line* without surrounding whitespace or any ``#`` comment.

    A ``#`` starts a comment only at the beginning of the line or when it is
    preceded by whitespace, so URL fragments such as ``#egg=name`` are kept.
    """
    line = line.strip()
    if line.startswith("#"):
        return ""
    for marker in (" #", "\t#"):
        line = line.split(marker, 1)[0]
    return line.rstrip()


def parse_requirements(file_path):
    """Read a pip requirements file and return its requirement lines.

    Blank lines, comment lines (indented or not) and trailing inline
    comments are dropped; every returned entry is stripped of whitespace.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        stripped = (_strip_comment(line) for line in f)
        return [line for line in stripped if line]


def parse_pyproject(file_path):
    """Return the ``[tool.poetry.dependencies]`` table of a pyproject file.

    Raises ``KeyError`` if the file has no such table.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        pyproject_data = toml.load(f)
    return pyproject_data["tool"]["poetry"]["dependencies"]


def normalize_version(version):
    """Reduce a version constraint to a loosely comparable string.

    ``>=``, ``==``, ``*`` and all whitespace are removed, so ``"==1.0"``,
    ``">= 1.0"`` and ``"1.0"`` all normalize to ``"1.0"``.  A table-style
    dependency (e.g. ``{git = "...", rev = "..."}``) contributes its
    ``version`` entry, or an empty string when it has none, instead of
    raising ``AttributeError``.
    """
    if isinstance(version, dict):
        version = version.get("version", "")
    version = str(version)
    for token in (">=", "==", "*"):
        version = version.replace(token, "")
    return "".join(version.split())


def _canonical_name(name):
    """Lower-case *name* and treat ``_`` and ``.`` as ``-`` for lookups."""
    return name.strip().lower().replace("_", "-").replace(".", "-")


def _split_requirement(req):
    """Split a requirement line into ``(name, version_specifier)``.

    Environment markers (after ``;``) and extras (``[...]``) are discarded.
    The specifier keeps its operator so that both sides of the comparison
    go through the same normalization.
    """
    req = req.split(";", 1)[0].strip()
    # The name ends at the first operator character, extras bracket or space.
    cut = next(
        (i for i, char in enumerate(req) if char in "<>=!~[ \t"),
        len(req),
    )
    name, spec = req[:cut], req[cut:].strip()
    if spec.startswith("["):
        spec = spec.partition("]")[2].strip()
    return name, spec


def _find_mismatches(req_deps, pyproject_deps):
    """Return human-readable descriptions of every dependency mismatch."""
    declared = {_canonical_name(key): value for key, value in pyproject_deps.items()}
    mismatches = []
    for req in req_deps:
        name, spec = _split_requirement(req)
        key = _canonical_name(name)
        if key == "python":
            continue  # Skip Python version comparison
        if key not in declared:
            mismatches.append(f"{req!r} is not declared in pyproject")
            continue
        # A requirement without a version only needs to be present.
        if spec and normalize_version(spec) != normalize_version(declared[key]):
            mismatches.append(
                f"{req!r} does not match pyproject constraint {declared[key]!r}"
            )
    return mismatches


def compare_dependencies(req_deps, pyproject_deps):
    """Return True when every requirement is matched by the pyproject table.

    Args:
        req_deps: requirement lines as returned by ``parse_requirements``.
        pyproject_deps: mapping of dependency name to Poetry constraint, as
            returned by ``parse_pyproject``.

    A requirement matches when its name is present (case-insensitively,
    with ``-``/``_``/``.`` treated alike) and, if it pins a version, the
    normalized constraints are equal.  ``python`` entries are ignored.
    """
    return not _find_mismatches(req_deps, pyproject_deps)


@pytest.mark.parametrize(
    "req_file",
    # Sorted so that test ordering and ids are stable across runs.
    sorted(Path("requirements").glob("requirements-*.txt")),
    ids=lambda path: path.name,
)
def test_pyproject_matches_requirements(req_file):
    """Each requirements-<name>.txt must agree with pyproject-<name>.toml."""
    # Split only once: "requirements-apex-pip" must map to "apex-pip".
    suffix = req_file.stem.split("-", 1)[1]
    pyproject_file = req_file.with_name(f"pyproject-{suffix}.toml")
    assert pyproject_file.exists(), f"pyproject.toml file not found for {req_file}"

    req_deps = parse_requirements(req_file)
    pyproject_deps = parse_pyproject(pyproject_file)

    mismatches = _find_mismatches(req_deps, pyproject_deps)
    assert not mismatches, (
        f"Dependencies in {req_file} do not match those in {pyproject_file}: "
        + "; ".join(mismatches)
    )