From 1702afe68552c7e718f2b9f0552f45bf954852b6 Mon Sep 17 00:00:00 2001 From: Kye Date: Wed, 24 May 2023 23:55:09 -0400 Subject: [PATCH] pypi stuff --- .github/workflows/python-publish.yml | 39 +++++++++++++++++++ Andromeda/__init__.py | 2 + .../build_dataset.py | 4 +- inference.py => Andromeda/inference.py | 0 traingv2.py => Andromeda/traingv2.py | 4 +- training.py => Andromeda/training.txt | 4 +- setup.py | 34 ++++++++++++++++ 7 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/python-publish.yml create mode 100644 Andromeda/__init__.py rename build_dataset.py => Andromeda/build_dataset.py (98%) rename inference.py => Andromeda/inference.py (100%) rename traingv2.py => Andromeda/traingv2.py (99%) rename training.py => Andromeda/training.txt (98%) create mode 100644 setup.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..dbaedd4 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/Andromeda/__init__.py b/Andromeda/__init__.py new file mode 100644 index 0000000..dd3e7ab --- /dev/null +++ b/Andromeda/__init__.py @@ -0,0 +1,2 @@ +from Andromeda.traingv2 import TrainAndromeda +from Andromeda.build_dataset import built_dataset \ No newline at end of file diff --git a/build_dataset.py b/Andromeda/build_dataset.py similarity index 98% rename from build_dataset.py rename to Andromeda/build_dataset.py index c52f300..961c4bf 100644 --- a/build_dataset.py +++ b/Andromeda/build_dataset.py @@ -13,7 +13,7 @@ class CFG: TOKENIZER: str = "EleutherAI/gpt-neox-20b" DATASET_NAME: str = "EleutherAI/the_pile_deduplicated" -def main(args): +def built_dataset(args): tokenizer = AutoTokenizer.from_pretrained(CFG.Tokenizer) train_dataset = load_dataset(CFG.DATASET_NAME, split="train") @@ -67,4 +67,4 @@ def group_texts(examples): parser.add_argument("--tokenizer", type=str, default=CFG.TOKENIZER, help="Tokenizer model to use") parser.add_argument("--dataset_name", type=str, default=CFG.DATASET_NAME, help="Name of the dataset to process") args = parser.parse_args() - main(args) + built_dataset(args) diff --git a/inference.py b/Andromeda/inference.py similarity index 100% rename from inference.py rename to Andromeda/inference.py diff --git a/traingv2.py b/Andromeda/traingv2.py similarity index 99% rename from traingv2.py rename to Andromeda/traingv2.py index 5d74224..1d67492 100644 --- a/traingv2.py +++ b/Andromeda/traingv2.py @@ -222,7 +222,7 @@ def group_texts(examples): # main -def main(): +def TrainAndromeda(): # accelerator timeout = InitProcessGroupKwargs(timeout=timedelta(seconds=1_000_000)) @@ -431,4 +431,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + TrainAndromeda() \ No newline at end of file diff --git a/training.py b/Andromeda/training.txt similarity index 98% rename from training.py rename to Andromeda/training.txt index 66f769d..67f617b 100644 --- a/training.py +++ b/Andromeda/training.txt @@ -13,8 +13,8 @@ from lion_pytorch import Lion -from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper - +# from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper +from optimus_prim import TransformerWrapper, Decoder, AutoregressiveWrapper from torch.nn.parallel import DataParallel, DistributedDataParallel import torch.distributed as dist diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f822c10 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup, find_packages + +setup( + name = 'andromeda', + packages = find_packages(exclude=['examples']), + version = '1.1.3', + license='MIT', + description = 'andromeda - Pytorch', + author = 'Kye Gomez', + author_email = 'kye@apac.ai', + url = 'https://github.com/kyegomez/Andromeda', + long_description_content_type = 'text/markdown', + keywords = [ + 'artificial intelligence', + 'attention mechanism', + 'transformers' + ], + install_requires=[ + 'torch>=1.6', + 'einops>=0.6.1', + 'datasets', + 'accelerate', + 'transformers', + 'optimus-prime-transformers', + 'lion_pytorch' + ], + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 3.6', + ], +) \ No newline at end of file