From 1702afe68552c7e718f2b9f0552f45bf954852b6 Mon Sep 17 00:00:00 2001
From: Kye <kye@apacmediasolutions.com>
Date: Wed, 24 May 2023 23:55:09 -0400
Subject: [PATCH] Package Andromeda for PyPI: add setup.py and publish workflow, move modules into Andromeda/

---
 .github/workflows/python-publish.yml          | 39 +++++++++++++++++++
 Andromeda/__init__.py                         |  2 +
 .../build_dataset.py                          |  4 +-
 inference.py => Andromeda/inference.py        |  0
 traingv2.py => Andromeda/traingv2.py          |  4 +-
 training.py => Andromeda/training.txt         |  4 +-
 setup.py                                      | 34 ++++++++++++++++
 7 files changed, 81 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/python-publish.yml
 create mode 100644 Andromeda/__init__.py
 rename build_dataset.py => Andromeda/build_dataset.py (98%)
 rename inference.py => Andromeda/inference.py (100%)
 rename traingv2.py => Andromeda/traingv2.py (99%)
 rename training.py => Andromeda/training.txt (98%)
 create mode 100644 setup.py

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..dbaedd4
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow builds the package with `python -m build` and uploads it via pypa/gh-action-pypi-publish when a release is published.
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]  # only the "published" release activity triggers this workflow
+
+permissions:
+  contents: read  # the only permission requested is read access to repository contents
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'  # NOTE(review): unpinned 3.x range; confirm it matches the versions the package supports
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build  # NOTE(review): presumably leaves the artifacts where the publish step expects them; confirm
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29  # pinned to a full commit SHA rather than a tag
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}  # supplied via the PYPI_API_TOKEN secret
\ No newline at end of file
diff --git a/Andromeda/__init__.py b/Andromeda/__init__.py
new file mode 100644
index 0000000..dd3e7ab
--- /dev/null
+++ b/Andromeda/__init__.py
@@ -0,0 +1,2 @@
+from Andromeda.traingv2 import TrainAndromeda
+from Andromeda.build_dataset import built_dataset
\ No newline at end of file
diff --git a/build_dataset.py b/Andromeda/build_dataset.py
similarity index 98%
rename from build_dataset.py
rename to Andromeda/build_dataset.py
index c52f300..961c4bf 100644
--- a/build_dataset.py
+++ b/Andromeda/build_dataset.py
@@ -13,7 +13,7 @@ class CFG:
     TOKENIZER: str = "EleutherAI/gpt-neox-20b"
     DATASET_NAME: str = "EleutherAI/the_pile_deduplicated"
 
-def main(args):
+def built_dataset(args):
     tokenizer = AutoTokenizer.from_pretrained(CFG.Tokenizer)
     train_dataset = load_dataset(CFG.DATASET_NAME, split="train")
 
@@ -67,4 +67,4 @@ def group_texts(examples):
     parser.add_argument("--tokenizer", type=str, default=CFG.TOKENIZER, help="Tokenizer model to use")
     parser.add_argument("--dataset_name", type=str, default=CFG.DATASET_NAME, help="Name of the dataset to process")
     args = parser.parse_args()
-    main(args)
+    built_dataset(args)
diff --git a/inference.py b/Andromeda/inference.py
similarity index 100%
rename from inference.py
rename to Andromeda/inference.py
diff --git a/traingv2.py b/Andromeda/traingv2.py
similarity index 99%
rename from traingv2.py
rename to Andromeda/traingv2.py
index 5d74224..1d67492 100644
--- a/traingv2.py
+++ b/Andromeda/traingv2.py
@@ -222,7 +222,7 @@ def group_texts(examples):
 # main
 
 
-def main():
+def TrainAndromeda():
     # accelerator
 
     timeout = InitProcessGroupKwargs(timeout=timedelta(seconds=1_000_000))
@@ -431,4 +431,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    TrainAndromeda()
\ No newline at end of file
diff --git a/training.py b/Andromeda/training.txt
similarity index 98%
rename from training.py
rename to Andromeda/training.txt
index 66f769d..67f617b 100644
--- a/training.py
+++ b/Andromeda/training.txt
@@ -13,8 +13,8 @@
 
 
 from lion_pytorch import Lion
-from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper
-
+# from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper
+from optimus_prime import TransformerWrapper, Decoder, AutoregressiveWrapper
 
 from torch.nn.parallel import DataParallel, DistributedDataParallel
 import torch.distributed as dist
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f822c10
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+"""Setuptools packaging script for the 'andromeda' distribution."""
+from setuptools import find_packages, setup
+
+setup(
+    name='andromeda',
+    version='1.1.3',
+    description='andromeda - Pytorch',
+    long_description_content_type='text/markdown',
+    license='MIT',
+    author='Kye Gomez',
+    author_email='kye@apac.ai',
+    url='https://github.com/kyegomez/Andromeda',
+    keywords=['artificial intelligence', 'attention mechanism', 'transformers'],
+    # Auto-discover packages, leaving out a top-level `examples` package.
+    packages=find_packages(exclude=['examples']),
+    # Requirements declared for installation alongside the package.
+    install_requires=[
+        'torch>=1.6',
+        'einops>=0.6.1',
+        'datasets',
+        'accelerate',
+        'transformers',
+        'optimus-prime-transformers',
+        'lion_pytorch',
+    ],
+    # Trove classifiers attached to the distribution metadata.
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3.6',
+    ],
+)
\ No newline at end of file