diff --git a/python/turbine_models/README.md b/python/turbine_models/README.md new file mode 100644 index 000000000..666d8c057 --- /dev/null +++ b/python/turbine_models/README.md @@ -0,0 +1,47 @@ +# LLAMA 2 Inference + +This example requires some extra dependencies. Here's an easy way to get it running on a fresh server. + +Don't forget to put in your huggingface token from https://huggingface.co/settings/tokens + +```bash +#!/bin/bash + + +# if you don't insert it, you will be prompted to log in later; +# you may need to rerun this script after logging in +YOUR_HF_TOKEN="insert token for headless" + +# clone and install dependencies +sudo apt install -y git +git clone https://github.com/nod-ai/SHARK-Turbine.git +cd SHARK-Turbine +pip install -r requirements.txt +pip install -r turbine-models-requirements.txt + +# do an editable install from the cloned SHARK-Turbine +pip install --editable . + +# Log in with Hugging Face CLI if token setup is required +if [[ $YOUR_HF_TOKEN == hf_* ]]; then + huggingface-cli login --token $YOUR_HF_TOKEN + echo "Logged in with YOUR_HF_TOKEN." +elif [ -f ~/.cache/huggingface/token ]; then + # Read token from the file + TOKEN_CONTENT=$(cat ~/.cache/huggingface/token) + + # Check if the token starts with "hf_" + if [[ $TOKEN_CONTENT == hf_* ]]; then + echo "Already logged in with a Hugging Face token." + else + echo "Token in file does not start with 'hf_'. Please log into huggingface to download models." + huggingface-cli login + fi +else + echo "Please log into huggingface to download models." 
+ huggingface-cli login +fi + +# Run the Python script +python ./python/turbine_models/custom_models/stateless_llama.py --compile_to=torch --external_weights=safetensors --external_weight_file=llama_f32.safetensors +``` diff --git a/python/turbine_models/__init__.py b/python/turbine_models/custom_models/__init__.py similarity index 100% rename from python/turbine_models/__init__.py rename to python/turbine_models/custom_models/__init__.py diff --git a/python/turbine_models/gen_external_params/__init__.py b/python/turbine_models/gen_external_params/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/turbine_models/pyproject.toml b/python/turbine_models/pyproject.toml new file mode 100644 index 000000000..9787c3bdf --- /dev/null +++ b/python/turbine_models/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/python/turbine_models/setup.py b/python/turbine_models/setup.py new file mode 100644 index 000000000..048f64050 --- /dev/null +++ b/python/turbine_models/setup.py @@ -0,0 +1,72 @@ +import json +import os +from pathlib import Path + +from setuptools import find_namespace_packages, setup + + +#### TURBINE MODELS SETUP #### + + +TURBINE_MODELS_DIR = os.path.realpath(os.path.dirname(__file__)) +TURBINE_ROOT_DIR = Path(TURBINE_MODELS_DIR).parent.parent +print(TURBINE_ROOT_DIR) +VERSION_INFO_FILE = os.path.join(TURBINE_MODELS_DIR, "version_info.json") + + +with open( + os.path.join( + TURBINE_MODELS_DIR, + "README.md", + ), + "rt", +) as f: + README = f.read() + + +def load_version_info(): + with open(VERSION_INFO_FILE, "rt") as f: + return json.load(f) + + +version_info = load_version_info() +PACKAGE_VERSION = version_info["package-version"] + +setup( + name=f"turbine-models", + version=f"{PACKAGE_VERSION}", + author="SHARK Authors", + author_email="dan@nod.ai", + description="SHARK Turbine Machine Learning Model Zoo", + long_description=README, + 
long_description_content_type="text/markdown", + url="https://github.com/nod-ai/SHARK-Turbine", + license="Apache-2.0", + classifiers=[ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + ], + package_dir={ + "": f"{str(TURBINE_ROOT_DIR)}/python", + }, + packages=find_namespace_packages( + include=[ + "turbine_models", + "turbine_models.*", + ], + where=f"{str(TURBINE_ROOT_DIR)}/python", + ), + entry_points={ + "torch_dynamo_backends": [ + "turbine_cpu = shark_turbine.dynamo.backends.cpu:backend", + ], + }, + install_requires=[ + "Shark-Turbine", + "brevitas @ git+https://github.com/Xilinx/brevitas.git@6695e8df7f6a2c7715b9ed69c4b78157376bb60b", + "protobuf", + "sentencepiece", + "transformers", + ], +) diff --git a/python/turbine_models/tests/__init__.py b/python/turbine_models/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/turbine_models/version_info.json b/python/turbine_models/version_info.json new file mode 100644 index 000000000..44d2596c7 --- /dev/null +++ b/python/turbine_models/version_info.json @@ -0,0 +1,3 @@ +{ + "package-version": "0.0.1.dev1" +} diff --git a/setup.py b/setup.py index 53f2646f2..53a7b8c5d 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,10 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import argparse import json import os import distutils.command.build from pathlib import Path -import sys from setuptools import find_namespace_packages, setup