diff --git a/python/turbine_models/README.md b/python/turbine_models/README.md new file mode 100644 index 000000000..666d8c057 --- /dev/null +++ b/python/turbine_models/README.md @@ -0,0 +1,47 @@ +# LLAMA 2 Inference + +This example requires some extra dependencies. Here's an easy way to get it running on a fresh server. + +Don't forget to put in your huggingface token from https://huggingface.co/settings/tokens + +```bash +#!/bin/bash + + +# if you don't insert it, you will be prompted to log in later; +# you may need to rerun this script after logging in +YOUR_HF_TOKEN="insert token for headless" + +# clone and install dependencies +sudo apt install -y git +git clone https://github.com/nod-ai/SHARK-Turbine.git +cd SHARK-Turbine +pip install -r requirements.txt +pip install -r turbine-models-requirements.txt + +# do an editable install from the cloned SHARK-Turbine +pip install --editable . + +# Log in with Hugging Face CLI if token setup is required +if [[ $YOUR_HF_TOKEN == hf_* ]]; then + huggingface-cli login --token $YOUR_HF_TOKEN + echo "Logged in with YOUR_HF_TOKEN." +elif [ -f ~/.cache/huggingface/token ]; then + # Read token from the file + TOKEN_CONTENT=$(cat ~/.cache/huggingface/token) + + # Check if the token starts with "hf_" + if [[ $TOKEN_CONTENT == hf_* ]]; then + echo "Already logged in with a Hugging Face token." + else + echo "Token in file does not start with 'hf_'. Please log into huggingface to download models." + huggingface-cli login + fi +else + echo "Please log into huggingface to download models." 
+ huggingface-cli login +fi + +# Run the Python script +python ./python/turbine_models/custom_models/stateless_llama.py --compile_to=torch --external_weights=safetensors --external_weight_file=llama_f32.safetensors +``` diff --git a/python/turbine_models/__init__.py b/python/turbine_models/custom_models/__init__.py similarity index 100% rename from python/turbine_models/__init__.py rename to python/turbine_models/custom_models/__init__.py diff --git a/python/turbine_models/gen_external_params/__init__.py b/python/turbine_models/gen_external_params/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/turbine_models/pyproject.toml b/python/turbine_models/pyproject.toml new file mode 100644 index 000000000..9787c3bdf --- /dev/null +++ b/python/turbine_models/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/python/turbine_models/setup.py b/python/turbine_models/setup.py new file mode 100644 index 000000000..048f64050 --- /dev/null +++ b/python/turbine_models/setup.py @@ -0,0 +1,72 @@ +import json +import os +from pathlib import Path + +from setuptools import find_namespace_packages, setup + + +#### TURBINE MODELS SETUP #### + + +TURBINE_MODELS_DIR = os.path.realpath(os.path.dirname(__file__)) +TURBINE_ROOT_DIR = Path(TURBINE_MODELS_DIR).parent.parent +print(TURBINE_ROOT_DIR) +VERSION_INFO_FILE = os.path.join(TURBINE_MODELS_DIR, "version_info.json") + + +with open( + os.path.join( + TURBINE_MODELS_DIR, + "README.md", + ), + "rt", +) as f: + README = f.read() + + +def load_version_info(): + with open(VERSION_INFO_FILE, "rt") as f: + return json.load(f) + + +version_info = load_version_info() +PACKAGE_VERSION = version_info["package-version"] + +setup( + name=f"turbine-models", + version=f"{PACKAGE_VERSION}", + author="SHARK Authors", + author_email="dan@nod.ai", + description="SHARK Turbine Machine Learning Model Zoo", + long_description=README, + 
long_description_content_type="text/markdown", + url="https://github.com/nod-ai/SHARK-Turbine", + license="Apache-2.0", + classifiers=[ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + ], + package_dir={ + "": f"{str(TURBINE_ROOT_DIR)}/python", + }, + packages=find_namespace_packages( + include=[ + "turbine_models", + "turbine_models.*", + ], + where=f"{str(TURBINE_ROOT_DIR)}/python", + ), + entry_points={ + "torch_dynamo_backends": [ + "turbine_cpu = shark_turbine.dynamo.backends.cpu:backend", + ], + }, + install_requires=[ + "Shark-Turbine", + "brevitas @ git+https://github.com/Xilinx/brevitas.git@6695e8df7f6a2c7715b9ed69c4b78157376bb60b", + "protobuf", + "sentencepiece", + "transformers", + ], +) diff --git a/python/turbine_models/tests/__init__.py b/python/turbine_models/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/turbine_models/version_info.json b/python/turbine_models/version_info.json new file mode 100644 index 000000000..44d2596c7 --- /dev/null +++ b/python/turbine_models/version_info.json @@ -0,0 +1,3 @@ +{ + "package-version": "0.0.1.dev1" +} diff --git a/setup.py b/setup.py index 53f2646f2..53a7b8c5d 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,10 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import argparse import json import os import distutils.command.build from pathlib import Path -import sys from setuptools import find_namespace_packages, setup