Merge pull request #495 from macrocosm-os/staging
v2.15.0
bkb2135 authored Dec 18, 2024
2 parents f6eeaf7 + 1b2ada7 commit 851f91f
Showing 90 changed files with 1,946 additions and 2,121 deletions.
3 changes: 3 additions & 0 deletions .env.api.example
@@ -0,0 +1,3 @@
API_PORT = "8005"
API_HOST = "0.0.0.0"
# SCORING_KEY = "YOUR_SCORING_API_KEY_GOES_HERE"
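
These settings are plain environment variables. As a hedged illustration only (python-dotenv and the loader shown here are assumptions, not the repo's actual settings code), they could be consumed like this:

import os

from dotenv import load_dotenv

# Illustrative sketch: read the values defined in .env.api.example.
load_dotenv(".env.api")

api_port = int(os.environ.get("API_PORT", "8005"))
api_host = os.environ.get("API_HOST", "0.0.0.0")
scoring_key = os.environ.get("SCORING_KEY")  # optional; commented out in the example file

print(f"API would bind to {api_host}:{api_port}")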
7 changes: 5 additions & 2 deletions .env.validator.example
@@ -1,7 +1,5 @@
# The network UID: 1 for mainnet, 61 for testnet.
NETUID = 61
DEPLOY_API = false
API_PORT = 8094

# The network name [test, finney, local].
SUBTENSOR_NETWORK = "test"
@@ -25,3 +23,8 @@ SN19_API_URL = "e.g. http://24.199.112.174:4051/"
# Third Party Access Tokens.
OPENAI_API_KEY = "your_openai_api_key_here"
HF_TOKEN = "your_huggingface_token_here"

# Scoring API.
DEPLOY_SCORING_API = true
SCORING_ADMIN_KEY = "123456"
SCORING_API_PORT = 8094
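
The new scoring block pairs with the DEPLOY_SCORING_API flag consumed in neurons/validator.py below, and the demo notebook at the end of this diff confirms a /health route on port 8094. A minimal probe might look like the following sketch (the X-Admin-Key header name is hypothetical; only the port and the /health route are visible in this commit):

import requests

SCORING_API_PORT = 8094
SCORING_ADMIN_KEY = "123456"  # matches the example value above

# Sketch: probe the scoring API once DEPLOY_SCORING_API = true.
response = requests.get(
    f"http://localhost:{SCORING_API_PORT}/health",
    headers={"X-Admin-Key": SCORING_ADMIN_KEY},  # hypothetical header name
    timeout=10,
)
print(response.status_code)  # expect 200 when the API is up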
11 changes: 3 additions & 8 deletions .github/workflows/python-package.yml
@@ -43,20 +43,15 @@ jobs:
# Style/format checks.
- name: Run Black (code formatter)
run: |
poetry run black --check .
poetry run black --check --diff .
- name: Run isort (import sorting)
run: |
poetry run isort --check-only --profile black .
# TODO: "pyflakes[F]" failed during execution due to RecursionError('maximum recursion depth exceeded')
# - name: Run Flake8 (style guide enforcement)
# run: |
# poetry run flake8 .
poetry run isort --check-only --diff --profile black .
- name: Run Ruff (linter)
run: |
poetry run ruff check .
poetry run ruff check --diff .
- name: Test with pytest
run: |
4 changes: 3 additions & 1 deletion .gitignore
@@ -131,6 +131,7 @@ celerybeat.pid

# Environments
.env
.env.api
.env.miner
.env.validator
.venv
@@ -171,6 +172,8 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Repository-specific files.
AutoAWQ/
testing/
data/*
plots/*
@@ -180,4 +183,3 @@ wandb
.vscode
api_keys.json
prompting/api/api_keys.json
AutoAWQ
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -18,6 +18,7 @@ repos:
- id: black
name: black (code formatter)
language_version: python3.10
additional_dependencies: ["black[jupyter]"]

- repo: https://github.com/pycqa/isort
rev: 5.13.2
4 changes: 4 additions & 0 deletions README.md
@@ -67,6 +67,10 @@ The miner receives a code snippet that is incomplete. The task is to complete th
### 7. **Web Retrieval**
The miner is given a question based on a random web page and must return a scraped website that contains the answer, which requires searching the web for the most accurate and reliable source. The miner is scored on the embedding similarity between the answer it returns and the original website from which the validator generated the reference.

# API Documentation

For detailed information on the available API endpoints, request/response formats, and usage examples, please refer to the [API Documentation](./api/API.md).

# Contribute
<div align="center">

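To make the embedding-similarity scoring described in the Web Retrieval task concrete, here is a hedged sketch (the sentence-transformers model and the cosine formulation are illustrative assumptions; the subnet's actual reward code is not part of this diff):

import numpy as np
from sentence_transformers import SentenceTransformer

# Illustrative cosine similarity between a reference page and a miner answer.
model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model choice

reference = "Text of the page the validator generated the reference from."
answer = "Text scraped and returned by the miner."

ref_vec, ans_vec = model.encode([reference, answer])
score = float(np.dot(ref_vec, ans_vec) / (np.linalg.norm(ref_vec) * np.linalg.norm(ans_vec)))
print(f"cosine similarity: {score:.3f}")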
9 changes: 4 additions & 5 deletions install.sh
@@ -7,15 +7,14 @@ pip install poetry
poetry config virtualenvs.in-project true

# Install the project dependencies
poetry install
poetry install --extras "validator"

git clone git@github.com:casper-hansen/AutoAWQ.git && cd AutoAWQ && poetry run pip install -e . && cd ..
# Build AutoAWQ==0.2.7.post3 from source
git clone https://github.com/casper-hansen/AutoAWQ.git
cd AutoAWQ && poetry run pip install -e . && cd ..

poetry run pip install flash-attn --no-build-isolation

# Updating the package list and installing jq and npm
apt update && apt install -y jq npm

# Check if jq is installed and install it if not
if ! command -v jq &> /dev/null
then
2 changes: 1 addition & 1 deletion neurons/miners/epistula_miner/miner.py
@@ -19,8 +19,8 @@
from starlette.background import BackgroundTask
from starlette.responses import StreamingResponse

from prompting.base.epistula import verify_signature
from prompting.llms.hf_llm import ReproducibleHF
from shared.epistula import verify_signature

MODEL_ID: str = "gpt-3.5-turbo"
NEURON_MAX_TOKENS: int = 256
4 changes: 2 additions & 2 deletions neurons/miners/test/echo.py
@@ -5,8 +5,8 @@
from starlette.types import Send

# import base miner class which takes care of most of the boilerplate
from prompting.base.prompting_miner import BaseStreamPromptingMiner
from prompting.base.protocol import StreamPromptingSynapse
from shared.prompting_miner import BaseStreamPromptingMiner
from shared.protocol import StreamPromptingSynapse


class EchoMiner(BaseStreamPromptingMiner):
8 changes: 4 additions & 4 deletions neurons/miners/test/mock.py
@@ -5,9 +5,9 @@
from starlette.types import Send

# import base miner class which takes care of most of the boilerplate
from prompting.base.prompting_miner import BaseStreamPromptingMiner
from prompting.base.protocol import StreamPromptingSynapse
from prompting.settings import settings
from shared.prompting_miner import BaseStreamPromptingMiner
from shared.protocol import StreamPromptingSynapse
from shared.settings import shared_settings


class MockMiner(BaseStreamPromptingMiner):
@@ -28,7 +28,7 @@ async def _forward(message: str, send: Send):
}
)

message = f"Hey you reached mock miner {settings.HOTKEY}. Please leave a message after the tone.. Beep!"
message = f"Hey you reached mock miner {shared_settings.HOTKEY}. Please leave a message after the tone.. Beep!"
token_streamer = partial(_forward, message)
return synapse.create_streaming_response(token_streamer)

4 changes: 2 additions & 2 deletions neurons/miners/test/phrase.py
@@ -5,10 +5,10 @@
from starlette.types import Send

# import base miner class which takes care of most of the boilerplate
from prompting.base.prompting_miner import BaseStreamPromptingMiner
from shared.prompting_miner import BaseStreamPromptingMiner

# Bittensor Miner Template:
from prompting.base.protocol import StreamPromptingSynapse
from shared.protocol import StreamPromptingSynapse


class PhraseMiner(BaseStreamPromptingMiner):
53 changes: 53 additions & 0 deletions neurons/test_vanilla_post.py
@@ -0,0 +1,53 @@
import openai
from httpx import Timeout

from prompting import settings

settings.settings = settings.Settings.load(mode="validator")
settings = settings.settings

from shared.epistula import create_header_hook


async def main():
payload = {
"seed": "42",
"sampling_parameters": settings.SAMPLING_PARAMS,
"task": "InferenceTask",
"model": "Dummy_Model",
"messages": [
{"role": "user", "content": "#Bittensor #ToTheMoon"},
],
}

uid = 732
try:
axon_info = settings.METAGRAPH.axons[uid]
miner = openai.AsyncOpenAI(
base_url=f"http://{axon_info.ip}:{axon_info.port}/v1",
api_key="Apex",
max_retries=0,
timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=10),
http_client=openai.DefaultAsyncHttpxClient(
event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]}
),
)
chat = await miner.chat.completions.create(
messages=payload["messages"],
model=payload["model"],
stream=True,
extra_body={k: v for k, v in payload.items() if k not in ["messages", "model"]},
)

async for chunk in chat:
if chunk.choices[0].delta and chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content)
except Exception as e:
print("something went wrong", e)
return


if __name__ == "__main__":
import asyncio

asyncio.run(main())
52 changes: 28 additions & 24 deletions neurons/validator.py
@@ -1,23 +1,24 @@
# ruff: noqa: E402
from shared import settings

settings.shared_settings = settings.SharedSettings.load(mode="validator")
shared_settings = settings.shared_settings

import asyncio
import multiprocessing as mp
import time

from loguru import logger

from prompting import settings
from prompting.utils.profiling import profiler

settings.settings = settings.Settings.load(mode="validator")
settings = settings.settings

from prompting.api.api import start_api
from prompting.api.api import start_scoring_api
from prompting.llms.model_manager import model_scheduler
from prompting.llms.utils import GPUInfo
from prompting.miner_availability.miner_availability import availability_checking_loop
from prompting.rewards.scoring import task_scorer
from prompting.tasks.task_creation import task_loop
from prompting.tasks.task_sending import task_sender
from prompting.weight_setting.weight_setter import weight_setter
from shared.profiling import profiler

NEURON_SAMPLE_SIZE = 100

@@ -26,40 +27,43 @@ async def main():
# will start checking the availability of miners at regular intervals, needed for API and Validator
asyncio.create_task(availability_checking_loop.start())

if settings.DEPLOY_API:
asyncio.create_task(start_api())
if shared_settings.DEPLOY_SCORING_API:
# Use multiprocessing to bypass API blocking issue.
api_process = mp.Process(target=lambda: asyncio.run(start_scoring_api()))
api_process.start()

GPUInfo.log_gpu_info()
if settings.DEPLOY_VALIDATOR:
# start profiling
asyncio.create_task(profiler.print_stats())
# start profiling
asyncio.create_task(profiler.print_stats())

# start rotating LLM models
asyncio.create_task(model_scheduler.start())
# start rotating LLM models
asyncio.create_task(model_scheduler.start())

# start creating tasks
asyncio.create_task(task_loop.start())
# start creating tasks
asyncio.create_task(task_loop.start())

# start sending tasks to miners
asyncio.create_task(task_sender.start())
# will start checking the availability of miners at regular intervals
asyncio.create_task(availability_checking_loop.start())

# sets weights at regular intervals (synchronised between all validators)
asyncio.create_task(weight_setter.start())
# start sending tasks to miners
asyncio.create_task(task_sender.start())

# start scoring tasks in separate loop
asyncio.create_task(task_scorer.start())
# sets weights at regular intervals (synchronised between all validators)
asyncio.create_task(weight_setter.start())

# start scoring tasks in separate loop
asyncio.create_task(task_scorer.start())
# # TODO: Think about whether we want to store the task queue locally in case of a crash
# # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating
# # TODO: Make weight setting happen as specific intervals as we load/unload models
start = time.time()
await asyncio.sleep(60)
while True:
await asyncio.sleep(1)
await asyncio.sleep(5)
time_diff = -start + (start := time.time())
logger.debug(f"Running {time_diff:.2f} seconds")


# The main function parses the configuration and runs the validator.
if __name__ == "__main__":
asyncio.run(main())
# will start rotating the different LLMs in/out of memory
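
The mp.Process pattern above exists, per the inline comment, to keep the scoring API from blocking the validator's event loop. A self-contained sketch of the same pattern follows (FastAPI and uvicorn are assumptions here; start_scoring_api's internals are not shown in this diff):

import multiprocessing as mp

import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/health")
def health() -> dict:
    # Mirrors the /health probe used in notebooks/demo.ipynb below.
    return {"status": "ok"}


def run_api() -> None:
    # Runs the server in its own process so it cannot block the main loop.
    uvicorn.run(app, host="0.0.0.0", port=8094)


if __name__ == "__main__":
    api_process = mp.Process(target=run_api, daemon=True)
    api_process.start()
    # ... the validator's asyncio.run(main()) would continue in this process ...
    api_process.join()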
20 changes: 20 additions & 0 deletions notebooks/demo.ipynb
@@ -1,5 +1,25 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Response [200]>\n"
]
}
],
"source": [
"import requests\n",
"\n",
"response = requests.get(\"http://localhost:8094/health\", timeout=10)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
(Remaining file diffs of the 90 changed files are not shown.)