From 4457d4e42ac55a133ffe04bdcdc249be430f97cc Mon Sep 17 00:00:00 2001 From: richwardle Date: Tue, 19 Nov 2024 06:17:42 -1000 Subject: [PATCH 01/40] Initial upload --- neurons/miners/epistula_miner/miner.py | 195 +++++++++++++++++ neurons/validator.py | 29 +-- prompting/base/epistula.py | 207 ++++++++++++++++++ .../miner_availability/miner_availability.py | 17 +- 4 files changed, 412 insertions(+), 36 deletions(-) create mode 100644 neurons/miners/epistula_miner/miner.py create mode 100644 prompting/base/epistula.py diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py new file mode 100644 index 00000000..9ff5a4f6 --- /dev/null +++ b/neurons/miners/epistula_miner/miner.py @@ -0,0 +1,195 @@ +# ruff: noqa: E402 +from prompting import settings + +settings.settings = settings.Settings.load(mode="miner") +settings = settings.settings + +import time +import httpx +import netaddr +import uvicorn +import requests +import traceback +import bittensor as bt + +from loguru import logger +from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException +from starlette.background import BackgroundTask +from starlette.responses import StreamingResponse +from bittensor.subtensor import serve_extrinsic +from bittensor.axon import FastAPIThreadedServer +from prompting.base.epistula import verify_signature + + +MODEL_ID: str = "gpt-3.5-turbo" +NEURON_MAX_TOKENS: int = 256 +NEURON_TEMPERATURE: float = 0.7 +NEURON_TOP_K: int = 50 +NEURON_TOP_P: float = 0.95 +NEURON_STREAMING_BATCH_SIZE: int = 12 +NEURON_STOP_ON_FORWARD_EXCEPTION: bool = False + +SYSTEM_PROMPT = """You are a helpful agent that does it's best to answer all questions!""" + + +class OpenAIMiner(): + + def __init__(self): + self.should_exit = False + self.client = httpx.AsyncClient( + base_url="https://api.openai.com/v1", + headers={ + "Authorization": f"Bearer {settings.OPENAI_API_KEY}", + "Content-Type": "application/json", + }, + ) + + def format_headers(self, request: Request): + # Iterate through the headers and only keep the ones that will be used for the openai request + + return request + + async def create_chat_completion(self, request: Request): + request["model"] = MODEL_ID + bt.logging.info( + "\u2713", + f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + ) + req = self.client.build_request( + "POST", "/chat/completions", content=await request.body() + ) + r = await self.client.send(req, stream=True) + return StreamingResponse( + r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers + ) + + async def check_availability(self, request: Request): + # Parse the incoming JSON request + data = await request.json() + task_availabilities = data.get('task_availabilities', {}) + llm_model_availabilities = data.get('llm_model_availabilities', {}) + + # Set all task availabilities to True + task_response = {key: True for key in task_availabilities} + + # Set all model availabilities to False + model_response = {key: False for key in llm_model_availabilities} + + # Construct the response dictionary + response = { + 'task_availabilities': task_response, + 'llm_model_availabilities': model_response + } + + return response + + async def verify_request( + self, + request: Request, + ): + # We do this as early as possible so that now has a lesser chance + # of causing a stale request + now = round(time.time() * 1000) + + # We need to check the signature of the body as bytes + # But use some specific fields from the body + signed_by = 
request.headers.get("Epistula-Signed-By") + signed_for = request.headers.get("Epistula-Signed-For") + if signed_for != self.wallet.hotkey.ss58_address: + raise HTTPException( + status_code=400, detail="Bad Request, message is not intended for self" + ) + if signed_by not in self.metagraph.hotkeys: + raise HTTPException(status_code=401, detail="Signer not in metagraph") + + uid = self.metagraph.hotkeys.index(signed_by) + stake = self.metagraph.S[uid].item() + if not self.config.no_force_validator_permit and stake < 10000: + bt.logging.warning( + f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}" + ) + raise HTTPException(status_code=401, detail="Stake below minimum: {stake}") + + # If anything is returned here, we can throw + body = await request.body() + err = verify_signature( + request.headers.get("Epistula-Request-Signature"), + body, + request.headers.get("Epistula-Timestamp"), + request.headers.get("Epistula-Uuid"), + signed_for, + signed_by, + now, + ) + if err: + bt.logging.error(err) + raise HTTPException(status_code=400, detail=err) + + def run(self): + + external_ip = None #settings.EXTERNAL_IP + if not external_ip or external_ip == "[::]": + try: + external_ip = requests.get("https://checkip.amazonaws.com").text.strip() + netaddr.IPAddress(external_ip) + except Exception: + bt.logging.error("Failed to get external IP") + + bt.logging.info( + f"Serving miner endpoint {external_ip}:{settings.AXON_PORT} on network: {settings.SUBTENSOR_NETWORK} with netuid: {settings.NETUID}" + ) + + serve_success = serve_extrinsic( + subtensor=settings.SUBTENSOR, + wallet=settings.WALLET, + ip=external_ip, + port=settings.AXON_PORT, + protocol=4, + netuid=settings.NETUID, + ) + if not serve_success: + bt.logging.error("Failed to serve endpoint") + return + + # Start starts the miner's endpoint, making it active on the network. + # change the config in the axon + app = FastAPI() + router = APIRouter() + router.add_api_route( + "/chat/completions", + self.create_chat_completion, + dependencies=[Depends(self.verify_request)], + methods=["POST"], + ) + router.add_api_route( + "/availability", + self.check_availability, + methods=["POST"], + ) + app.include_router(router) + fast_config = uvicorn.Config( + app, + host="0.0.0.0", + port=settings.AXON_PORT, + log_level="info", + loop="asyncio", + ) + self.fast_api = FastAPIThreadedServer(config=fast_config) + self.fast_api.start() + + bt.logging.info(f"Miner starting at block: {settings.SUBTENSOR.block}") + + # This loop maintains the miner's operations until intentionally stopped. + try: + while not self.should_exit: + time.sleep(1) + except Exception as e: + bt.logging.error(str(e)) + bt.logging.error(traceback.format_exc()) + self.shutdown() + + +if __name__ == "__main__": + miner = OpenAIMiner() + miner.run() + logger.warning("Ending miner...") diff --git a/neurons/validator.py b/neurons/validator.py index 7e7b0caf..fa580804 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -23,6 +23,7 @@ from prompting.organic.organic_loop import start_organic from prompting.weight_setting.weight_setter import weight_setter from prompting.llms.utils import GPUInfo +from prompting.base.epistula import query_miners NEURON_SAMPLE_SIZE = 100 @@ -136,35 +137,15 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | if len(uids) == 0: logger.warning("No available miners. 
This should already have been caught earlier.") return - axons = [settings.METAGRAPH.axons[uid] for uid in uids] - - # Create the synapse - synapse = StreamPromptingSynapse( - task_name=task.__class__.__name__, - seed=task.seed, - target_model=task.llm_model_id, - roles=["user"], - messages=[task.query], - ) - # Call the synchronous wrapper that includes both DENDRITE and handle_response - stream_results = run_dendrite_and_handle_response_sync( - uids=uids, - axons=axons, - synapse=synapse, - timeout=settings.NEURON_TIMEOUT, - deserialize=False, - streaming=True, - ) - logger.debug( - f"Non-empty responses: {len([r.completion for r in stream_results if len(r.completion) > 0])}\n" - f"Empty responses: {len([r.completion for r in stream_results if len(r.completion) == 0])}" - ) + body = {"seed": task.seed, "model": task.llm_model_id, "roles": ["user"], "messages": [task.query]} + body_bytes = json.dumps(body).encode("utf-8") + stream_results = query_miners(task.__class__.__name__, uids, body) log_stream_results(stream_results) - # Encapsulate the responses in a response event (dataclass + response_event = DendriteResponseEvent( stream_results=stream_results, uids=uids, timeout=settings.NEURON_TIMEOUT ) diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py new file mode 100644 index 00000000..951b3fc0 --- /dev/null +++ b/prompting/base/epistula.py @@ -0,0 +1,207 @@ +import json +from hashlib import sha256 +from uuid import uuid4 +from math import ceil +import time +from prompting.utils.timer import Timer +from substrateinterface import Keypair +import asyncio +import bittensor as bt +import math +from os import urandom +import time +import traceback +from typing import Dict, List, Optional, Tuple, Any, Annotated +from prompting.base.dendrite import SynapseStreamResult +from httpx import Timeout +import httpx +import openai +import requests +from prompting.settings import settings + +def verify_signature( + signature, body: bytes, timestamp, uuid, signed_for, signed_by, now +) -> Optional[Annotated[str, "Error Message"]]: + if not isinstance(signature, str): + return "Invalid Signature" + timestamp = int(timestamp) + if not isinstance(timestamp, int): + return "Invalid Timestamp" + if not isinstance(signed_by, str): + return "Invalid Sender key" + if not isinstance(signed_for, str): + return "Invalid receiver key" + if not isinstance(uuid, str): + return "Invalid uuid" + if not isinstance(body, bytes): + return "Body is not of type bytes" + ALLOWED_DELTA_MS = 8000 + keypair = Keypair(ss58_address=signed_by) + if timestamp + ALLOWED_DELTA_MS < now: + return "Request is too stale" + message = f"{sha256(body).hexdigest()}.{uuid}.{timestamp}.{signed_for}" + verified = keypair.verify(message, signature) + if not verified: + return "Signature Mismatch" + return None + +def generate_header( + hotkey: Keypair, + body_bytes: Dict[str, Any], + signed_for: Optional[str] = None, +) -> Dict[str, Any]: + timestamp = round(time.time() * 1000) + timestampInterval = ceil(timestamp / 1e4) * 1e4 + uuid = str(uuid4()) + headers = { + "Epistula-Version": "2", + "Epistula-Timestamp": str(timestamp), + "Epistula-Uuid": uuid, + "Epistula-Signed-By": hotkey.ss58_address, + "Epistula-Request-Signature": "0x" + + hotkey.sign( + f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}" + ).hex(), + } + if signed_for: + headers["Epistula-Signed-For"] = signed_for + headers["Epistula-Secret-Signature-0"] = ( + "0x" + hotkey.sign(str(timestampInterval - 1) + "." 
+ signed_for).hex() + ) + headers["Epistula-Secret-Signature-1"] = ( + "0x" + hotkey.sign(str(timestampInterval) + "." + signed_for).hex() + ) + headers["Epistula-Secret-Signature-2"] = ( + "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() + ) + return headers + +def create_header_hook(hotkey, axon_hotkey, task): + async def add_headers(request: httpx.Request): + for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): + request.headers[key] = header + request.headers["Task"] = task + + return add_headers + +async def query_miners(task, uids, body): + try: + tasks = [] + for uid in uids: + tasks.append( + asyncio.create_task( + handle_inference( + settings.METAGRAPH, settings.WALLET, task, body, uid, + ) + ) + ) + responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) + return responses + except Exception as e: + bt.logging.error(f"Error in forward for: {e}") + bt.logging.error(traceback.format_exc()) + return [] + +async def query_availabilities(uids, task_config, model_config): + """ Query the availability of the miners """ + availability_dict = {'task_availabilities': task_config, 'llm_model_availabilities': model_config} + # Query the availability of the miners + try: + tasks = [] + for uid in uids: + tasks.append( + asyncio.create_task( + handle_availability( + settings.METAGRAPH, availability_dict, uid, + ) + ) + ) + responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) + return responses + + except Exception as e: + bt.logging.error(f"Error in availability call: {e}") + bt.logging.error(traceback.format_exc()) + return [] + +async def handle_availability( + metagraph: "bt.NonTorchMetagraph", + request: Dict[str, Any], + uid: int, +) -> Dict[str, bool]: + try: + axon_info = metagraph.axons[uid] + url = f"http://{axon_info.ip}:{axon_info.port}/availability" + + timeout = httpx.Timeout(settings.NEURON_TIMEOUT, connect=5, read=5) + + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, json=request) + + response.raise_for_status() + return response.json() + + except Exception as e: + # If the miner is not available, we will return a failure response + bt.logging.error(f"Miner {uid} failed request: {e}") + return {} + + +async def handle_inference( + metagraph: "bt.NonTorchMetagraph", + wallet: "bt.wallet", + task: str, + body: Dict[str, Any], + uid: int, +) -> SynapseStreamResult: + + try: + with Timer() as timer: + axon_info = metagraph.axons[uid] + miner = openai.AsyncOpenAI( + base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", #Maybe need to change this? 
+ api_key="Apex", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={ + "request": [ + create_header_hook( + wallet.hotkey, axon_info.hotkey, task + ) + ] + }), + ) + try: + chunk_timings = [] + chunks = [] + chat = await miner.chat.completions.create(**generate_header(wallet.hotkey, body, signed_for=axon_info.hotkey)) + async for chunk in chat: + if chunk.choices[0].delta is None: + continue + if ( + chunk.choices[0].delta.content == "" + or chunk.choices[0].delta.content is None + ) and len(chunks) == 0: + continue + + chunks.append(chunk.choices[0].delta.content) + chunk_timings.append(timer.elapsed_time()) + + except openai.APIConnectionError as e: + bt.logging.trace(f"Miner {uid} failed request: {e}") + + except Exception as e: + bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + + except Exception as e: + exception = e + bt.logging.error(f"{uid}: Error in forward for: {e}") + bt.logging.error(traceback.format_exc()) + finally: + return SynapseStreamResult( + accumulated_chunks=chunks, + accumulated_chunks_timings=chunk_timings, + synapse=None, + uid=uid, + exception=exception, + ) \ No newline at end of file diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index f349c7c5..d5dfc84b 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -10,6 +10,8 @@ import random import asyncio import numpy as np +from prompting.base.epistula import query_availabilities +from typing import Dict task_config: dict[str, bool] = {str(task_config.task.__name__): True for task_config in TaskRegistry.task_configs} # task_config: dict[str, bool] = { @@ -74,22 +76,13 @@ async def run_step(self): if any(uid >= len(settings.METAGRAPH.axons) for uid in uids_to_query): raise ValueError("Some UIDs are out of bounds. Make sure all the TEST_MINER_IDS are valid.") + responses: list[Dict[str, bool]] = await query_availabilities(uids_to_query, task_config, model_config) - axons = [settings.METAGRAPH.axons[uid] for uid in uids_to_query] - responses: list[AvailabilitySynapse] = await settings.DENDRITE( - axons=axons, - synapse=AvailabilitySynapse(task_availabilities=task_config, llm_model_availabilities=model_config), - timeout=settings.NEURON_TIMEOUT, - deserialize=False, - streaming=False, - ) logger.debug(f"Availability responses: {responses}") - for response, uid in zip(responses, uids_to_query): - if response.is_failure: - logger.warning(f"Miner {uid} failed to respond. 
Response is timeout: {response.timeout}") - continue for response, uid in zip(responses, uids_to_query): + if not response: + continue miner_availabilities.miners[uid] = MinerAvailability( task_availabilities=response.task_availabilities, llm_model_availabilities=response.llm_model_availabilities, From 62ae30cfa14bdc3b4886cefc0b9929f295536f09 Mon Sep 17 00:00:00 2001 From: richwardle Date: Tue, 19 Nov 2024 13:22:34 -1000 Subject: [PATCH 02/40] Get everything working --- neurons/miners/epistula_miner/miner.py | 94 +++++++++++++++++-- neurons/validator.py | 7 +- prompting/base/dendrite.py | 26 +++-- prompting/base/epistula.py | 92 +++++++++--------- prompting/base/forward.py | 6 +- .../miner_availability/miner_availability.py | 4 +- prompting/rewards/reward.py | 2 + 7 files changed, 156 insertions(+), 75 deletions(-) diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 9ff5a4f6..06fb1c0f 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -5,13 +5,15 @@ settings = settings.settings import time +import asyncio +import json import httpx import netaddr import uvicorn import requests import traceback import bittensor as bt - +from starlette.responses import JSONResponse from loguru import logger from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException from starlette.background import BackgroundTask @@ -43,27 +45,99 @@ def __init__(self): "Content-Type": "application/json", }, ) + print("OpenAI Key: ", settings.OPENAI_API_KEY) - def format_headers(self, request: Request): - # Iterate through the headers and only keep the ones that will be used for the openai request - - return request - + async def format_openai_query(self, request: Request): + # Read the JSON data once + data = await request.json() + + # Extract the required fields + openai_request = {} + for key in ["messages", "model", "stream"]: + if key in data: + openai_request[key] = data[key] + openai_request["model"] = MODEL_ID + + return openai_request + async def create_chat_completion(self, request: Request): - request["model"] = MODEL_ID bt.logging.info( "\u2713", f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", ) req = self.client.build_request( - "POST", "/chat/completions", content=await request.body() + "POST", "chat/completions", json = await self.format_openai_query(request) ) r = await self.client.send(req, stream=True) return StreamingResponse( r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers ) + # async def create_chat_completion(self, request: Request): + # bt.logging.info( + # "\u2713", + # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + # ) + # openai_request_body = await self.format_openai_query(request) + # try: + # req = self.client.build_request( + # "POST", "chat/completions", json=openai_request_body + # ) + # r = await self.client.send(req, stream=True) + # # Check for non-200 status code + # if r.status_code != 200: + # error_content = await r.aread() + # bt.logging.error(f"OpenAI API Error {r.status_code}: {error_content}") + # return JSONResponse( + # content=json.loads(error_content), + # status_code=r.status_code + # ) + # except Exception as e: + # bt.logging.error(f"Exception during OpenAI API call: {str(e)}") + # return JSONResponse( + # content={"error": str(e)}, + # status_code=500 + # ) + + # async def create_chat_completion(self, request: Request): + # bt.logging.info( + # "\u2713", + 
# f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + # ) + + # async def word_stream(): + # words = "This is a test stream".split() + # for word in words: + # # Simulate the OpenAI streaming response format + # data = { + # "choices": [ + # { + # "delta": {"content": word + ' '}, + # "index": 0, + # "finish_reason": None + # } + # ] + # } + # # Yield the data in SSE (Server-Sent Events) format + # yield f"data: {json.dumps(data)}\n\n" + # await asyncio.sleep(0.1) # Simulate a delay between words + # # Indicate the end of the stream + # data = { + # "choices": [ + # { + # "delta": {}, + # "index": 0, + # "finish_reason": "stop" + # } + # ] + # } + # yield f"data: {json.dumps(data)}\n\n" + # yield "data: [DONE]\n\n" + + # return StreamingResponse(word_stream(), media_type='text/event-stream') + async def check_availability(self, request: Request): + print("Checking availability") # Parse the incoming JSON request data = await request.json() task_availabilities = data.get('task_availabilities', {}) @@ -156,9 +230,9 @@ def run(self): app = FastAPI() router = APIRouter() router.add_api_route( - "/chat/completions", + "/v1/chat/completions", self.create_chat_completion, - dependencies=[Depends(self.verify_request)], + #dependencies=[Depends(self.verify_request)], methods=["POST"], ) router.add_api_route( diff --git a/neurons/validator.py b/neurons/validator.py index fa580804..e7021343 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -1,6 +1,7 @@ # ruff: noqa: E402 import asyncio import time +import json from prompting import settings from prompting.utils.profiling import profiler @@ -10,7 +11,7 @@ from loguru import logger from prompting.base.validator import BaseValidatorNeuron from prompting.base.forward import log_stream_results, handle_response -from prompting.base.dendrite import DendriteResponseEvent, StreamPromptingSynapse +from prompting.base.dendrite import DendriteResponseEvent from prompting.tasks.task_creation import task_loop from prompting.utils.logging import ValidatorLoggingEvent, ErrorLoggingEvent from prompting.rewards.scoring import task_scorer @@ -139,9 +140,9 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | return - body = {"seed": task.seed, "model": task.llm_model_id, "roles": ["user"], "messages": [task.query]} + body = {"seed": task.seed, "model": task.llm_model_id, "messages": [{'role': 'user', 'content': task.query},]} body_bytes = json.dumps(body).encode("utf-8") - stream_results = query_miners(task.__class__.__name__, uids, body) + stream_results = await query_miners(task.__class__.__name__, uids, body_bytes) log_stream_results(stream_results) diff --git a/prompting/base/dendrite.py b/prompting/base/dendrite.py index 046b737e..5e62284f 100644 --- a/prompting/base/dendrite.py +++ b/prompting/base/dendrite.py @@ -6,21 +6,21 @@ class SynapseStreamResult(BaseModel): - exception: BaseException | None = None + exception: str | None = None uid: int | None = None accumulated_chunks: list[str] | None = None accumulated_chunks_timings: list[float] | None = None tokens_per_chunk: list[int] | None = None - synapse: StreamPromptingSynapse | None = None + status_code: int = 200 + status_message: str = "" model_config = ConfigDict(arbitrary_types_allowed=True) @property def completion(self) -> str: - if not self.synapse: - logger.warning("Synapse is None") - return - return self.synapse.completion + if not self.accumulated_chunks: + return "" + return "".join(self.accumulated_chunks) def 
model_dump(self): # without a custom model dump, this leads to serialization errors in DendriteResponseEvent... @@ -31,7 +31,6 @@ def model_dump(self): "accumulated_chunks": self.accumulated_chunks, "accumulated_chunks_timings": self.accumulated_chunks_timings, "tokens_per_chunk": self.tokens_per_chunk, - "synapse": self.synapse.model_dump() if self.synapse is not None else None, } @@ -59,19 +58,16 @@ def process_stream_results(self) -> "DendriteResponseEvent": return self for stream_result in self.stream_results: # for some reason the language server needs this line to understand the type of stream_result - stream_result: SynapseStreamResult - synapse = stream_result.synapse + self.completions.append(stream_result.completion) + self.status_messages.append(stream_result.status_message) + status_code = stream_result.status_code - self.completions.append(synapse.completion) - self.status_messages.append(synapse.dendrite.status_message) - status_code = synapse.dendrite.status_code - - if len(synapse.completion) == 0 and status_code == 200: + if len(stream_result.completion) == 0 and status_code == 200: status_code = 204 self.status_codes.append(status_code) - process_time = synapse.dendrite.process_time or 0 + process_time = stream_result.accumulated_chunks_timings[-1] if stream_result.accumulated_chunks_timings else 0 if status_code == 200 or status_code == 204: self.timings.append(process_time) elif status_code == 408: diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 951b3fc0..ccf8d460 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -74,13 +74,14 @@ def generate_header( headers["Epistula-Secret-Signature-2"] = ( "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() ) - return headers + return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey, task): +def create_header_hook(hotkey, axon_hotkey): async def add_headers(request: httpx.Request): for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): - request.headers[key] = header - request.headers["Task"] = task + if key not in ['messages', 'model', 'stream']: + request.headers[key] = header + return request return add_headers @@ -142,8 +143,6 @@ async def handle_availability( return response.json() except Exception as e: - # If the miner is not available, we will return a failure response - bt.logging.error(f"Miner {uid} failed request: {e}") return {} @@ -154,54 +153,63 @@ async def handle_inference( body: Dict[str, Any], uid: int, ) -> SynapseStreamResult: - + exception = None + chunks = [] + chunk_timings = [] try: - with Timer() as timer: - axon_info = metagraph.axons[uid] - miner = openai.AsyncOpenAI( - base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", #Maybe need to change this? 
- api_key="Apex", - max_retries=0, - timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient(event_hooks={ - "request": [ - create_header_hook( - wallet.hotkey, axon_info.hotkey, task - ) - ] - }), - ) - try: - chunk_timings = [] - chunks = [] - chat = await miner.chat.completions.create(**generate_header(wallet.hotkey, body, signed_for=axon_info.hotkey)) - async for chunk in chat: - if chunk.choices[0].delta is None: - continue - if ( - chunk.choices[0].delta.content == "" - or chunk.choices[0].delta.content is None - ) and len(chunks) == 0: - continue - + start_time = time.time() + axon_info = metagraph.axons[uid] + miner = openai.AsyncOpenAI( + base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", + api_key="Apex", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={ + "request": [ + create_header_hook( + wallet.hotkey, axon_info.hotkey + ) + ] + }), + ) + try: + payload = json.loads(body) + chat = await miner.chat.completions.create(messages=payload["messages"], model=payload["model"], stream=True) + async for chunk in chat: + if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) - chunk_timings.append(timer.elapsed_time()) + chunk_timings.append(time.time() - start_time) - except openai.APIConnectionError as e: - bt.logging.trace(f"Miner {uid} failed request: {e}") + except openai.APIConnectionError as e: + bt.logging.trace(f"Miner {uid} failed request: {e}") + exception = e - except Exception as e: - bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + except Exception as e: + bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + exception = e except Exception as e: exception = e bt.logging.error(f"{uid}: Error in forward for: {e}") bt.logging.error(traceback.format_exc()) finally: + if exception: + exception = str(exception) + if exception is None: + status_code = 200 + status_message = "Success" + elif isinstance(exception, openai.APIConnectionError): + status_code = 502 + status_message = str(exception) + else: + status_code = 500 + status_message = str(exception) + return SynapseStreamResult( accumulated_chunks=chunks, accumulated_chunks_timings=chunk_timings, - synapse=None, uid=uid, exception=exception, - ) \ No newline at end of file + status_code=status_code, + status_message=status_message, + ) diff --git a/prompting/base/forward.py b/prompting/base/forward.py index e3ee6254..25596bb6 100644 --- a/prompting/base/forward.py +++ b/prompting/base/forward.py @@ -86,13 +86,13 @@ async def generate_reference(task: BaseTextTask, pipeline: BasePipeline) -> str: def log_stream_results(stream_results: List[SynapseStreamResult]): failed_responses = [ - response for response in stream_results if response.exception is not None or response.synapse is None + response for response in stream_results if response.exception is not None or response.completion is None ] empty_responses = [ - response for response in stream_results if response.exception is None and response.synapse.completion == "" + response for response in stream_results if response.exception is None and response.completion == "" ] non_empty_responses = [ - response for response in stream_results if response.exception is None and response.synapse.completion != "" + response for response in stream_results if response.exception is None and response.completion != "" ] logger.debug(f"Total of non_empty responses: 
({len(non_empty_responses)})") diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index d5dfc84b..b3ab8f64 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -84,8 +84,8 @@ async def run_step(self): if not response: continue miner_availabilities.miners[uid] = MinerAvailability( - task_availabilities=response.task_availabilities, - llm_model_availabilities=response.llm_model_availabilities, + task_availabilities=response['task_availabilities'], + llm_model_availabilities=response['llm_model_availabilities'], ) logger.debug("Miner availabilities updated.") diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py index 1564d425..f95ebf2f 100644 --- a/prompting/rewards/reward.py +++ b/prompting/rewards/reward.py @@ -50,6 +50,8 @@ class BatchRewardOutput(BaseModel): @property def rewards_normalized(self) -> np.ndarray: + if self.rewards.size == 0: + return np.array([]) if self.rewards.shape != self.timings.shape: raise ValueError(f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}") if self.rewards.min() == self.rewards.max(): From 8e25f33a267c85da6581e69401404f086ad28f33 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Wed, 20 Nov 2024 17:26:16 +0000 Subject: [PATCH 03/40] SN1-331: Adding initial draft for endpoints --- neurons/miners/epistula_miner/miner.py | 75 ++++++++----------- neurons/validator.py | 14 +++- prompting/api/api.py | 15 ++++ prompting/api/gpt_endpoints/api.py | 45 +++++++++++ prompting/api/gpt_endpoints/serialisers.py | 0 prompting/api/miner_availabilities/api.py | 19 +++++ prompting/base/epistula.py | 69 ++++++++--------- .../miner_availability/miner_availability.py | 7 +- 8 files changed, 157 insertions(+), 87 deletions(-) create mode 100644 prompting/api/api.py create mode 100644 prompting/api/gpt_endpoints/api.py create mode 100644 prompting/api/gpt_endpoints/serialisers.py create mode 100644 prompting/api/miner_availabilities/api.py diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 06fb1c0f..52a53819 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -5,17 +5,14 @@ settings = settings.settings import time -import asyncio -import json import httpx import netaddr import uvicorn import requests import traceback import bittensor as bt -from starlette.responses import JSONResponse from loguru import logger -from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException +from fastapi import APIRouter, FastAPI, Request, HTTPException from starlette.background import BackgroundTask from starlette.responses import StreamingResponse from bittensor.subtensor import serve_extrinsic @@ -34,44 +31,41 @@ SYSTEM_PROMPT = """You are a helpful agent that does it's best to answer all questions!""" -class OpenAIMiner(): - +class OpenAIMiner: def __init__(self): self.should_exit = False self.client = httpx.AsyncClient( - base_url="https://api.openai.com/v1", - headers={ - "Authorization": f"Bearer {settings.OPENAI_API_KEY}", - "Content-Type": "application/json", - }, - ) + base_url="https://api.openai.com/v1", + headers={ + "Authorization": f"Bearer {settings.OPENAI_API_KEY}", + "Content-Type": "application/json", + }, + ) print("OpenAI Key: ", settings.OPENAI_API_KEY) async def format_openai_query(self, request: Request): # Read the JSON data once data = await request.json() - + # Extract the required fields 
openai_request = {} for key in ["messages", "model", "stream"]: if key in data: openai_request[key] = data[key] openai_request["model"] = MODEL_ID - + return openai_request - + async def create_chat_completion(self, request: Request): bt.logging.info( "\u2713", f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", ) - req = self.client.build_request( - "POST", "chat/completions", json = await self.format_openai_query(request) - ) + logger.debug("Starting chat completion request...") + req = self.client.build_request("POST", "chat/completions", json=await self.format_openai_query(request)) r = await self.client.send(req, stream=True) - return StreamingResponse( - r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers - ) + logger.debug("Chat completion request returning...") + return StreamingResponse(r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers) # async def create_chat_completion(self, request: Request): # bt.logging.info( @@ -104,7 +98,7 @@ async def create_chat_completion(self, request: Request): # "\u2713", # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", # ) - + # async def word_stream(): # words = "This is a test stream".split() # for word in words: @@ -133,30 +127,27 @@ async def create_chat_completion(self, request: Request): # } # yield f"data: {json.dumps(data)}\n\n" # yield "data: [DONE]\n\n" - + # return StreamingResponse(word_stream(), media_type='text/event-stream') async def check_availability(self, request: Request): print("Checking availability") # Parse the incoming JSON request data = await request.json() - task_availabilities = data.get('task_availabilities', {}) - llm_model_availabilities = data.get('llm_model_availabilities', {}) - + task_availabilities = data.get("task_availabilities", {}) + llm_model_availabilities = data.get("llm_model_availabilities", {}) + # Set all task availabilities to True task_response = {key: True for key in task_availabilities} - + # Set all model availabilities to False model_response = {key: False for key in llm_model_availabilities} - + # Construct the response dictionary - response = { - 'task_availabilities': task_response, - 'llm_model_availabilities': model_response - } - + response = {"task_availabilities": task_response, "llm_model_availabilities": model_response} + return response - + async def verify_request( self, request: Request, @@ -170,18 +161,14 @@ async def verify_request( signed_by = request.headers.get("Epistula-Signed-By") signed_for = request.headers.get("Epistula-Signed-For") if signed_for != self.wallet.hotkey.ss58_address: - raise HTTPException( - status_code=400, detail="Bad Request, message is not intended for self" - ) + raise HTTPException(status_code=400, detail="Bad Request, message is not intended for self") if signed_by not in self.metagraph.hotkeys: raise HTTPException(status_code=401, detail="Signer not in metagraph") uid = self.metagraph.hotkeys.index(signed_by) stake = self.metagraph.S[uid].item() if not self.config.no_force_validator_permit and stake < 10000: - bt.logging.warning( - f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}" - ) + bt.logging.warning(f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}") raise HTTPException(status_code=401, detail="Stake below minimum: {stake}") # If anything is returned here, we can throw @@ -200,8 +187,7 @@ async def verify_request( raise HTTPException(status_code=400, 
detail=err) def run(self): - - external_ip = None #settings.EXTERNAL_IP + external_ip = None # settings.EXTERNAL_IP if not external_ip or external_ip == "[::]": try: external_ip = requests.get("https://checkip.amazonaws.com").text.strip() @@ -232,7 +218,7 @@ def run(self): router.add_api_route( "/v1/chat/completions", self.create_chat_completion, - #dependencies=[Depends(self.verify_request)], + # dependencies=[Depends(self.verify_request)], methods=["POST"], ) router.add_api_route( @@ -244,7 +230,8 @@ def run(self): fast_config = uvicorn.Config( app, host="0.0.0.0", - port=settings.AXON_PORT, + # port=settings.AXON_PORT, + port=8008, log_level="info", loop="asyncio", ) diff --git a/neurons/validator.py b/neurons/validator.py index e7021343..892649d8 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -25,6 +25,7 @@ from prompting.weight_setting.weight_setter import weight_setter from prompting.llms.utils import GPUInfo from prompting.base.epistula import query_miners +from prompting.api.api import start_api NEURON_SAMPLE_SIZE = 100 @@ -139,14 +140,18 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | logger.warning("No available miners. This should already have been caught earlier.") return - - body = {"seed": task.seed, "model": task.llm_model_id, "messages": [{'role': 'user', 'content': task.query},]} + body = { + "seed": task.seed, + "model": task.llm_model_id, + "messages": [ + {"role": "user", "content": task.query}, + ], + } body_bytes = json.dumps(body).encode("utf-8") stream_results = await query_miners(task.__class__.__name__, uids, body_bytes) log_stream_results(stream_results) - response_event = DendriteResponseEvent( stream_results=stream_results, uids=uids, timeout=settings.NEURON_TIMEOUT ) @@ -202,6 +207,9 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): + # start api + asyncio.create_task(start_api()) + GPUInfo.log_gpu_info() # start profiling asyncio.create_task(profiler.print_stats()) diff --git a/prompting/api/api.py b/prompting/api/api.py new file mode 100644 index 00000000..1f9da1f0 --- /dev/null +++ b/prompting/api/api.py @@ -0,0 +1,15 @@ +from fastapi import FastAPI +import uvicorn +from prompting.api.gpt_endpoints.api import router as gpt_router +from prompting.api.miner_availabilities.api import router as miner_availabilities_router +from loguru import logger + +app = FastAPI() + +app.include_router(gpt_router) +app.include_router(miner_availabilities_router) + + +async def start_api(): + logger.info("Starting API") + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py new file mode 100644 index 00000000..061f7cdb --- /dev/null +++ b/prompting/api/gpt_endpoints/api.py @@ -0,0 +1,45 @@ +from fastapi import APIRouter, Request +import openai +from prompting.settings import settings +from httpx import Timeout +from prompting.base.epistula import create_header_hook +from fastapi.responses import StreamingResponse +import json + +router = APIRouter() + + +async def process_stream(stream): + async for chunk in stream: + if hasattr(chunk, "choices") and chunk.choices: + # Extract the delta content from the chunk + delta = chunk.choices[0].delta + if hasattr(delta, "content") and delta.content is not None: + # Format as SSE data + yield f"data: {json.dumps(chunk.model_dump())}\n\n" + yield "data: [DONE]\n\n" + + +@router.post("/v1/chat/completions") +async def proxy_chat_completions(request: Request): + # Get the request body + 
body = await request.json() + + # Ensure streaming is enabled + body["stream"] = True + + # TODO: Forward to actual miners + miner = openai.AsyncOpenAI( + base_url="http://localhost:8008/v1", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, None)]} + ), + ) + + # Create streaming request to OpenAI + response = await miner.chat.completions.create(**body) + + # Return a streaming response with properly formatted chunks + return StreamingResponse(process_stream(response), media_type="text/event-stream") diff --git a/prompting/api/gpt_endpoints/serialisers.py b/prompting/api/gpt_endpoints/serialisers.py new file mode 100644 index 00000000..e69de29b diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py new file mode 100644 index 00000000..f45b058b --- /dev/null +++ b/prompting/api/miner_availabilities/api.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter +from prompting.miner_availability.miner_availability import miner_availabilities +from loguru import logger + +router = APIRouter() + + +@router.post("/miner_availabilities") +async def get_miner_availabilities(uids: list[int] | None = None): + if uids: + return {uid: miner_availabilities.miners.get(uid) for uid in uids} + logger.info(f"Returning all miner availabilities for {len(miner_availabilities.miners)} miners") + return miner_availabilities.miners + + +@router.get("/get_available_miners") +async def get_available_miners(task: str | None = None, model: str | None = None, k: int = 10): + logger.info(f"Getting {k} available miners for task {task} and model {model}") + return miner_availabilities.get_available_miners(task=task, model=model, k=k) diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index ccf8d460..4fbca51c 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -3,22 +3,18 @@ from uuid import uuid4 from math import ceil import time -from prompting.utils.timer import Timer from substrateinterface import Keypair import asyncio import bittensor as bt -import math -from os import urandom -import time import traceback -from typing import Dict, List, Optional, Tuple, Any, Annotated +from typing import Dict, List, Optional, Any, Annotated from prompting.base.dendrite import SynapseStreamResult from httpx import Timeout import httpx import openai -import requests from prompting.settings import settings + def verify_signature( signature, body: bytes, timestamp, uuid, signed_for, signed_by, now ) -> Optional[Annotated[str, "Error Message"]]: @@ -45,6 +41,7 @@ def verify_signature( return "Signature Mismatch" return None + def generate_header( hotkey: Keypair, body_bytes: Dict[str, Any], @@ -59,32 +56,26 @@ def generate_header( "Epistula-Uuid": uuid, "Epistula-Signed-By": hotkey.ss58_address, "Epistula-Request-Signature": "0x" - + hotkey.sign( - f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}" - ).hex(), + + hotkey.sign(f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}").hex(), } if signed_for: headers["Epistula-Signed-For"] = signed_for - headers["Epistula-Secret-Signature-0"] = ( - "0x" + hotkey.sign(str(timestampInterval - 1) + "." + signed_for).hex() - ) - headers["Epistula-Secret-Signature-1"] = ( - "0x" + hotkey.sign(str(timestampInterval) + "." 
+ signed_for).hex() - ) - headers["Epistula-Secret-Signature-2"] = ( - "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() - ) + headers["Epistula-Secret-Signature-0"] = "0x" + hotkey.sign(str(timestampInterval - 1) + "." + signed_for).hex() + headers["Epistula-Secret-Signature-1"] = "0x" + hotkey.sign(str(timestampInterval) + "." + signed_for).hex() + headers["Epistula-Secret-Signature-2"] = "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey): + +def create_header_hook(hotkey, axon_hotkey=None): async def add_headers(request: httpx.Request): for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): - if key not in ['messages', 'model', 'stream']: + if key not in ["messages", "model", "stream"]: request.headers[key] = header return request return add_headers + async def query_miners(task, uids, body): try: tasks = [] @@ -92,7 +83,11 @@ async def query_miners(task, uids, body): tasks.append( asyncio.create_task( handle_inference( - settings.METAGRAPH, settings.WALLET, task, body, uid, + settings.METAGRAPH, + settings.WALLET, + task, + body, + uid, ) ) ) @@ -102,10 +97,11 @@ async def query_miners(task, uids, body): bt.logging.error(f"Error in forward for: {e}") bt.logging.error(traceback.format_exc()) return [] - + + async def query_availabilities(uids, task_config, model_config): - """ Query the availability of the miners """ - availability_dict = {'task_availabilities': task_config, 'llm_model_availabilities': model_config} + """Query the availability of the miners""" + availability_dict = {"task_availabilities": task_config, "llm_model_availabilities": model_config} # Query the availability of the miners try: tasks = [] @@ -113,18 +109,21 @@ async def query_availabilities(uids, task_config, model_config): tasks.append( asyncio.create_task( handle_availability( - settings.METAGRAPH, availability_dict, uid, + settings.METAGRAPH, + availability_dict, + uid, ) ) ) responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) return responses - + except Exception as e: bt.logging.error(f"Error in availability call: {e}") bt.logging.error(traceback.format_exc()) return [] - + + async def handle_availability( metagraph: "bt.NonTorchMetagraph", request: Dict[str, Any], @@ -142,7 +141,7 @@ async def handle_availability( response.raise_for_status() return response.json() - except Exception as e: + except Exception: return {} @@ -164,17 +163,15 @@ async def handle_inference( api_key="Apex", max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient(event_hooks={ - "request": [ - create_header_hook( - wallet.hotkey, axon_info.hotkey - ) - ] - }), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [create_header_hook(wallet.hotkey, axon_info.hotkey)]} + ), ) try: payload = json.loads(body) - chat = await miner.chat.completions.create(messages=payload["messages"], model=payload["model"], stream=True) + chat = await miner.chat.completions.create( + messages=payload["messages"], model=payload["model"], stream=True + ) async for chunk in chat: if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index b3ab8f64..1a7648b4 100644 --- a/prompting/miner_availability/miner_availability.py +++ 
b/prompting/miner_availability/miner_availability.py @@ -3,7 +3,6 @@ from prompting.tasks.base_task import BaseTask from prompting.llms.model_zoo import ModelZoo from prompting.base.loop_runner import AsyncLoopRunner -from prompting.base.protocol import AvailabilitySynapse from prompting.settings import settings from prompting.tasks.task_registry import TaskRegistry from prompting.utils.uids import get_uids @@ -53,7 +52,7 @@ def get_available_miners( available = [uid for uid in available if self.miners[uid].is_model_available(model)] if k: available = random.sample(available, min(len(available), k)) - return available + return list(map(int, available)) class CheckMinerAvailability(AsyncLoopRunner): @@ -84,8 +83,8 @@ async def run_step(self): if not response: continue miner_availabilities.miners[uid] = MinerAvailability( - task_availabilities=response['task_availabilities'], - llm_model_availabilities=response['llm_model_availabilities'], + task_availabilities=response["task_availabilities"], + llm_model_availabilities=response["llm_model_availabilities"], ) logger.debug("Miner availabilities updated.") From ee5351fc5709746973a53574034af9283e80058a Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Fri, 22 Nov 2024 15:45:20 +0000 Subject: [PATCH 04/40] SN1-331: Adding API keys --- prompting/api/api.py | 25 ++++++-- prompting/api/api_keys.json | 1 + prompting/api/api_managements/api.py | 77 +++++++++++++++++++++++ prompting/api/gpt_endpoints/api.py | 11 +++- prompting/api/miner_availabilities/api.py | 12 +++- 5 files changed, 117 insertions(+), 9 deletions(-) create mode 100644 prompting/api/api_keys.json create mode 100644 prompting/api/api_managements/api.py diff --git a/prompting/api/api.py b/prompting/api/api.py index 1f9da1f0..e60e1d6d 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -1,15 +1,28 @@ from fastapi import FastAPI +from loguru import logger + +# This ensures uvicorn is imported first import uvicorn -from prompting.api.gpt_endpoints.api import router as gpt_router + +# Now we can safely import the rest +from prompting.api.api_managements.api import router as api_management_router from prompting.api.miner_availabilities.api import router as miner_availabilities_router -from loguru import logger +from prompting.api.gpt_endpoints.api import router as gpt_router app = FastAPI() -app.include_router(gpt_router) +# Add routers at the application level +app.include_router(api_management_router) app.include_router(miner_availabilities_router) +app.include_router(gpt_router) + + +@app.get("/health") +def health(): + logger.info("Health endpoint accessed.") + return {"status": "healthy"} -async def start_api(): - logger.info("Starting API") - uvicorn.run(app, host="0.0.0.0", port=8000) +if __name__ == "__main__": + logger.info("Starting API...") + uvicorn.run("api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=True) diff --git a/prompting/api/api_keys.json b/prompting/api/api_keys.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/prompting/api/api_keys.json @@ -0,0 +1 @@ +{} diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py new file mode 100644 index 00000000..07c757a1 --- /dev/null +++ b/prompting/api/api_managements/api.py @@ -0,0 +1,77 @@ +from fastapi import APIRouter, FastAPI, HTTPException, Header, Depends +import json +import secrets + +from prompting.settings import settings + + +router = APIRouter() + + +# Load and save functions for API keys +def load_api_keys(): + try: + with 
open(settings.API_KEYS_FILE, "r") as f: + return json.load(f) + except FileNotFoundError: + return {} + + +def save_api_keys(api_keys): + with open(settings.API_KEYS_FILE, "w") as f: + json.dump(api_keys, f) + + +# Use lifespan to initialize API keys +_keys = load_api_keys() +save_api_keys(_keys) + + +# Dependency to validate the admin key +def validate_admin_key(admin_key: str = Header(...)): + if admin_key != settings.ADMIN_KEY: + raise HTTPException(status_code=403, detail="Invalid admin key") + + +# Dependency to validate API keys +def validate_api_key(api_key: str = Header(...)): + if api_key not in _keys: + raise HTTPException(status_code=403, detail="Invalid API key") + return _keys[api_key] + + +@router.post("/create-api-key/") +def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key)): + """Creates a new API key with a specified rate limit.""" + new_api_key = secrets.token_hex(16) + _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} + return {"message": "API key created", "api_key": new_api_key} + + +@router.put("/modify-api-key/{api_key}") +def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(validate_admin_key)): + """Modifies the rate limit of an existing API key.""" + if api_key not in _keys: + raise HTTPException(status_code=404, detail="API key not found") + _keys[api_key]["rate_limit"] = rate_limit + return {"message": "API key updated", "api_key": api_key} + + +@router.delete("/delete-api-key/{api_key}") +def delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): + """Deletes an existing API key.""" + if api_key not in _keys: + raise HTTPException(status_code=404, detail="API key not found") + del _keys[api_key] + return {"message": "API key deleted"} + + +@router.get("/demo-endpoint/") +def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): + """A demo endpoint that requires a valid API key.""" + return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} + + +# Create FastAPI app and include the router +app = FastAPI() +app.include_router(router) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 061f7cdb..5d9735fc 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -5,6 +5,7 @@ from prompting.base.epistula import create_header_hook from fastapi.responses import StreamingResponse import json +from prompting.miner_availability.miner_availability import miner_availabilities router = APIRouter() @@ -27,10 +28,16 @@ async def proxy_chat_completions(request: Request): # Ensure streaming is enabled body["stream"] = True + if not settings.mode == "mock" and not ( + available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) + ): + return "No miners available" + axon_info = settings.METAGRAPH.axons[available_miners[0]] + base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" # TODO: Forward to actual miners miner = openai.AsyncOpenAI( - base_url="http://localhost:8008/v1", + base_url=base_url, max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), http_client=openai.DefaultAsyncHttpxClient( @@ -41,5 +48,7 @@ async def proxy_chat_completions(request: Request): # Create streaming request to OpenAI response = await miner.chat.completions.create(**body) + # TODO: Add final response to scoring_queue + # Return a streaming response with properly formatted chunks return 
StreamingResponse(process_stream(response), media_type="text/event-stream") diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py index f45b058b..d8f43a63 100644 --- a/prompting/api/miner_availabilities/api.py +++ b/prompting/api/miner_availabilities/api.py @@ -1,6 +1,8 @@ from fastapi import APIRouter from prompting.miner_availability.miner_availability import miner_availabilities from loguru import logger +from prompting.tasks.task_registry import TaskRegistry +from typing import Literal router = APIRouter() @@ -14,6 +16,12 @@ async def get_miner_availabilities(uids: list[int] | None = None): @router.get("/get_available_miners") -async def get_available_miners(task: str | None = None, model: str | None = None, k: int = 10): +async def get_available_miners( + task: Literal[tuple([config.task.__name__ for config in TaskRegistry.task_configs])] | None = None, + model: str | None = None, + k: int = 10, +): logger.info(f"Getting {k} available miners for task {task} and model {model}") - return miner_availabilities.get_available_miners(task=task, model=model, k=k) + task_configs = [config for config in TaskRegistry.task_configs if config.task.__name__ == task] + task_config = task_configs[0] if task_configs else None + return miner_availabilities.get_available_miners(task=task_config, model=model, k=k) From 4ebe74d32d327e0b5fc2c55fb96cfbdd4ad5f6d6 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Fri, 22 Nov 2024 15:51:56 +0000 Subject: [PATCH 05/40] Adding test miner ids --- prompting/api/gpt_endpoints/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 5d9735fc..10f742e0 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -28,7 +28,9 @@ async def proxy_chat_completions(request: Request): # Ensure streaming is enabled body["stream"] = True - if not settings.mode == "mock" and not ( + if settings.TEST_MINER_IDS: + available_miners = settings.TEST_MINER_IDS + elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) ): return "No miners available" From 20e5376473ed3299bbafffb130b17fe0ea4434bb Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Sun, 24 Nov 2024 16:41:03 +0000 Subject: [PATCH 06/40] Adding tasks to scoring queue --- api_keys.json | 1 + neurons/miners/epistula_miner/miner.py | 6 +-- neurons/miners/inference_miner/miner.py | 2 +- neurons/validator.py | 13 +++-- prompting/api/api.py | 5 +- prompting/api/api_managements/api.py | 8 +-- prompting/api/gpt_endpoints/api.py | 66 +++++++++++++++++++++---- prompting/datasets/base.py | 2 +- prompting/llms/vllm_llm.py | 2 +- prompting/settings.py | 34 ++++++++++--- prompting/utils/timer.py | 5 +- 11 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 api_keys.json diff --git a/api_keys.json b/api_keys.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/api_keys.json @@ -0,0 +1 @@ +{} diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 52a53819..5f93154d 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -160,10 +160,10 @@ async def verify_request( # But use some specific fields from the body signed_by = request.headers.get("Epistula-Signed-By") signed_for = request.headers.get("Epistula-Signed-For") - if signed_for != self.wallet.hotkey.ss58_address: - raise 
HTTPException(status_code=400, detail="Bad Request, message is not intended for self") + if signed_for and signed_for != self.wallet.hotkey.ss58_address: + raise HTTPException(status_code=400, detail="EpistulaError: The message is not signed for this hotkey") if signed_by not in self.metagraph.hotkeys: - raise HTTPException(status_code=401, detail="Signer not in metagraph") + raise HTTPException(status_code=401, detail="EpistulaError: Signer not in metagraph") uid = self.metagraph.hotkeys.index(signed_by) stake = self.metagraph.S[uid].item() diff --git a/neurons/miners/inference_miner/miner.py b/neurons/miners/inference_miner/miner.py index 0af511bb..0b180bce 100644 --- a/neurons/miners/inference_miner/miner.py +++ b/neurons/miners/inference_miner/miner.py @@ -143,7 +143,7 @@ async def _forward( init_time, timeout_threshold, ) - logger.info(f"Time for complete response: {timer.elapsed_time}") + logger.info(f"Time for complete response: {timer.final_time}") return synapse.create_streaming_response(token_streamer) def check_availability(self, synapse: AvailabilitySynapse) -> AvailabilitySynapse: diff --git a/neurons/validator.py b/neurons/validator.py index 892649d8..fc435f98 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -105,7 +105,7 @@ async def run_step(self, k: int, timeout: float) -> ValidatorLoggingEvent | Erro if response_event is None: logger.warning("No response event collected. This should not be happening.") return - logger.debug(f"Collected responses in {timer.elapsed_time:.2f} seconds") + logger.debug(f"Collected responses in {timer.final_time:.2f} seconds") # scoring_manager will score the responses as and when the correct model is loaded task_scorer.add_to_queue( @@ -121,7 +121,7 @@ async def run_step(self, k: int, timeout: float) -> ValidatorLoggingEvent | Erro return ValidatorLoggingEvent( block=self.estimate_block, step=self.step, - step_time=timer.elapsed_time, + step_time=timer.final_time, response_event=response_event, task_id=task.task_id, ) @@ -174,7 +174,7 @@ async def forward(self): if not event: return - event.forward_time = timer.elapsed_time + event.forward_time = timer.final_time def __enter__(self): if settings.NO_BACKGROUND_THREAD: @@ -207,7 +207,6 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): - # start api asyncio.create_task(start_api()) GPUInfo.log_gpu_info() @@ -228,9 +227,9 @@ async def main(): # start scoring tasks in separate loop asyncio.create_task(task_scorer.start()) - # TODO: Think about whether we want to store the task queue locally in case of a crash - # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating - # TODO: Make weight setting happen as specific intervals as we load/unload models + # # TODO: Think about whether we want to store the task queue locally in case of a crash + # # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating + # # TODO: Make weight setting happen as specific intervals as we load/unload models with Validator() as v: while True: logger.info( diff --git a/prompting/api/api.py b/prompting/api/api.py index e60e1d6d..b797273b 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -23,6 +23,7 @@ def health(): return {"status": "healthy"} -if __name__ == "__main__": +# if __name__ == "__main__": +async def start_api(): logger.info("Starting API...") - uvicorn.run("api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=True) + 
uvicorn.run("prompting.api.api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=False) diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 07c757a1..40d21577 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, FastAPI, HTTPException, Header, Depends +from fastapi import APIRouter, HTTPException, Header, Depends import json import secrets @@ -72,6 +72,6 @@ def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} -# Create FastAPI app and include the router -app = FastAPI() -app.include_router(router) +# # Create FastAPI app and include the router +# app = FastAPI() +# app.include_router(router) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 10f742e0..f7162da8 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -1,4 +1,6 @@ from fastapi import APIRouter, Request +from loguru import logger +import random import openai from prompting.settings import settings from httpx import Timeout @@ -6,18 +8,50 @@ from fastapi.responses import StreamingResponse import json from prompting.miner_availability.miner_availability import miner_availabilities +from prompting.tasks.inference import InferenceTask +from typing import AsyncGenerator +from prompting.rewards.scoring import task_scorer +from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult +from prompting.utils.timer import Timer router = APIRouter() -async def process_stream(stream): - async for chunk in stream: - if hasattr(chunk, "choices") and chunk.choices: - # Extract the delta content from the chunk - delta = chunk.choices[0].delta - if hasattr(delta, "content") and delta.content is not None: - # Format as SSE data +async def process_and_collect_stream(miner_id: int, request: dict, response: AsyncGenerator): + collected_content = [] + collected_chunks_timings = [] + with Timer() as timer: + async for chunk in response: + logger.debug(f"Chunk: {chunk}") + if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): + collected_content.append(chunk.choices[0].delta.content) + collected_chunks_timings.append(timer.elapsed_time()) + # Format in SSE format yield f"data: {json.dumps(chunk.model_dump())}\n\n" + # After streaming is complete, put the response in the queue + task = InferenceTask( + query=request["messages"][-1]["content"], + model=request.get("model"), + seed=request.get("seed"), + response="".join(collected_content), + ) + logger.debug(f"Adding Organic Request to scoring queue: {task}") + response_event = DendriteResponseEvent( + stream_results=[ + SynapseStreamResult( + uid=miner_id, + accumulated_chunks=collected_content, + accumulated_chunks_timings=collected_chunks_timings, + ) + ], + uids=[miner_id], + timeout=settings.NEURON_TIMEOUT, + ) + + # TODO: Estimate block and step + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + ) yield "data: [DONE]\n\n" @@ -25,17 +59,28 @@ async def process_stream(stream): async def proxy_chat_completions(request: Request): # Get the request body body = await request.json() + body["seed"] = body.get("seed") or str( + random.randint(0, 1_000_000) + ) # for some reason needs to be passed as string... it seems? 
# Ensure streaming is enabled - body["stream"] = True + # body["stream"] = True if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) + available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): return "No miners available" axon_info = settings.METAGRAPH.axons[available_miners[0]] + + # TODO: Remove this/build better testing mechanism base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # base_url = "http://localhost:8008/v1" + # available_miners = [-1] + + miner_id = available_miners[0] + + logger.debug(f"Using base_url: {base_url}") # TODO: Forward to actual miners miner = openai.AsyncOpenAI( @@ -53,4 +98,5 @@ async def proxy_chat_completions(request: Request): # TODO: Add final response to scoring_queue # Return a streaming response with properly formatted chunks - return StreamingResponse(process_stream(response), media_type="text/event-stream") + # return await process_and_collect_stream(process_stream(response)) + return StreamingResponse(process_and_collect_stream(miner_id, body, response), media_type="text/event-stream") diff --git a/prompting/datasets/base.py b/prompting/datasets/base.py index 2a92c39b..aabd33ed 100644 --- a/prompting/datasets/base.py +++ b/prompting/datasets/base.py @@ -81,7 +81,7 @@ def next(self, method: Literal["random", "search", "get"] = "random", **kwargs) context.source = self.__class__.__name__ context.stats = { - "fetch_time": timer.elapsed_time, + "fetch_time": timer.final_time, "num_tries": tries, "fetch_method": method, "next_kwargs": kwargs, diff --git a/prompting/llms/vllm_llm.py b/prompting/llms/vllm_llm.py index 2ffb1c43..2d04726d 100644 --- a/prompting/llms/vllm_llm.py +++ b/prompting/llms/vllm_llm.py @@ -270,7 +270,7 @@ def generate(self, prompts, sampling_params=None): for output in outputs: results.append(output.outputs[0].text.strip()) logger.debug( - f"PROMPT: {prompts}\n\nRESPONSES: {results}\n\nSAMPLING PARAMS: {sampling_params}\n\nTIME FOR RESPONSE: {timer.elapsed_time}" + f"PROMPT: {prompts}\n\nRESPONSES: {results}\n\nSAMPLING PARAMS: {sampling_params}\n\nTIME FOR RESPONSE: {timer.final_time}" ) return results if len(results) > 1 else results[0] diff --git a/prompting/settings.py b/prompting/settings.py index 2d00a011..da85f9e2 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -1,14 +1,11 @@ import os from functools import cached_property from typing import Any, Literal, Optional - -import bittensor as bt -import dotenv -import torch from loguru import logger +import dotenv from pydantic import Field, model_validator from pydantic_settings import BaseSettings - +import bittensor as bt from prompting.utils.config import config @@ -30,7 +27,7 @@ class Settings(BaseSettings): # Neuron. NEURON_EPOCH_LENGTH: int = Field(100, env="NEURON_EPOCH_LENGTH") - NEURON_DEVICE: str = Field("cuda" if torch.cuda.is_available() else "cpu", env="NEURON_DEVICE") + NEURON_DEVICE: str | None = Field(None, env="NEURON_DEVICE") NEURON_GPUS: int = Field(1, env="NEURON_GPUS") # Logging. @@ -66,6 +63,10 @@ class Settings(BaseSettings): SCORING_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="SCORING_QUEUE_LENGTH_THRESHOLD") HF_TOKEN: Optional[str] = Field(None, env="HF_TOKEN") + # API Management. 
+    API_KEYS_FILE: str = Field("api_keys.json", env="API_KEYS_FILE")
+    ADMIN_KEY: str | None = Field(None, env="ADMIN_KEY")
+
     # Additional Fields.
     NETUID: Optional[int] = Field(61, env="NETUID")
     TEST: bool = False
@@ -127,8 +128,9 @@ def load_env_file(cls, mode: Literal["miner", "validator", "mock"]):
             dotenv_file = ".env.miner"
         elif mode == "validator":
             dotenv_file = ".env.validator"
+        # For mock testing, still make validator env vars available where possible.
         elif mode == "mock":
-            dotenv_file = None
+            dotenv_file = ".env.validator"
         else:
             raise ValueError(f"Invalid mode: {mode}")
@@ -154,15 +156,23 @@ def load(cls, mode: Literal["miner", "validator", "mock"]) -> "Settings":
     def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]:
         mode = values["mode"]
         netuid = values.get("NETUID", 61)
+
         if netuid is None:
             raise ValueError("NETUID must be specified")
         values["TEST"] = netuid != 1
         if mode == "mock":
             values["MOCK"] = True
+            values["NEURON_DEVICE"] = "cpu"
             logger.info("Running in mock mode. Bittensor objects will not be initialized.")
             return values
+        # load slow packages only if not in mock mode
+        import torch
+
+        if not values.get("NEURON_DEVICE"):
+            values["NEURON_DEVICE"] = "cuda" if torch.cuda.is_available() else "cpu"
+
         # Ensure SAVE_PATH exists.
         save_path = values.get("SAVE_PATH", "./storage")
         if not os.path.exists(save_path):
@@ -177,6 +187,8 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]:
             raise Exception(
                 "You must provide an OpenAI API key as a backup. It is recommended to also provide an SN19 API key + url to avoid incurring API costs."
             )
+        if mode == "validator" and values.get("ADMIN_KEY") is None:
+            raise Exception("You must provide an admin key to access the API.")
         return values
 
     @cached_property
@@ -208,4 +220,12 @@ def DENDRITE(self) -> bt.dendrite:
         return bt.dendrite(wallet=self.WALLET)
 
 
+logger.info("Settings class instantiated.")
 settings: Optional[Settings] = None
+try:
+    settings: Optional[Settings] = Settings.load(mode="mock")
+except Exception as e:
+    logger.exception(f"Error loading settings: {e}")
+    settings = None
+logger.info("Settings loaded.")
diff --git a/prompting/utils/timer.py b/prompting/utils/timer.py
index 039e5bad..4fa05496 100644
--- a/prompting/utils/timer.py
+++ b/prompting/utils/timer.py
@@ -6,6 +6,9 @@ def __enter__(self):
         self.start_time = time.perf_counter()
         return self
 
+    def elapsed_time(self):
+        # Seconds elapsed since the timer was entered.
+        return time.perf_counter() - self.start_time
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.end_time = time.perf_counter()
-        self.elapsed_time = self.end_time - self.start_time
+        self.final_time = self.end_time - self.start_time

From 37a8874ecc15824f7424bfe796dd6907affbf52b Mon Sep 17 00:00:00 2001
From: Hollyqui
Date: Mon, 25 Nov 2024 15:55:06 +0000
Subject: [PATCH 07/40] Enabling non-streaming response + bug fixes

---
 prompting/api/gpt_endpoints/api.py | 64 ++++++++++++++++++++++--------
 prompting/settings.py              |  5 +--
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py
index f7162da8..58d664bf 100644
--- a/prompting/api/gpt_endpoints/api.py
+++ b/prompting/api/gpt_endpoints/api.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, Request, HTTPException
 from loguru import logger
 import random
 import openai
@@ -31,6 +31,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy
     # After streaming is complete, put the response in the queue
     task = InferenceTask(
query=request["messages"][-1]["content"], + messages=[message["content"] for message in request["messages"]], model=request.get("model"), seed=request.get("seed"), response="".join(collected_content), @@ -46,6 +47,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy ], uids=[miner_id], timeout=settings.NEURON_TIMEOUT, + completions=["".join(collected_content)], ) # TODO: Estimate block and step @@ -55,34 +57,34 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy yield "data: [DONE]\n\n" +@router.post("/mixture_of_agents") +async def mixture_of_agents(request: Request): + # body = await request.json() + # return {"message": "Mixture of Agents"} + return {"message": "Mixture of Agents"} + + @router.post("/v1/chat/completions") async def proxy_chat_completions(request: Request): - # Get the request body body = await request.json() body["seed"] = body.get("seed") or str( random.randint(0, 1_000_000) ) # for some reason needs to be passed as string... it seems? + logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - # Ensure streaming is enabled - # body["stream"] = True if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): return "No miners available" - axon_info = settings.METAGRAPH.axons[available_miners[0]] - # TODO: Remove this/build better testing mechanism + axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" # base_url = "http://localhost:8008/v1" - # available_miners = [-1] - miner_id = available_miners[0] - logger.debug(f"Using base_url: {base_url}") - # TODO: Forward to actual miners miner = openai.AsyncOpenAI( base_url=base_url, max_retries=0, @@ -92,11 +94,39 @@ async def proxy_chat_completions(request: Request): ), ) - # Create streaming request to OpenAI - response = await miner.chat.completions.create(**body) - - # TODO: Add final response to scoring_queue + try: + with Timer() as timer: + # Create request to OpenAI + response = await miner.chat.completions.create(**body) + if body.get("stream"): + # If streaming is requested, return streaming response + return StreamingResponse( + process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" + ) + except Exception as e: + logger.exception(f"Error coming from Miner: {e}") + raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - # Return a streaming response with properly formatted chunks - # return await process_and_collect_stream(process_stream(response)) - return StreamingResponse(process_and_collect_stream(miner_id, body, response), media_type="text/event-stream") + response_event = DendriteResponseEvent( + stream_results=[ + SynapseStreamResult( + uid=miner_id, + accumulated_chunks=[response.choices[0].message.content], + accumulated_chunks_timings=[timer.final_time], + ) + ], + completions=[response.choices[0].message.content], + uids=[miner_id], + timeout=settings.NEURON_TIMEOUT, + ) + task = InferenceTask( + query=body["messages"][-1]["content"], + messages=[message["content"] for message in body["messages"]], + model=body.get("model"), + seed=body.get("seed"), + response=response_event, + ) + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, 
block=-1, step=-1, task_id=task.task_id + ) + return response diff --git a/prompting/settings.py b/prompting/settings.py index da85f9e2..6bbc47ad 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -160,7 +160,8 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]: if netuid is None: raise ValueError("NETUID must be specified") values["TEST"] = netuid != 1 - + if values.get("TEST_MINER_IDS"): + values["TEST_MINER_IDS"] = str(values["TEST_MINER_IDS"]).split(",") if mode == "mock": values["MOCK"] = True values["NEURON_DEVICE"] = "cpu" @@ -177,8 +178,6 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]: save_path = values.get("SAVE_PATH", "./storage") if not os.path.exists(save_path): os.makedirs(save_path) - if values.get("TEST_MINER_IDS"): - values["TEST_MINER_IDS"] = str(values["TEST_MINER_IDS"]).split(",") if values.get("SN19_API_KEY") is None or values.get("SN19_API_URL") is None: logger.warning( "It is strongly recommended to provide an SN19 API KEY + URL to avoid incurring OpenAI API costs." From a09cd9bcbe0ecda25dcc695adf08917de8140002 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Mon, 25 Nov 2024 16:40:46 +0000 Subject: [PATCH 08/40] Making model loading non-blocking --- prompting/llms/model_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prompting/llms/model_manager.py b/prompting/llms/model_manager.py index 55fa5e23..0adf032c 100644 --- a/prompting/llms/model_manager.py +++ b/prompting/llms/model_manager.py @@ -182,7 +182,8 @@ async def run_step(self): logger.debug(f"Active models: {model_manager.active_models.keys()}") # Load the selected model - model_manager.load_model(selected_model) + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, self.llm_model_manager.load_model, selected_model) await asyncio.sleep(0.01) From 566dc770fde2035dca61c85cfdcda853f455a29c Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 11:21:30 +0000 Subject: [PATCH 09/40] Protecting endpoints with API key --- prompting/api/gpt_endpoints/api.py | 24 ++++++++++-------------- prompting/base/epistula.py | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 58d664bf..54111122 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -13,6 +13,8 @@ from prompting.rewards.scoring import task_scorer from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.utils.timer import Timer +from prompting.api.api_managements.api import validate_api_key +from fastapi import Depends router = APIRouter() @@ -26,9 +28,8 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): collected_content.append(chunk.choices[0].delta.content) collected_chunks_timings.append(timer.elapsed_time()) - # Format in SSE format yield f"data: {json.dumps(chunk.model_dump())}\n\n" - # After streaming is complete, put the response in the queue + task = InferenceTask( query=request["messages"][-1]["content"], messages=[message["content"] for message in request["messages"]], @@ -50,7 +51,6 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy completions=["".join(collected_content)], ) - # TODO: Estimate block and step task_scorer.add_to_queue( task=task, response=response_event, 
dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) @@ -58,18 +58,14 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy @router.post("/mixture_of_agents") -async def mixture_of_agents(request: Request): - # body = await request.json() - # return {"message": "Mixture of Agents"} +async def mixture_of_agents(request: Request, api_key_data: dict = Depends(validate_api_key)): return {"message": "Mixture of Agents"} @router.post("/v1/chat/completions") -async def proxy_chat_completions(request: Request): +async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): body = await request.json() - body["seed"] = body.get("seed") or str( - random.randint(0, 1_000_000) - ) # for some reason needs to be passed as string... it seems? + body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") if settings.TEST_MINER_IDS: @@ -77,11 +73,10 @@ async def proxy_chat_completions(request: Request): elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): - return "No miners available" + raise HTTPException(status_code=503, detail="No miners available") axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -96,10 +91,8 @@ async def proxy_chat_completions(request: Request): try: with Timer() as timer: - # Create request to OpenAI response = await miner.chat.completions.create(**body) if body.get("stream"): - # If streaming is requested, return streaming response return StreamingResponse( process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" ) @@ -119,6 +112,7 @@ async def proxy_chat_completions(request: Request): uids=[miner_id], timeout=settings.NEURON_TIMEOUT, ) + task = InferenceTask( query=body["messages"][-1]["content"], messages=[message["content"] for message in body["messages"]], @@ -126,7 +120,9 @@ async def proxy_chat_completions(request: Request): seed=body.get("seed"), response=response_event, ) + task_scorer.add_to_queue( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) + return response diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 4fbca51c..afe4fa92 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -66,11 +66,29 @@ def generate_header( return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey=None): +def create_header_hook(hotkey, axon_hotkey=None, api_key=None): + """ + Creates a header hook function that adds authentication headers including API key. 
+ + Args: + hotkey: The wallet hotkey + axon_hotkey: Optional axon hotkey + api_key: Optional API key for endpoint authentication + + Returns: + Async function that adds headers to the request + """ + async def add_headers(request: httpx.Request): + # Add standard headers for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): if key not in ["messages", "model", "stream"]: request.headers[key] = header + + # Add API key if provided + if api_key: + request.headers["api-key"] = api_key + return request return add_headers From 9d810ce3d4b7d1cdf77b3701690eecd92120c212 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 14:43:05 +0000 Subject: [PATCH 10/40] Improving error messages + improving API key saving --- api_keys.json | 2 +- neurons/miners/epistula_miner/miner.py | 63 -------------------------- prompting/api/api_managements/api.py | 5 ++ prompting/api/gpt_endpoints/api.py | 6 ++- 4 files changed, 10 insertions(+), 66 deletions(-) diff --git a/api_keys.json b/api_keys.json index 0967ef42..1e41dd87 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{} +{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}} diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 5f93154d..42c302e8 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -67,69 +67,6 @@ async def create_chat_completion(self, request: Request): logger.debug("Chat completion request returning...") return StreamingResponse(r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers) - # async def create_chat_completion(self, request: Request): - # bt.logging.info( - # "\u2713", - # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", - # ) - # openai_request_body = await self.format_openai_query(request) - # try: - # req = self.client.build_request( - # "POST", "chat/completions", json=openai_request_body - # ) - # r = await self.client.send(req, stream=True) - # # Check for non-200 status code - # if r.status_code != 200: - # error_content = await r.aread() - # bt.logging.error(f"OpenAI API Error {r.status_code}: {error_content}") - # return JSONResponse( - # content=json.loads(error_content), - # status_code=r.status_code - # ) - # except Exception as e: - # bt.logging.error(f"Exception during OpenAI API call: {str(e)}") - # return JSONResponse( - # content={"error": str(e)}, - # status_code=500 - # ) - - # async def create_chat_completion(self, request: Request): - # bt.logging.info( - # "\u2713", - # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", - # ) - - # async def word_stream(): - # words = "This is a test stream".split() - # for word in words: - # # Simulate the OpenAI streaming response format - # data = { - # "choices": [ - # { - # "delta": {"content": word + ' '}, - # "index": 0, - # "finish_reason": None - # } - # ] - # } - # # Yield the data in SSE (Server-Sent Events) format - # yield f"data: {json.dumps(data)}\n\n" - # await asyncio.sleep(0.1) # Simulate a delay between words - # # Indicate the end of the stream - # data = { - # "choices": [ - # { - # "delta": {}, - # "index": 0, - # "finish_reason": "stop" - # } - # ] - # } - # yield f"data: {json.dumps(data)}\n\n" - # yield "data: [DONE]\n\n" - - # return StreamingResponse(word_stream(), media_type='text/event-stream') - async def check_availability(self, request: Request): print("Checking availability") # Parse the 
incoming JSON request diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 40d21577..5538dc96 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, HTTPException, Header, Depends import json +from loguru import logger import secrets from prompting.settings import settings @@ -24,6 +25,7 @@ def save_api_keys(api_keys): # Use lifespan to initialize API keys _keys = load_api_keys() +logger.info(f"Loaded API keys: {_keys}") save_api_keys(_keys) @@ -45,6 +47,7 @@ def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key) """Creates a new API key with a specified rate limit.""" new_api_key = secrets.token_hex(16) _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} + save_api_keys(_keys) return {"message": "API key created", "api_key": new_api_key} @@ -54,6 +57,7 @@ def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(valid if api_key not in _keys: raise HTTPException(status_code=404, detail="API key not found") _keys[api_key]["rate_limit"] = rate_limit + save_api_keys(_keys) return {"message": "API key updated", "api_key": api_key} @@ -63,6 +67,7 @@ def delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): if api_key not in _keys: raise HTTPException(status_code=404, detail="API key not found") del _keys[api_key] + save_api_keys(_keys) return {"message": "API key deleted"} diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 54111122..010d7f2b 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -71,9 +71,11 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) + available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=body.get("model")) ): - raise HTTPException(status_code=503, detail="No miners available") + raise HTTPException( + status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" + ) axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" From 685290a270a95c39b5c50c156d1f5f7134fd10d2 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 16:03:07 +0000 Subject: [PATCH 11/40] Signing epistula properly for recipient --- api_keys.json | 2 +- prompting/api/api.py | 6 +++--- prompting/api/gpt_endpoints/api.py | 7 +++++-- prompting/tasks/task_registry.py | 6 ++++++ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/api_keys.json b/api_keys.json index 1e41dd87..93b0e261 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}} +{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} diff --git a/prompting/api/api.py b/prompting/api/api.py index b797273b..7a528ece 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -12,9 +12,9 @@ app = FastAPI() # Add routers at the application level -app.include_router(api_management_router) -app.include_router(miner_availabilities_router) -app.include_router(gpt_router) 
+app.include_router(api_management_router, prefix="/api_management", tags=["api_management"]) +app.include_router(miner_availabilities_router, prefix="/miner_availabilities", tags=["miner_availabilities"]) +app.include_router(gpt_router, tags=["gpt"]) @app.get("/health") diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 010d7f2b..f43770b5 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -9,6 +9,7 @@ import json from prompting.miner_availability.miner_availability import miner_availabilities from prompting.tasks.inference import InferenceTask +from prompting.tasks.task_registry import TaskRegistry from typing import AsyncGenerator from prompting.rewards.scoring import task_scorer from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult @@ -67,11 +68,12 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( body = await request.json() body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") + task = TaskRegistry.get_task_by_name(body.get("task")) if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=body.get("model")) + available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" @@ -79,6 +81,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -87,7 +90,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, None)]} + event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} ), ) diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index f311057a..32320030 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -75,6 +75,12 @@ class TaskRegistry(BaseModel): ), ] + @classmethod + def get_task_by_name(cls, task_name: str) -> BaseTextTask: + if matching_tasks := [t.task for t in cls.task_configs if t.task.__name__ == task_name]: + return matching_tasks[0] + return None + @classmethod def get_task_config(cls, task: BaseTextTask.__class__ | BaseTextTask) -> TaskConfig: task = task.__class__ if isinstance(task, BaseTextTask) else task From 7296fe77a18822a6429469486dc33bf13b3cdf1e Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 17:17:45 +0000 Subject: [PATCH 12/40] Passing task type --- prompting/api/gpt_endpoints/api.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index f43770b5..40aa0844 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -66,9 +66,14 @@ 
async def mixture_of_agents(request: Request, api_key_data: dict = Depends(valid @router.post("/v1/chat/completions") async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): body = await request.json() + task = TaskRegistry.get_task_by_name(body.get("task")) + if body.get("task") and not task: + raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") + logger.debug(f"Requested Task: {body.get('task')}, {task}") + + body = {k: v for k, v in body.items() if k != "task"} body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - task = TaskRegistry.get_task_by_name(body.get("task")) if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS @@ -81,7 +86,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # base_url = "http://localhost:8008/v1" + base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -96,7 +101,10 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( try: with Timer() as timer: - response = await miner.chat.completions.create(**body) + if task: + response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) + else: + response = await miner.chat.completions.create(**body) if body.get("stream"): return StreamingResponse( process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" From 120a90a84c8104b1252fb5be90af9db55898b1bb Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Tue, 26 Nov 2024 13:52:18 -1000 Subject: [PATCH 13/40] Move streaming of miners into query_miners function --- neurons/validator.py | 6 +- prompting/api/api.py | 7 +- prompting/api/api_managements/api.py | 6 +- prompting/api/gpt_endpoints/api.py | 21 ++--- prompting/api/miner_availabilities/api.py | 6 +- prompting/base/epistula.py | 90 +++++++++++++------ .../miner_availability/miner_availability.py | 5 -- prompting/settings.py | 5 +- 8 files changed, 89 insertions(+), 57 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 734585bc..312b88c1 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -2,7 +2,7 @@ import asyncio import json import time -import json + from prompting import settings from prompting.utils.profiling import profiler @@ -12,6 +12,7 @@ from loguru import logger from prompting import mutable_globals +from prompting.api.api import start_api from prompting.base.dendrite import DendriteResponseEvent from prompting.base.epistula import query_miners from prompting.base.forward import log_stream_results @@ -26,9 +27,6 @@ from prompting.utils.logging import ErrorLoggingEvent, ValidatorLoggingEvent from prompting.utils.timer import Timer from prompting.weight_setting.weight_setter import weight_setter -from prompting.llms.utils import GPUInfo -from prompting.base.epistula import query_miners -from prompting.api.api import start_api NEURON_SAMPLE_SIZE = 100 diff --git a/prompting/api/api.py b/prompting/api/api.py index 7a528ece..dff0d2bb 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -1,13 +1,12 @@ -from fastapi import FastAPI -from loguru import logger - # This ensures uvicorn is imported first import uvicorn +from 
fastapi import FastAPI +from loguru import logger # Now we can safely import the rest from prompting.api.api_managements.api import router as api_management_router -from prompting.api.miner_availabilities.api import router as miner_availabilities_router from prompting.api.gpt_endpoints.api import router as gpt_router +from prompting.api.miner_availabilities.api import router as miner_availabilities_router app = FastAPI() diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 5538dc96..92ccc922 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,10 +1,10 @@ -from fastapi import APIRouter, HTTPException, Header, Depends import json -from loguru import logger import secrets -from prompting.settings import settings +from fastapi import APIRouter, Depends, Header, HTTPException +from loguru import logger +from prompting.settings import settings router = APIRouter() diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 40aa0844..2fe64ced 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -1,21 +1,22 @@ -from fastapi import APIRouter, Request, HTTPException -from loguru import logger +import json import random +from typing import AsyncGenerator + import openai -from prompting.settings import settings +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import StreamingResponse from httpx import Timeout +from loguru import logger + +from prompting.api.api_managements.api import validate_api_key +from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.base.epistula import create_header_hook -from fastapi.responses import StreamingResponse -import json from prompting.miner_availability.miner_availability import miner_availabilities +from prompting.rewards.scoring import task_scorer +from prompting.settings import settings from prompting.tasks.inference import InferenceTask from prompting.tasks.task_registry import TaskRegistry -from typing import AsyncGenerator -from prompting.rewards.scoring import task_scorer -from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.utils.timer import Timer -from prompting.api.api_managements.api import validate_api_key -from fastapi import Depends router = APIRouter() diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py index d8f43a63..44bee346 100644 --- a/prompting/api/miner_availabilities/api.py +++ b/prompting/api/miner_availabilities/api.py @@ -1,8 +1,10 @@ +from typing import Literal + from fastapi import APIRouter -from prompting.miner_availability.miner_availability import miner_availabilities from loguru import logger + +from prompting.miner_availability.miner_availability import miner_availabilities from prompting.tasks.task_registry import TaskRegistry -from typing import Literal router = APIRouter() diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 73534c2d..cd8607f6 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -79,7 +79,7 @@ async def add_headers(request: httpx.Request): return add_headers -async def query_miners(uids, body): +async def query_miners(uids: list = [], body: bytes = b"", stream: bool = False): try: tasks = [] for uid in uids: @@ -90,13 +90,53 @@ async def query_miners(uids, body): settings.WALLET, body, uid, + stream=stream, ) ) ) - responses: 
List[SynapseStreamResult] = await asyncio.gather(*tasks) - return responses + responses = await asyncio.gather(*tasks, return_exceptions=True) + + # Filter out exceptions from responses + exceptions = [resp for resp in responses if isinstance(resp, Exception)] + if exceptions: + for exc in exceptions: + logger.error(f"Error in handle_inference: {exc}") + # Handle exceptions as needed + + if stream: + # 'responses' is a list of async iterators (chat objects) + async def merged_stream(): + streams = [response.__aiter__() for response in responses if not isinstance(response, Exception)] + pending = {} + for stream in streams: + try: + task = asyncio.create_task(stream.__anext__()) + pending[task] = stream + except StopAsyncIteration: + continue # Skip empty streams + + while pending: + done, _ = await asyncio.wait(pending.keys(), return_when=asyncio.FIRST_COMPLETED) + for task in done: + stream = pending.pop(task) + try: + result = task.result() + yield result + # Schedule the next item from the same stream + next_task = asyncio.create_task(stream.__anext__()) + pending[next_task] = stream + except StopAsyncIteration: + # Stream is exhausted + pass + except Exception as e: + logger.error(f"Error while streaming: {e}") + + return merged_stream() + else: + # 'responses' is a list of SynapseStreamResult objects + return [resp for resp in responses if not isinstance(resp, Exception)] except Exception as e: - logger.error(f"Error in forward for: {e}") + logger.error(f"Error in query_miners: {e}") return [] @@ -150,6 +190,7 @@ async def handle_inference( wallet: "bt.wallet", body: Dict[str, Any], uid: int, + stream: bool = False, ) -> SynapseStreamResult: exception = None chunks = [] @@ -166,43 +207,38 @@ async def handle_inference( event_hooks={"request": [create_header_hook(wallet.hotkey, axon_info.hotkey)]} ), ) - try: - payload = json.loads(body) - chat = await miner.chat.completions.create( - messages=payload["messages"], - model=payload["model"], - stream=True, - extra_body={k: v for k, v in payload.items() if k not in ["messages", "model"]}, - ) + payload = json.loads(body) + chat = await miner.chat.completions.create( + messages=payload["messages"], + model=payload["model"], + stream=True, + extra_body={k: v for k, v in payload.items() if k not in ["messages", "model"]}, + ) + if not stream: async for chunk in chat: if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) chunk_timings.append(time.time() - start_time) - - except openai.APIConnectionError as e: - logger.trace(f"Miner {uid} failed request: {e}") - exception = e - - except Exception as e: - logger.trace(f"Unknown Error when sending to miner {uid}: {e}") - exception = e - + except openai.APIConnectionError as e: + logger.trace(f"Miner {uid} failed request: {e}") + exception = str(e) except Exception as e: - exception = e - logger.error(f"{uid}: Error in forward for: {e}") + logger.trace(f"Unknown Error when sending to miner {uid}: {e}") + exception = str(e) finally: - if exception: - exception = str(exception) if exception is None: status_code = 200 status_message = "Success" elif isinstance(exception, openai.APIConnectionError): status_code = 502 - status_message = str(exception) + status_message = exception else: status_code = 500 - status_message = str(exception) + status_message = exception + if stream: + return chat + else: return SynapseStreamResult( accumulated_chunks=chunks, accumulated_chunks_timings=chunk_timings, diff --git 
a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index dee65660..29bbc7f9 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -13,11 +13,6 @@ from prompting.tasks.base_task import BaseTask from prompting.tasks.task_registry import TaskRegistry from prompting.utils.uids import get_uids -import random -import asyncio -import numpy as np -from prompting.base.epistula import query_availabilities -from typing import Dict task_config: dict[str, bool] = {str(task_config.task.__name__): True for task_config in TaskRegistry.task_configs} # task_config: dict[str, bool] = { diff --git a/prompting/settings.py b/prompting/settings.py index 08890df5..04d3a4cd 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -1,11 +1,12 @@ import os from functools import cached_property from typing import Any, Literal, Optional -from loguru import logger + +import bittensor as bt import dotenv +from loguru import logger from pydantic import Field, model_validator from pydantic_settings import BaseSettings -import bittensor as bt from transformers import AwqConfig # from prompting.utils.config import config From 7200833fc451e95b684a053e7ea45147ff14e9ad Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 27 Nov 2024 03:58:30 +0000 Subject: [PATCH 14/40] WIP: Add system prompt --- neurons/validator.py | 9 ++++++--- prompting/api/gpt_endpoints/api.py | 2 ++ prompting/base/epistula.py | 2 +- prompting/tasks/base_task.py | 3 ++- prompting/tasks/inference.py | 4 ++-- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index fc435f98..bdf6cd6f 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -140,12 +140,15 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | logger.warning("No available miners. 
This should already have been caught earlier.")
             return
 
+        messages: list[dict[str, str]] = []
+        if task.synapse_system_prompt:
+            messages.append({"role": "system", "content": task.synapse_system_prompt})
+        messages.append({"role": "user", "content": task.query})
+
         body = {
             "seed": task.seed,
             "model": task.llm_model_id,
-            "messages": [
-                {"role": "user", "content": task.query},
-            ],
+            "messages": messages,
         }
         body_bytes = json.dumps(body).encode("utf-8")
         stream_results = await query_miners(task.__class__.__name__, uids, body_bytes)
diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py
index f43770b5..62a00401 100644
--- a/prompting/api/gpt_endpoints/api.py
+++ b/prompting/api/gpt_endpoints/api.py
@@ -34,6 +34,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy
     task = InferenceTask(
         query=request["messages"][-1]["content"],
         messages=[message["content"] for message in request["messages"]],
+        roles=request.get("roles", None),
         model=request.get("model"),
         seed=request.get("seed"),
         response="".join(collected_content),
@@ -121,6 +122,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(
     task = InferenceTask(
         query=body["messages"][-1]["content"],
         messages=[message["content"] for message in body["messages"]],
+        roles=roles,
         model=body.get("model"),
         seed=body.get("seed"),
         response=response_event,
diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py
index afe4fa92..fe1d675c 100644
--- a/prompting/base/epistula.py
+++ b/prompting/base/epistula.py
@@ -94,7 +94,7 @@ async def add_headers(request: httpx.Request):
     return add_headers
 
 
-async def query_miners(task, uids, body):
+async def query_miners(task: str, uids: list[int], body: dict[str, any]):
diff --git a/prompting/tasks/base_task.py b/prompting/tasks/base_task.py
index 56024817..f7414776 100644
--- a/prompting/tasks/base_task.py
+++ b/prompting/tasks/base_task.py
@@ -49,6 +49,7 @@ def make_reference(self, **kwargs):
 
 class BaseTextTask(BaseTask):
     query: str | None = None
+    roles: list[str] | None = None
     messages: list[str] | None = None
     reference: str | None = None
     llm_model: ModelConfig = None
@@ -93,7 +94,7 @@ def generate_query(
         """Generates a query to be used for generating the challenge"""
         logger.info("🤖 Generating query...")
         llm_messages = [LLMMessage(role="system", content=self.query_system_prompt)] if self.query_system_prompt else []
-        llm_messages += [LLMMessage(role="user", content=message) for message in messages]
+        llm_messages.extend([LLMMessage(role="user", content=message) for message in messages])
 
         self.query = LLMWrapper.chat_complete(messages=LLMMessages(*llm_messages))
 
diff --git a/prompting/tasks/inference.py b/prompting/tasks/inference.py
index fefa5b7a..e1cd602c 100644
--- a/prompting/tasks/inference.py
+++ b/prompting/tasks/inference.py
@@ -57,8 +57,8 @@ def make_query(self, dataset_entry: ChatEntry) -> str:
 
     def make_reference(self, dataset_entry: ChatEntry) -> str:
         self.reference = model_manager.generate(
-            messages=[self.messages[-1]],
-            roles=["user"],
+            messages=[self.messages[-1]] if self.roles is None else self.messages,
+            roles=["user"] if self.roles is None else self.roles,
             model=self.llm_model,
             sampling_params=SamplingParams(seed=self.seed),
         )[0]

From 6cf7aa5e0a8748c680864efabf018c183850cd98 Mon Sep 17 00:00:00 2001
From: bkb2135
Date: Wed, 27 Nov 2024 02:36:08 -1000
Subject: [PATCH 15/40] Use query_miners in api

---
 prompting/api/gpt_endpoints/api.py | 139 
+++++++++++++++++------------ 1 file changed, 83 insertions(+), 56 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 2fe64ced..fd9e91b1 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -10,7 +10,7 @@ from prompting.api.api_managements.api import validate_api_key from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult -from prompting.base.epistula import create_header_hook +from prompting.base.epistula import create_header_hook, query_miners from prompting.miner_availability.miner_availability import miner_availabilities from prompting.rewards.scoring import task_scorer from prompting.settings import settings @@ -18,6 +18,7 @@ from prompting.tasks.task_registry import TaskRegistry from prompting.utils.timer import Timer + router = APIRouter() @@ -72,7 +73,9 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") logger.debug(f"Requested Task: {body.get('task')}, {task}") - body = {k: v for k, v in body.items() if k != "task"} + stream = body.get("stream") + body = {k: v for k, v in body.items() if k not in ["task", "stream"]} + body['task'] = task.__class__.__name__ body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") @@ -82,61 +85,85 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( - status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" + status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" ) - axon_info = settings.METAGRAPH.axons[available_miners[0]] - base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - base_url = "http://localhost:8008/v1" - miner_id = available_miners[0] - logger.debug(f"Using base_url: {base_url}") - - miner = openai.AsyncOpenAI( - base_url=base_url, - max_retries=0, - timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} - ), - ) - - try: - with Timer() as timer: - if task: - response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) - else: - response = await miner.chat.completions.create(**body) - if body.get("stream"): - return StreamingResponse( - process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" - ) - except Exception as e: - logger.exception(f"Error coming from Miner: {e}") - raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - - response_event = DendriteResponseEvent( - stream_results=[ - SynapseStreamResult( - uid=miner_id, - accumulated_chunks=[response.choices[0].message.content], - accumulated_chunks_timings=[timer.final_time], - ) - ], - completions=[response.choices[0].message.content], - uids=[miner_id], - timeout=settings.NEURON_TIMEOUT, - ) - - task = InferenceTask( - query=body["messages"][-1]["content"], - messages=[message["content"] for message in body["messages"]], - model=body.get("model"), - seed=body.get("seed"), - response=response_event, - ) + 
response = query_miners(available_miners, body, stream = stream) + if stream: + return response + else: + response_event = DendriteResponseEvent( + stream_results = response, + uids = available_miners, + timeout = settings.NEURON_TIMEOUT, + completions = ["".join(res.accumulated_chunks) for res in response] + ) - task_scorer.add_to_queue( - task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id - ) + task = task( + query=body["messages"][-1]["content"], + messages=[message["content"] for message in body["messages"]], + model=body.get("model"), + seed=body.get("seed"), + response=response_event, + ) - return response + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + ) + return response + + + # axon_info = settings.METAGRAPH.axons[available_miners[0]] + # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # miner_id = available_miners[0] + # logger.debug(f"Using base_url: {base_url}") + + # miner = openai.AsyncOpenAI( + # base_url=base_url, + # max_retries=0, + # timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + # http_client=openai.DefaultAsyncHttpxClient( + # event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} + # ), + # ) + + # try: + # with Timer() as timer: + # if task: + # response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) + # else: + # response = await miner.chat.completions.create(**body) + # if body.get("stream"): + # return StreamingResponse( + # process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" + # ) + # except Exception as e: + # logger.exception(f"Error coming from Miner: {e}") + # raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") + + # response_event = DendriteResponseEvent( + # stream_results=[ + # SynapseStreamResult( + # uid=miner_id, + # accumulated_chunks=[response.choices[0].message.content], + # accumulated_chunks_timings=[timer.final_time], + # ) + # ], + # completions=[response.choices[0].message.content], + # uids=[miner_id], + # timeout=settings.NEURON_TIMEOUT, + # ) + + # task = InferenceTask( + # query=body["messages"][-1]["content"], + # messages=[message["content"] for message in body["messages"]], + # model=body.get("model"), + # seed=body.get("seed"), + # response=response_event, + # ) + + # task_scorer.add_to_queue( + # task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + # ) + + # return response From 07620fd76632927f696635f8af52b5d206672a79 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 03:17:23 -1000 Subject: [PATCH 16/40] Fix syntax errors --- prompting/api/gpt_endpoints/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index fd9e91b1..7b6d539d 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -88,10 +88,11 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" ) - response = query_miners(available_miners, body, stream = stream) + response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream = stream) if stream: return response 
else: + response = await response response_event = DendriteResponseEvent( stream_results = response, uids = available_miners, @@ -110,6 +111,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( task_scorer.add_to_queue( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) + return response From 21c223681fd24f4d94a17ec51e84c8e9e44ca279 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 07:02:46 -1000 Subject: [PATCH 17/40] Manually dump models --- prompting/api/gpt_endpoints/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 7b6d539d..e25d4f21 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -112,7 +112,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) - return response + return [res.model_dump() for res in response] # axon_info = settings.METAGRAPH.axons[available_miners[0]] From ba900a2e19dd911ec21da11e65b5b29bf7192588 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:14:49 +0000 Subject: [PATCH 18/40] Use autoawq 0.2.0 --- poetry.lock | 340 ++++++++++++++++++++++++++----------------------- pyproject.toml | 2 +- 2 files changed, 185 insertions(+), 157 deletions(-) diff --git a/poetry.lock b/poetry.lock index fc6bc9a8..0d35d7f4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -345,29 +345,54 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "autoawq" -version = "0.2.7.post2" +version = "0.2.0" description = "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference." 
optional = false python-versions = ">=3.8.0" files = [ - {file = "autoawq-0.2.7.post2-py3-none-any.whl", hash = "sha256:cef26e2b21a812e298f1752326545cf1ea0456af4c54a92e1941b2d018b92815"}, + {file = "autoawq-0.2.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:4c9c4db6fbf23cd625a9cb5b5495777555659dc12aa7e0aba733f20c51f10005"}, + {file = "autoawq-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cfefc8e8c4d92b9b78f2f1bff61d6bb413138d2ab221029587251344d65007c"}, + {file = "autoawq-0.2.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:ee68699fec949c4440374b402558400efe83c359e7f85a5a7979608c5eec0da3"}, + {file = "autoawq-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:4d6080539bb386a5754cc76b5081b112a93df1ee38f4c2f82e2773e9f098470b"}, + {file = "autoawq-0.2.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:74d2c49780aaa7c7ba0fa4e1f196ac2dc4bdceba27e780115e7dfb32f1ba3c0a"}, + {file = "autoawq-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:43651382592e348c8f44bdc6796b9fa6fc5bd398f58908410376f0b7aaa2b3b3"}, + {file = "autoawq-0.2.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:a40c12fc4ddeabec6f04a2179e720e79563bfe29646ddf9c130bce0bcb51a760"}, + {file = "autoawq-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3c5dd45bcf23d8a0de2d79a04baf65fb2208249babeb729274c97df6218d48ae"}, ] [package.dependencies] accelerate = "*" -datasets = ">=2.20" +autoawq-kernels = "*" +datasets = "*" tokenizers = ">=0.12.1" -torch = ">=2.2.0" +torch = ">=2.0.1" transformers = ">=4.35.0" -triton = "*" typing-extensions = ">=4.8.0" zstandard = "*" [package.extras] -cpu = ["intel-extension-for-pytorch (>=2.4.0)"] dev = ["black", "griffe-typingdoc", "mkdocs-material", "mkdocstrings-python"] -eval = ["evaluate", "lm-eval (==0.4.1)", "protobuf", "scipy", "tabulate"] -kernels = ["autoawq-kernels"] +eval = ["evaluate", "lm-eval (>=0.4.0)", "protobuf", "scipy", "tabulate"] + +[[package]] +name = "autoawq-kernels" +version = "0.0.9" +description = "AutoAWQ Kernels implements the AWQ kernels." 
+optional = false +python-versions = ">=3.8.0" +files = [ + {file = "autoawq_kernels-0.0.9-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:ed8f4d744df21beae445efb1de54061bffc5fccbfefc8ae65c1dc10d08f90052"}, + {file = "autoawq_kernels-0.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:cd7d3db501068b3a12094a07921d985a57e640725cdda1252d4b135ed6aeaa65"}, + {file = "autoawq_kernels-0.0.9-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:fe800a6538691afaa77abe7c8b2b0a121351843f048d54e11d617d604dcba48f"}, + {file = "autoawq_kernels-0.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:8c7f2404b3aa448ff77872dd6ba2963ce8b685d8aa73ef65fd1b8bc85d92b17d"}, + {file = "autoawq_kernels-0.0.9-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:4c41a71af1d5a75e52c9833b9c48237b04d3b0eee26d712fc1b074af9135afc8"}, + {file = "autoawq_kernels-0.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:f259e7c60b11fa0689bb337dd4456319787256cbd2a8e4a491f01b51bb6c43d1"}, + {file = "autoawq_kernels-0.0.9-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:b6baf039c22deb02f2ae194fdd77551b3c85c8f8a77b749f7caa17dacf986adb"}, + {file = "autoawq_kernels-0.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:6ad12dd68b0932182678f2f9fbee87452707b81f0e8dad242d23af018358f030"}, +] + +[package.dependencies] +torch = ">=2.5.1" [[package]] name = "babel" @@ -1338,18 +1363,18 @@ six = ">=1.4.0" [[package]] name = "duckduckgo-search" -version = "6.3.6" +version = "6.3.7" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.3.6-py3-none-any.whl", hash = "sha256:0fb9e05df335619797828d0520fe5a84e43009600836b2eb61e034a645d2379c"}, - {file = "duckduckgo_search-6.3.6.tar.gz", hash = "sha256:58e020270e6a1515ead2ba386a86f9c5187c886654ddc7db62e3ddbc65489ff1"}, + {file = "duckduckgo_search-6.3.7-py3-none-any.whl", hash = "sha256:6a831a27977751e8928222f04c99a5d069ff80e2a7c78b699c9b9ac6cb48c41b"}, + {file = "duckduckgo_search-6.3.7.tar.gz", hash = "sha256:53d84966429a6377647e2a1ea7224b657575c7a4d506729bdb837e4ee12915ed"}, ] [package.dependencies] click = ">=8.1.7" -primp = ">=0.8.0" +primp = ">=0.8.1" [package.extras] dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] @@ -1849,13 +1874,13 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t [[package]] name = "identify" -version = "2.6.2" +version = "2.6.3" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.2-py2.py3-none-any.whl", hash = "sha256:c097384259f49e372f4ea00a19719d95ae27dd5ff0fd77ad630aa891306b82f3"}, - {file = "identify-2.6.2.tar.gz", hash = "sha256:fab5c716c24d7a789775228823797296a2994b075fb6080ac83a102772a98cbd"}, + {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"}, + {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"}, ] [package.extras] @@ -2009,84 +2034,86 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.7.1" +version = "0.8.0" description = "Fast iterable JSON parser." 
optional = false python-versions = ">=3.8" files = [ - {file = "jiter-0.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:262e96d06696b673fad6f257e6a0abb6e873dc22818ca0e0600f4a1189eb334f"}, - {file = "jiter-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be6de02939aac5be97eb437f45cfd279b1dc9de358b13ea6e040e63a3221c40d"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935f10b802bc1ce2b2f61843e498c7720aa7f4e4bb7797aa8121eab017293c3d"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9cd3cccccabf5064e4bb3099c87bf67db94f805c1e62d1aefd2b7476e90e0ee2"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aa919ebfc5f7b027cc368fe3964c0015e1963b92e1db382419dadb098a05192"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ae2d01e82c94491ce4d6f461a837f63b6c4e6dd5bb082553a70c509034ff3d4"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f9568cd66dbbdab67ae1b4c99f3f7da1228c5682d65913e3f5f95586b3cb9a9"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ecbf4e20ec2c26512736284dc1a3f8ed79b6ca7188e3b99032757ad48db97dc"}, - {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b1a0508fddc70ce00b872e463b387d49308ef02b0787992ca471c8d4ba1c0fa1"}, - {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f84c9996664c460f24213ff1e5881530abd8fafd82058d39af3682d5fd2d6316"}, - {file = "jiter-0.7.1-cp310-none-win32.whl", hash = "sha256:c915e1a1960976ba4dfe06551ea87063b2d5b4d30759012210099e712a414d9f"}, - {file = "jiter-0.7.1-cp310-none-win_amd64.whl", hash = "sha256:75bf3b7fdc5c0faa6ffffcf8028a1f974d126bac86d96490d1b51b3210aa0f3f"}, - {file = "jiter-0.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ad04a23a91f3d10d69d6c87a5f4471b61c2c5cd6e112e85136594a02043f462c"}, - {file = "jiter-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e47a554de88dff701226bb5722b7f1b6bccd0b98f1748459b7e56acac2707a5"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e44fff69c814a2e96a20b4ecee3e2365e9b15cf5fe4e00869d18396daa91dab"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df0a1d05081541b45743c965436f8b5a1048d6fd726e4a030113a2699a6046ea"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f22cf8f236a645cb6d8ffe2a64edb5d2b66fb148bf7c75eea0cb36d17014a7bc"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8589f50b728ea4bf22e0632eefa125c8aa9c38ed202a5ee6ca371f05eeb3ff"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f20de711224f2ca2dbb166a8d512f6ff48c9c38cc06b51f796520eb4722cc2ce"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8a9803396032117b85ec8cbf008a54590644a062fedd0425cbdb95e4b2b60479"}, - {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d8bae77c82741032e9d89a4026479061aba6e646de3bf5f2fc1ae2bbd9d06e0"}, - {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3dc9939e576bbc68c813fc82f6620353ed68c194c7bcf3d58dc822591ec12490"}, - {file = "jiter-0.7.1-cp311-none-win32.whl", hash = 
"sha256:f7605d24cd6fab156ec89e7924578e21604feee9c4f1e9da34d8b67f63e54892"}, - {file = "jiter-0.7.1-cp311-none-win_amd64.whl", hash = "sha256:f3ea649e7751a1a29ea5ecc03c4ada0a833846c59c6da75d747899f9b48b7282"}, - {file = "jiter-0.7.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ad36a1155cbd92e7a084a568f7dc6023497df781adf2390c345dd77a120905ca"}, - {file = "jiter-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7ba52e6aaed2dc5c81a3d9b5e4ab95b039c4592c66ac973879ba57c3506492bb"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b7de0b6f6728b678540c7927587e23f715284596724be203af952418acb8a2d"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9463b62bd53c2fb85529c700c6a3beb2ee54fde8bef714b150601616dcb184a6"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:627164ec01d28af56e1f549da84caf0fe06da3880ebc7b7ee1ca15df106ae172"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25d0e5bf64e368b0aa9e0a559c3ab2f9b67e35fe7269e8a0d81f48bbd10e8963"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c244261306f08f8008b3087059601997016549cb8bb23cf4317a4827f07b7d74"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7ded4e4b75b68b843b7cea5cd7c55f738c20e1394c68c2cb10adb655526c5f1b"}, - {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:80dae4f1889b9d09e5f4de6b58c490d9c8ce7730e35e0b8643ab62b1538f095c"}, - {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5970cf8ec943b51bce7f4b98d2e1ed3ada170c2a789e2db3cb484486591a176a"}, - {file = "jiter-0.7.1-cp312-none-win32.whl", hash = "sha256:701d90220d6ecb3125d46853c8ca8a5bc158de8c49af60fd706475a49fee157e"}, - {file = "jiter-0.7.1-cp312-none-win_amd64.whl", hash = "sha256:7824c3ecf9ecf3321c37f4e4d4411aad49c666ee5bc2a937071bdd80917e4533"}, - {file = "jiter-0.7.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:097676a37778ba3c80cb53f34abd6943ceb0848263c21bf423ae98b090f6c6ba"}, - {file = "jiter-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3298af506d4271257c0a8f48668b0f47048d69351675dd8500f22420d4eec378"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12fd88cfe6067e2199964839c19bd2b422ca3fd792949b8f44bb8a4e7d21946a"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dacca921efcd21939123c8ea8883a54b9fa7f6545c8019ffcf4f762985b6d0c8"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de3674a5fe1f6713a746d25ad9c32cd32fadc824e64b9d6159b3b34fd9134143"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65df9dbae6d67e0788a05b4bad5706ad40f6f911e0137eb416b9eead6ba6f044"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ba9a358d59a0a55cccaa4957e6ae10b1a25ffdabda863c0343c51817610501d"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:576eb0f0c6207e9ede2b11ec01d9c2182973986514f9c60bc3b3b5d5798c8f50"}, - {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:e550e29cdf3577d2c970a18f3959e6b8646fd60ef1b0507e5947dc73703b5627"}, - {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:81d968dbf3ce0db2e0e4dec6b0a0d5d94f846ee84caf779b07cab49f5325ae43"}, - {file = "jiter-0.7.1-cp313-none-win32.whl", hash = "sha256:f892e547e6e79a1506eb571a676cf2f480a4533675f834e9ae98de84f9b941ac"}, - {file = "jiter-0.7.1-cp313-none-win_amd64.whl", hash = "sha256:0302f0940b1455b2a7fb0409b8d5b31183db70d2b07fd177906d83bf941385d1"}, - {file = "jiter-0.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:c65a3ce72b679958b79d556473f192a4dfc5895e8cc1030c9f4e434690906076"}, - {file = "jiter-0.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e80052d3db39f9bb8eb86d207a1be3d9ecee5e05fdec31380817f9609ad38e60"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a497859c4f3f7acd71c8bd89a6f9cf753ebacacf5e3e799138b8e1843084e3"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c1288bc22b9e36854a0536ba83666c3b1fb066b811019d7b682c9cf0269cdf9f"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b096ca72dd38ef35675e1d3b01785874315182243ef7aea9752cb62266ad516f"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbd52c50b605af13dbee1a08373c520e6fcc6b5d32f17738875847fea4e2cd"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af29c5c6eb2517e71ffa15c7ae9509fa5e833ec2a99319ac88cc271eca865519"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f114a4df1e40c03c0efbf974b376ed57756a1141eb27d04baee0680c5af3d424"}, - {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:191fbaee7cf46a9dd9b817547bf556facde50f83199d07fc48ebeff4082f9df4"}, - {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e2b445e5ee627fb4ee6bbceeb486251e60a0c881a8e12398dfdff47c56f0723"}, - {file = "jiter-0.7.1-cp38-none-win32.whl", hash = "sha256:47ac4c3cf8135c83e64755b7276339b26cd3c7ddadf9e67306ace4832b283edf"}, - {file = "jiter-0.7.1-cp38-none-win_amd64.whl", hash = "sha256:60b49c245cd90cde4794f5c30f123ee06ccf42fb8730a019a2870cd005653ebd"}, - {file = "jiter-0.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8f212eeacc7203256f526f550d105d8efa24605828382cd7d296b703181ff11d"}, - {file = "jiter-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d9e247079d88c00e75e297e6cb3a18a039ebcd79fefc43be9ba4eb7fb43eb726"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0aacaa56360139c53dcf352992b0331f4057a0373bbffd43f64ba0c32d2d155"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc1b55314ca97dbb6c48d9144323896e9c1a25d41c65bcb9550b3e0c270ca560"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f281aae41b47e90deb70e7386558e877a8e62e1693e0086f37d015fa1c102289"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93c20d2730a84d43f7c0b6fb2579dc54335db742a59cf9776d0b80e99d587382"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e81ccccd8069110e150613496deafa10da2f6ff322a707cbec2b0d52a87b9671"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a7d5e85766eff4c9be481d77e2226b4c259999cb6862ccac5ef6621d3c8dcce"}, - {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:f52ce5799df5b6975439ecb16b1e879d7655e1685b6e3758c9b1b97696313bfb"}, - {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0c91a0304373fdf97d56f88356a010bba442e6d995eb7773cbe32885b71cdd8"}, - {file = "jiter-0.7.1-cp39-none-win32.whl", hash = "sha256:5c08adf93e41ce2755970e8aa95262298afe2bf58897fb9653c47cd93c3c6cdc"}, - {file = "jiter-0.7.1-cp39-none-win_amd64.whl", hash = "sha256:6592f4067c74176e5f369228fb2995ed01400c9e8e1225fb73417183a5e635f0"}, - {file = "jiter-0.7.1.tar.gz", hash = "sha256:448cf4f74f7363c34cdef26214da527e8eeffd88ba06d0b80b485ad0667baf5d"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dee4eeb293ffcd2c3b31ebab684dbf7f7b71fe198f8eddcdf3a042cc6e10205a"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aad1e6e9b01cf0304dcee14db03e92e0073287a6297caf5caf2e9dbfea16a924"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:504099fb7acdbe763e10690d560a25d4aee03d918d6a063f3a761d8a09fb833f"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2373487caad7fe39581f588ab5c9262fc1ade078d448626fec93f4ffba528858"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c341ecc3f9bccde952898b0c97c24f75b84b56a7e2f8bbc7c8e38cab0875a027"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e48e7a336529b9419d299b70c358d4ebf99b8f4b847ed3f1000ec9f320e8c0c"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ee157a8afd2943be690db679f82fafb8d347a8342e8b9c34863de30c538d55"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7dceae3549b80087f913aad4acc2a7c1e0ab7cb983effd78bdc9c41cabdcf18"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e29e9ecce53d396772590438214cac4ab89776f5e60bd30601f1050b34464019"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fa1782f22d5f92c620153133f35a9a395d3f3823374bceddd3e7032e2fdfa0b1"}, + {file = "jiter-0.8.0-cp310-none-win32.whl", hash = "sha256:f754ef13b4e4f67a3bf59fe974ef4342523801c48bf422f720bd37a02a360584"}, + {file = "jiter-0.8.0-cp310-none-win_amd64.whl", hash = "sha256:796f750b65f5d605f5e7acaccc6b051675e60c41d7ac3eab40dbd7b5b81a290f"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f6f4e645efd96b4690b9b6091dbd4e0fa2885ba5c57a0305c1916b75b4f30ff6"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f61cf6d93c1ade9b8245c9f14b7900feadb0b7899dbe4aa8de268b705647df81"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0396bc5cb1309c6dab085e70bb3913cdd92218315e47b44afe9eace68ee8adaa"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62d0e42ec5dc772bd8554a304358220be5d97d721c4648b23f3a9c01ccc2cb26"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec4b711989860705733fc59fb8c41b2def97041cea656b37cf6c8ea8dee1c3f4"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859cc35bf304ab066d88f10a44a3251a9cd057fb11ec23e00be22206db878f4f"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5000195921aa293b39b9b5bc959d7fa658e7f18f938c0e52732da8e3cc70a278"}, + 
{file = "jiter-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36050284c0abde57aba34964d3920f3d6228211b65df7187059bb7c7f143759a"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a88f608e050cfe45c48d771e86ecdbf5258314c883c986d4217cc79e1fb5f689"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:646cf4237665b2e13b4159d8f26d53f59bc9f2e6e135e3a508a2e5dd26d978c6"}, + {file = "jiter-0.8.0-cp311-none-win32.whl", hash = "sha256:21fe5b8345db1b3023052b2ade9bb4d369417827242892051244af8fae8ba231"}, + {file = "jiter-0.8.0-cp311-none-win_amd64.whl", hash = "sha256:30c2161c5493acf6b6c3c909973fb64ae863747def01cc7574f3954e0a15042c"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d91a52d8f49ada2672a4b808a0c5c25d28f320a2c9ca690e30ebd561eb5a1002"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c38cf25cf7862f61410b7a49684d34eb3b5bcbd7ddaf4773eea40e0bd43de706"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6189beb5c4b3117624be6b2e84545cff7611f5855d02de2d06ff68e316182be"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e13fa849c0e30643554add089983caa82f027d69fad8f50acadcb21c462244ab"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d7765ca159d0a58e8e0f8ca972cd6d26a33bc97b4480d0d2309856763807cd28"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b0befe7c6e9fc867d5bed21bab0131dfe27d1fa5cd52ba2bced67da33730b7d"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d6363d4c6f1052b1d8b494eb9a72667c3ef5f80ebacfe18712728e85327000"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a873e57009863eeac3e3969e4653f07031d6270d037d6224415074ac17e5505c"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2582912473c0d9940791479fe1bf2976a34f212eb8e0a82ee9e645ac275c5d16"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:646163201af42f55393ee6e8f6136b8df488253a6533f4230a64242ecbfe6048"}, + {file = "jiter-0.8.0-cp312-none-win32.whl", hash = "sha256:96e75c9abfbf7387cba89a324d2356d86d8897ac58c956017d062ad510832dae"}, + {file = "jiter-0.8.0-cp312-none-win_amd64.whl", hash = "sha256:ed6074552b4a32e047b52dad5ab497223721efbd0e9efe68c67749f094a092f7"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:dd5e351cb9b3e676ec3360a85ea96def515ad2b83c8ae3a251ce84985a2c9a6f"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba9f12b0f801ecd5ed0cec29041dc425d1050922b434314c592fc30d51022467"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7ba461c3681728d556392e8ae56fb44a550155a24905f01982317b367c21dd4"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a15ed47ab09576db560dbc5c2c5a64477535beb056cd7d997d5dd0f2798770e"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cef55042816d0737142b0ec056c0356a5f681fb8d6aa8499b158e87098f4c6f8"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:549f170215adeb5e866f10617c3d019d8eb4e6d4e3c6b724b3b8c056514a3487"}, + {file = 
"jiter-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f867edeb279d22020877640d2ea728de5817378c60a51be8af731a8a8f525306"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aef8845f463093799db4464cee2aa59d61aa8edcb3762aaa4aacbec3f478c929"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:d0d6e22e4062c3d3c1bf3594baa2f67fc9dcdda8275abad99e468e0c6540bc54"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:079e62e64696241ac3f408e337aaac09137ed760ccf2b72b1094b48745c13641"}, + {file = "jiter-0.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74d2b56ed3da5760544df53b5f5c39782e68efb64dc3aa0bba4cc08815e6fae8"}, + {file = "jiter-0.8.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:798dafe108cba58a7bb0a50d4d5971f98bb7f3c974e1373e750de6eb21c1a329"}, + {file = "jiter-0.8.0-cp313-none-win32.whl", hash = "sha256:ca6d3064dfc743eb0d3d7539d89d4ba886957c717567adc72744341c1e3573c9"}, + {file = "jiter-0.8.0-cp313-none-win_amd64.whl", hash = "sha256:38caedda64fe1f04b06d7011fc15e86b3b837ed5088657bf778656551e3cd8f9"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:bb5c8a0a8d081c338db22e5b8d53a89a121790569cbb85f7d3cfb1fe0fbe9836"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:202dbe8970bfb166fab950eaab8f829c505730a0b33cc5e1cfb0a1c9dd56b2f9"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9046812e5671fdcfb9ae02881fff1f6a14d484b7e8b3316179a372cdfa1e8026"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6ac56425023e52d65150918ae25480d0a1ce2a6bf5ea2097f66a2cc50f6d692"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7dfcf97210c6eab9d2a1c6af15dd39e1d5154b96a7145d0a97fa1df865b7b834"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4e3c8444d418686f78c9a547b9b90031faf72a0a1a46bfec7fb31edbd889c0d"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6507011a299b7f578559084256405a8428875540d8d13530e00b688e41b09493"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0aae4738eafdd34f0f25c2d3668ce9e8fa0d7cb75a2efae543c9a69aebc37323"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5d782e790396b13f2a7b36bdcaa3736a33293bdda80a4bf1a3ce0cd5ef9f15"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cc7f993bc2c4e03015445adbb16790c303282fce2e8d9dc3a3905b1d40e50564"}, + {file = "jiter-0.8.0-cp38-none-win32.whl", hash = "sha256:d4a8a6eda018a991fa58ef707dd51524055d11f5acb2f516d70b1be1d15ab39c"}, + {file = "jiter-0.8.0-cp38-none-win_amd64.whl", hash = "sha256:4cca948a3eda8ea24ed98acb0ee19dc755b6ad2e570ec85e1527d5167f91ff67"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ef89663678d8257063ce7c00d94638e05bd72f662c5e1eb0e07a172e6c1a9a9f"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c402ddcba90b4cc71db3216e8330f4db36e0da2c78cf1d8a9c3ed8f272602a94"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6dfe795b7a173a9f8ba7421cdd92193d60c1c973bbc50dc3758a9ad0fa5eb6"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:8ec29a31b9abd6be39453a2c45da067138a3005d65d2c0507c530e0f1fdcd9a4"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a488f8c54bddc3ddefaf3bfd6de4a52c97fc265d77bc2dcc6ee540c17e8c342"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aeb5561adf4d26ca0d01b5811b4d7b56a8986699a473d700757b4758ef787883"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab961858d7ad13132328517d29f121ae1b2d94502191d6bcf96bddcc8bb5d1c"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a207e718d114d23acf0850a2174d290f42763d955030d9924ffa4227dbd0018f"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:733bc9dc8ff718a0ae4695239e9268eb93e88b73b367dfac3ec227d8ce2f1e77"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1ec27299e22d05e13a06e460bf7f75f26f9aaa0e0fb7d060f40e88df1d81faa"}, + {file = "jiter-0.8.0-cp39-none-win32.whl", hash = "sha256:e8dbfcb46553e6661d3fc1f33831598fcddf73d0f67834bce9fc3e9ebfe5c439"}, + {file = "jiter-0.8.0-cp39-none-win_amd64.whl", hash = "sha256:af2ce2487b3a93747e2cb5150081d4ae1e5874fce5924fc1a12e9e768e489ad8"}, + {file = "jiter-0.8.0.tar.gz", hash = "sha256:86fee98b569d4cc511ff2e3ec131354fafebd9348a487549c31ad371ae730310"}, ] [[package]] @@ -3158,13 +3185,13 @@ files = [ [[package]] name = "openai" -version = "1.55.0" +version = "1.55.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"}, - {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"}, + {file = "openai-1.55.2-py3-none-any.whl", hash = "sha256:3027c7fa4a33ed759f4a3d076093fcfa1c55658660c889bec33f651e2dc77922"}, + {file = "openai-1.55.2.tar.gz", hash = "sha256:5cc0b1162b65dcdf670b4b41448f18dd470d2724ca04821ab1e86b6b4e88650b"}, ] [package.dependencies] @@ -3536,19 +3563,20 @@ tests = ["pytest", "pytest-cov", "pytest-lazy-fixtures"] [[package]] name = "primp" -version = "0.8.0" +version = "0.8.1" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" optional = false python-versions = ">=3.8" files = [ - {file = "primp-0.8.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5cb4d1db83d92a95fb4506d4605484b389a988fb962e80089caa73c035185f58"}, - {file = "primp-0.8.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:135e6350a6c509fcc3d1cc03d2025edd54783bca671a39a2d4f240ce5d406576"}, - {file = "primp-0.8.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609f4363fb591bde351e6372ba0caaf1ac963d38cbf942bc42dc3284575b4cdf"}, - {file = "primp-0.8.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e341c821fa265f2eaf2a0de80924e465f7bc20a84e9ce28e65cee350ad2cc300"}, - {file = "primp-0.8.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6672554a653f4ef5e672f3985481bc4afff9bfbeaf2bc7b70b9230b7672d49d6"}, - {file = "primp-0.8.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ea18ebc1f664898beac62cfa092ff38ad70c7eb0b3120aecd18ab7a776b6b3fb"}, - {file = "primp-0.8.0-cp38-abi3-win_amd64.whl", hash = "sha256:bcf9895f8dd97d49843adbed635d713e3a1c2dc0a4b08ac0879292be83f1e447"}, - {file = "primp-0.8.0.tar.gz", hash = 
"sha256:6472651b8270247b3121f728b613e312301b8f7e9170944a4e71771dd58eaa8b"}, + {file = "primp-0.8.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8294db817701ad76b6a186c16e22cc49d36fac5986647a83657ad4a58ddeee42"}, + {file = "primp-0.8.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e8117531dcdb0dbcf9855fdbac73febdde5967ca0332a2c05b5961d2fbcfe749"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:993cc4284e8c5c858254748f078e872ba250c9339d64398dc000a8f9cffadda3"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4a27ac642be5c616fc5f139a5ad391dcd0c5964ace56fe6cf31cbffb972a7480"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_armv7l.whl", hash = "sha256:e8483b8d9eec9fc43d77bb448555466030f29cdd99d9375eb75155e9f832e5bd"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:92f5f8267216252cfb27f2149811e14682bb64f0c5d37f00d218d1592e02f0b9"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:98f7f3a9481c55c56e7eff9024f29e16379a87d5b0a1b683e145dd8fcbdcc46b"}, + {file = "primp-0.8.1-cp38-abi3-win_amd64.whl", hash = "sha256:6f0018a26be787431504e32548b296a278abbe85da43bcbaf2d4982ac3dcd332"}, + {file = "primp-0.8.1.tar.gz", hash = "sha256:ddf05754a7b70d59df8a014a8585e418f9c04e0b69065bab6633f4a9b92bad93"}, ] [package.extras] @@ -4025,53 +4053,53 @@ files = [ [[package]] name = "pyarrow" -version = "18.0.0" +version = "18.1.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" files = [ - {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, - {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"}, - {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"}, - {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"}, - {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"}, - {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"}, - {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"}, - {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"}, - {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"}, - {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"}, - {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"}, - {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"}, - {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"}, - {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"}, - {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"}, - {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = 
"sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"}, - {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"}, - {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"}, + {file = "pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c"}, + {file = "pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56"}, + {file = "pyarrow-18.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812"}, + {file = "pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854"}, + {file = "pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0"}, + {file = "pyarrow-18.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a"}, + {file = "pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d"}, + {file = "pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30"}, + {file = "pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99"}, + {file = "pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b"}, + {file = "pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c"}, + {file = "pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181"}, + {file = "pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc"}, + {file = "pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba"}, + {file = "pyarrow-18.1.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:0b331e477e40f07238adc7ba7469c36b908f07c89b95dd4bd3a0ec84a3d1e21e"}, + {file = "pyarrow-18.1.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c4dd0c9010a25ba03e198fe743b1cc03cd33c08190afff371749c52ccbbaf76"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f97b31b4c4e21ff58c6f330235ff893cc81e23da081b1a4b1c982075e0ed4e9"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a4813cb8ecf1809871fd2d64a8eff740a1bd3691bbe55f01a3cf6c5ec869754"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:05a5636ec3eb5cc2a36c6edb534a38ef57b2ab127292a716d00eabb887835f1e"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:73eeed32e724ea3568bb06161cad5fa7751e45bc2228e33dcb10c614044165c7"}, + {file = "pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052"}, + {file = "pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73"}, ] [package.extras] @@ -4142,13 +4170,13 @@ files = [ [[package]] name = "pydantic" -version = "2.10.1" +version = "2.10.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.1-py3-none-any.whl", hash = "sha256:a8d20db84de64cf4a7d59e899c2caf0fe9d660c7cfc482528e7020d7dd189a7e"}, - {file = "pydantic-2.10.1.tar.gz", hash = "sha256:a4daca2dc0aa429555e0656d6bf94873a7dc5f54ee42b1f5873d666fb3f35560"}, + {file = "pydantic-2.10.2-py3-none-any.whl", hash = "sha256:cfb96e45951117c3024e6b67b25cdc33a3cb7b2fa62e239f7af1378358a1d99e"}, + {file = "pydantic-2.10.2.tar.gz", hash = "sha256:2bc2d7f17232e0841cbba4641e65ba1eb6fafb3a08de3a091ff3ce14a197c4fa"}, ] [package.dependencies] @@ -5820,20 +5848,20 @@ files = [ [[package]] name = "tqdm" -version = "4.67.0" +version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, - {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] @@ -6063,13 +6091,13 @@ standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", [[package]] name = "virtualenv" -version = "20.27.1" +version = "20.28.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"}, - {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"}, + {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, + {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, ] [package.dependencies] @@ -6688,4 +6716,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10 <3.11" -content-hash = "31da2759578c6c7e68b892ec067ac9db8309ba9c75132cb52fbd36bc85ceb4d6" +content-hash = "de00aa9b7294d8f0766f98ff424f176dc80f07d71feb4d3ab84ed396fd92cc50" diff --git a/pyproject.toml b/pyproject.toml index 73c7f20e..e0fcec0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -149,7 +149,7 @@ black = "23.7.0" pytest = "^8.3.1" angle-emb = "0.4.3" organic-scoring = {git = "https://github.com/macrocosm-os/organic-scoring.git", rev = "main"} -autoawq = "^0.2.5" +autoawq = "0.2.0" loguru = 
"^0.7.2" duckduckgo-search = "^6.2.12" trafilatura = "^1.12.1" From 26d1db13f065e50d71a644073cd8d68e25848cc3 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:36:37 +0000 Subject: [PATCH 19/40] Support delta or message in sn19 response --- prompting/llms/apis/sn19_wrapper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/prompting/llms/apis/sn19_wrapper.py b/prompting/llms/apis/sn19_wrapper.py index a227d2ab..798c9852 100644 --- a/prompting/llms/apis/sn19_wrapper.py +++ b/prompting/llms/apis/sn19_wrapper.py @@ -6,6 +6,7 @@ from prompting.llms.apis.llm_messages import LLMMessages from prompting.settings import settings +from test_sn19 import response # TODO: key error in response.json() when response is 500 @@ -39,6 +40,9 @@ def chat_complete( response = requests.post(url, headers=headers, data=json.dumps(data)) try: response_json = response.json() - return response_json["choices"][0]["message"].get("content") + try: + return response_json["choices"][0]["message"].get("content") + except KeyError: + return response_json["choices"][0]["delta"].get("content") except Exception as e: logger.exception(f"Error in chat_complete: {e}") From 0389d1bdf64dc8afa1ab8f2c04fae80e83be11c3 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:42:01 +0000 Subject: [PATCH 20/40] Remove Unecessary Line --- prompting/llms/apis/sn19_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prompting/llms/apis/sn19_wrapper.py b/prompting/llms/apis/sn19_wrapper.py index 798c9852..e8445146 100644 --- a/prompting/llms/apis/sn19_wrapper.py +++ b/prompting/llms/apis/sn19_wrapper.py @@ -6,7 +6,6 @@ from prompting.llms.apis.llm_messages import LLMMessages from prompting.settings import settings -from test_sn19 import response # TODO: key error in response.json() when response is 500 From 351c14c70ebefbf8ce32e7ddb85915735dea526d Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 10:28:17 -1000 Subject: [PATCH 21/40] Formatting --- prompting/api/gpt_endpoints/api.py | 31 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index e25d4f21..33f77e3f 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -2,15 +2,12 @@ import random from typing import AsyncGenerator -import openai from fastapi import APIRouter, Depends, HTTPException, Request -from fastapi.responses import StreamingResponse -from httpx import Timeout from loguru import logger from prompting.api.api_managements.api import validate_api_key from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult -from prompting.base.epistula import create_header_hook, query_miners +from prompting.base.epistula import query_miners from prompting.miner_availability.miner_availability import miner_availabilities from prompting.rewards.scoring import task_scorer from prompting.settings import settings @@ -18,7 +15,6 @@ from prompting.tasks.task_registry import TaskRegistry from prompting.utils.timer import Timer - router = APIRouter() @@ -75,7 +71,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( stream = body.get("stream") body = {k: v for k, v in body.items() if k not in ["task", "stream"]} - body['task'] = task.__class__.__name__ + body["task"] = task.__class__.__name__ body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- 
Using seed: {body.get('seed')}") @@ -85,19 +81,20 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( - status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" + status_code=503, + detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}", ) - response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream = stream) + response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) if stream: return response else: response = await response response_event = DendriteResponseEvent( - stream_results = response, - uids = available_miners, - timeout = settings.NEURON_TIMEOUT, - completions = ["".join(res.accumulated_chunks) for res in response] + stream_results=response, + uids=available_miners, + timeout=settings.NEURON_TIMEOUT, + completions=["".join(res.accumulated_chunks) for res in response], ) task = task( @@ -109,11 +106,15 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ) task_scorer.add_to_queue( - task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + task=task, + response=response_event, + dataset_entry=task.dataset_entry, + block=-1, + step=-1, + task_id=task.task_id, ) - - return [res.model_dump() for res in response] + return [res.model_dump() for res in response] # axon_info = settings.METAGRAPH.axons[available_miners[0]] # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" From 0f6bfd7476090a3941830f9dfe6ba73bc1db1214 Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:11:24 -0500 Subject: [PATCH 22/40] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e0fcec0f..429d30b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "prompting" -version = "2.13.1" +version = "2.13.2" description = "Subnetwork 1 runs on Bittensor and is maintained by Macrocosmos. It's an effort to create decentralised AI" authors = ["Kalei Brady, Dmytro Bobrenko, Felix Quinque, Steffen Cruz"] readme = "README.md" From 9a037cc36dc3e629a036f943630c03244bd1f634 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 17:00:35 -1000 Subject: [PATCH 23/40] Add test_api to scripts --- scripts/test_api.py | 102 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 scripts/test_api.py diff --git a/scripts/test_api.py b/scripts/test_api.py new file mode 100644 index 00000000..147d850c --- /dev/null +++ b/scripts/test_api.py @@ -0,0 +1,102 @@ +import openai +from httpx import Timeout +from typing import Optional +from prompting.base.epistula import create_header_hook +from prompting import settings + +settings.settings = settings.Settings.load(mode="validator") +settings = settings.settings + + +def setup_miner_client( + port: int = 8004, + api_key: str = "123456", # Default key from your api_keys.json + hotkey: Optional[str] = None +) -> openai.AsyncOpenAI: + """ + Setup an authenticated OpenAI client for the miner. 
+ + Args: + port: Port number for the local server + api_key: API key for authentication + hotkey: Optional wallet hotkey + + Returns: + Configured AsyncOpenAI client + """ + + # Create headers with both API key and hotkey + async def combined_header_hook(request): + # Add API key header + request.headers["api-key"] = api_key + # Add any additional headers from the original header hook + if hotkey: + original_hook = create_header_hook(hotkey, None) + await original_hook(request) + return request + + return openai.AsyncOpenAI( + base_url=f"http://localhost:{port}/v1", + max_retries=0, + timeout=Timeout(15, connect=5, read=10), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [combined_header_hook]} + ), + ) + + +async def make_completion( + miner: openai.AsyncOpenAI, + prompt: str, + stream: bool = False, + seed: str = "1759348" +) -> str: + """ + Make a completion request to the API. + + Args: + miner: Configured AsyncOpenAI client + prompt: Input prompt + stream: Whether to stream the response + seed: Random seed for reproducibility + + Returns: + Generated completion text + """ + result = await miner.chat.completions.create( + model="Test-Model", + messages=[{"role": "user", "content": prompt}], + stream=stream, + extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask"} + ) + + if not stream: + return result + else: + chunks = [] + async for chunk in result: + print(chunk) + if chunk.choices[0].delta.content: + chunks.append(chunk.choices[0].delta.content) + return "".join(chunks) + + +async def main(): + PORT = 8004 + API_KEY = "YOUR_API_KEY_HERE" + miner = setup_miner_client( + port=PORT, + api_key=API_KEY, + hotkey=settings.WALLET.hotkey if hasattr(settings, 'WALLET') else None + ) + response = await make_completion( + miner=miner, + prompt="Say 10 random numbers between 1 and 100", + stream=False + ) + print(response) + + +# Run the async main function +import asyncio +asyncio.run(main()) \ No newline at end of file From 72fed859e2ec51fa3de66e26d681b00bfe12ba87 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:51:34 +0000 Subject: [PATCH 24/40] SN1-327: Clean up, link system prompt ticket --- .../api/gpt_endpoints/process_completions.py | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/prompting/api/gpt_endpoints/process_completions.py b/prompting/api/gpt_endpoints/process_completions.py index 4234227f..79eb6ed4 100644 --- a/prompting/api/gpt_endpoints/process_completions.py +++ b/prompting/api/gpt_endpoints/process_completions.py @@ -70,41 +70,3 @@ async def process_completions(body: dict[str, any]): ) return [res.model_dump() for res in response] - - -# async def process_and_collect_stream(miner_id: int, request: dict, response: AsyncGenerator): -# collected_content = [] -# collected_chunks_timings = [] -# with Timer() as timer: -# async for chunk in response: -# logger.debug(f"Chunk: {chunk}") -# if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): -# collected_content.append(chunk.choices[0].delta.content) -# collected_chunks_timings.append(timer.elapsed_time()) -# yield f"data: {json.dumps(chunk.model_dump())}\n\n" - -# task = InferenceTask( -# query=request["messages"][-1]["content"], -# messages=[message["content"] for message in request["messages"]], -# model=request.get("model"), -# seed=request.get("seed"), -# response="".join(collected_content), -# ) -# 
logger.debug(f"Adding Organic Request to scoring queue: {task}") -# response_event = DendriteResponseEvent( -# stream_results=[ -# SynapseStreamResult( -# uid=miner_id, -# accumulated_chunks=collected_content, -# accumulated_chunks_timings=collected_chunks_timings, -# ) -# ], -# uids=[miner_id], -# timeout=settings.NEURON_TIMEOUT, -# completions=["".join(collected_content)], -# ) - -# task_scorer.add_to_queue( -# task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id -# ) -# yield "data: [DONE]\n\n" From fd476cedb7efb1b550412cf6600e9be3f831da89 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:22:33 +0000 Subject: [PATCH 25/40] Fix syntax --- prompting/tasks/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/tasks/base_task.py b/prompting/tasks/base_task.py index 26ab86f2..51563a25 100644 --- a/prompting/tasks/base_task.py +++ b/prompting/tasks/base_task.py @@ -94,7 +94,7 @@ def generate_query( """Generates a query to be used for generating the challenge""" logger.info("🤖 Generating query...") llm_messages = [LLMMessage(role="system", content=self.query_system_prompt)] if self.query_system_prompt else [] - llm_messages.extemd([LLMMessage(role="user", content=message) for message in messages]) + llm_messages.extend([LLMMessage(role="user", content=message) for message in messages]) self.query = LLMWrapper.chat_complete(messages=LLMMessages(*llm_messages)) From 0b375183b93bcf3c478228a4eb6a4b79f3b24cf6 Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:26:21 -0500 Subject: [PATCH 26/40] Update api_keys.json Co-authored-by: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> --- api_keys.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_keys.json b/api_keys.json index 93b0e261..0967ef42 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} +{} From 9485e563380041132011ee089579e3ed6bb79d4e Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:26:35 -0500 Subject: [PATCH 27/40] Update prompting/api/gpt_endpoints/api.py Co-authored-by: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> --- prompting/api/gpt_endpoints/api.py | 54 ------------------------------ 1 file changed, 54 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 33f77e3f..a4adb613 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -116,57 +116,3 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( return [res.model_dump() for res in response] - # axon_info = settings.METAGRAPH.axons[available_miners[0]] - # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # miner_id = available_miners[0] - # logger.debug(f"Using base_url: {base_url}") - - # miner = openai.AsyncOpenAI( - # base_url=base_url, - # max_retries=0, - # timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - # http_client=openai.DefaultAsyncHttpxClient( - # event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} - # ), - # ) - - # try: - # with Timer() as timer: - # if task: - # response 
= await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) - # else: - # response = await miner.chat.completions.create(**body) - # if body.get("stream"): - # return StreamingResponse( - # process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" - # ) - # except Exception as e: - # logger.exception(f"Error coming from Miner: {e}") - # raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - - # response_event = DendriteResponseEvent( - # stream_results=[ - # SynapseStreamResult( - # uid=miner_id, - # accumulated_chunks=[response.choices[0].message.content], - # accumulated_chunks_timings=[timer.final_time], - # ) - # ], - # completions=[response.choices[0].message.content], - # uids=[miner_id], - # timeout=settings.NEURON_TIMEOUT, - # ) - - # task = InferenceTask( - # query=body["messages"][-1]["content"], - # messages=[message["content"] for message in body["messages"]], - # model=body.get("model"), - # seed=body.get("seed"), - # response=response_event, - # ) - - # task_scorer.add_to_queue( - # task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id - # ) - - # return response From f17682171055db962a0e63c9be4fea5457a5cfcc Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 13:37:05 +0000 Subject: [PATCH 28/40] Add keys example --- api_keys.json | 1 - api_keys.json.example | 1 + neurons/validator.py | 6 +++--- prompting/api/api_keys.json | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 api_keys.json create mode 100644 api_keys.json.example delete mode 100644 prompting/api/api_keys.json diff --git a/api_keys.json b/api_keys.json deleted file mode 100644 index 93b0e261..00000000 --- a/api_keys.json +++ /dev/null @@ -1 +0,0 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} diff --git a/api_keys.json.example b/api_keys.json.example new file mode 100644 index 00000000..fd065361 --- /dev/null +++ b/api_keys.json.example @@ -0,0 +1 @@ +{"API_KEY_VALUE": {"rate_limit": 10, "usage": 0}} diff --git a/neurons/validator.py b/neurons/validator.py index 3da470b6..eae65a92 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -220,9 +220,9 @@ async def main(): # start scoring tasks in separate loop asyncio.create_task(task_scorer.start()) - # # TODO: Think about whether we want to store the task queue locally in case of a crash - # # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating - # # TODO: Make weight setting happen as specific intervals as we load/unload models + # TODO: Think about whether we want to store the task queue locally in case of a crash + # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating + # TODO: Make weight setting happen as specific intervals as we load/unload models with Validator() as v: while True: logger.info( diff --git a/prompting/api/api_keys.json b/prompting/api/api_keys.json deleted file mode 100644 index 0967ef42..00000000 --- a/prompting/api/api_keys.json +++ /dev/null @@ -1 +0,0 @@ -{} From 1bf399633ebe5c76486669459bdd1cef6288e325 Mon Sep 17 00:00:00 2001 From: richwardle Date: Thu, 28 Nov 2024 14:57:06 +0000 Subject: [PATCH 29/40] Push Working Changes --- .gitignore | 2 ++ prompting/api/gpt_endpoints/api.py | 2 +- 
prompting/miner_availability/miner_availability.py | 9 ++++++++- scripts/test_api.py | 13 +++++++------ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5834a8af..c9dedde8 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,5 @@ core app.config.js wandb .vscode +api_keys.json +prompting/api/api_keys.json \ No newline at end of file diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index a4adb613..ac7445e6 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -82,7 +82,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ): raise HTTPException( status_code=503, - detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}", + detail=f"No miners available for model: {body.get('model')} and task: {task.__name__}", ) response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index 29bbc7f9..28d50fa5 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -35,7 +35,14 @@ def is_model_available(self, model: str) -> bool: return self.llm_model_availabilities[model] def is_task_available(self, task: BaseTask) -> bool: - return self.task_availabilities[task.__class__.__name__] + if isinstance(task, BaseTask): + try: + return self.task_availabilities[task.__class__.__name__] + except Exception as e: + logger.error(f"Error in is_task_available: {e}") + return False + else: + return self.task_availabilities[task.__name__] class MinerAvailabilities(BaseModel): diff --git a/scripts/test_api.py b/scripts/test_api.py index 147d850c..43a3a477 100644 --- a/scripts/test_api.py +++ b/scripts/test_api.py @@ -38,7 +38,7 @@ async def combined_header_hook(request): return openai.AsyncOpenAI( base_url=f"http://localhost:{port}/v1", max_retries=0, - timeout=Timeout(15, connect=5, read=10), + timeout=Timeout(30, connect=10, read=20), http_client=openai.DefaultAsyncHttpxClient( event_hooks={"request": [combined_header_hook]} ), @@ -64,15 +64,16 @@ async def make_completion( Generated completion text """ result = await miner.chat.completions.create( - model="Test-Model", + model=None, messages=[{"role": "user", "content": prompt}], stream=stream, - extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask"} + extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask", "mixture": False} ) if not stream: return result else: + print('In the else') chunks = [] async for chunk in result: print(chunk) @@ -83,7 +84,7 @@ async def make_completion( async def main(): PORT = 8004 - API_KEY = "YOUR_API_KEY_HERE" + API_KEY = "0566dbe21ee33bba9419549716cd6f1f" miner = setup_miner_client( port=PORT, api_key=API_KEY, @@ -92,9 +93,9 @@ async def main(): response = await make_completion( miner=miner, prompt="Say 10 random numbers between 1 and 100", - stream=False + stream=True ) - print(response) + print(["".join(res.accumulated_chunks) for res in response]) # Run the async main function From 6bab37e2035401c388bfc54592859b19ac397dfe Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 28 Nov 2024 10:13:03 -1000 Subject: [PATCH 30/40] Add Optional Api Deployment --- neurons/validator.py | 3 ++- prompting/settings.py | 1 + 2 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/neurons/validator.py b/neurons/validator.py index 312b88c1..c615d0db 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -197,7 +197,8 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): - asyncio.create_task(start_api()) + if settings.DEPLOY_API: + asyncio.create_task(start_api()) GPUInfo.log_gpu_info() # start profiling diff --git a/prompting/settings.py b/prompting/settings.py index 04d3a4cd..6d007ab3 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -65,6 +65,7 @@ class Settings(BaseSettings): TASK_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="TASK_QUEUE_LENGTH_THRESHOLD") SCORING_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="SCORING_QUEUE_LENGTH_THRESHOLD") HF_TOKEN: Optional[str] = Field(None, env="HF_TOKEN") + DEPLOY_API: bool = Field(False, env="DEPLOY_API") # API Management. API_KEYS_FILE: str = Field("api_keys.json", env="API_KEYS_FILE") From bb115cf5409f410a82aff06588e232966ccc0c33 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Mon, 2 Dec 2024 10:32:47 +0000 Subject: [PATCH 31/40] Fixing formatting --- .gitignore | 2 +- prompting/api/gpt_endpoints/api.py | 1 - .../miner_availability/miner_availability.py | 2 +- prompting/utils/logging.py | 2 +- scripts/test_api.py | 51 ++++++++----------- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index c9dedde8..6afd93cb 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,4 @@ app.config.js wandb .vscode api_keys.json -prompting/api/api_keys.json \ No newline at end of file +prompting/api/api_keys.json diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index ac7445e6..db871a3f 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -115,4 +115,3 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ) return [res.model_dump() for res in response] - diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index 28d50fa5..64f912a1 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -38,7 +38,7 @@ def is_task_available(self, task: BaseTask) -> bool: if isinstance(task, BaseTask): try: return self.task_availabilities[task.__class__.__name__] - except Exception as e: + except Exception as e: logger.error(f"Error in is_task_available: {e}") return False else: diff --git a/prompting/utils/logging.py b/prompting/utils/logging.py index a95d888f..5028b0cb 100644 --- a/prompting/utils/logging.py +++ b/prompting/utils/logging.py @@ -5,12 +5,12 @@ from typing import Any, Literal import numpy as np -import wandb from loguru import logger from pydantic import BaseModel, ConfigDict from wandb.wandb_run import Run import prompting +import wandb from prompting.base.dendrite import DendriteResponseEvent from prompting.rewards.reward import WeightedRewardEvent from prompting.settings import settings diff --git a/scripts/test_api.py b/scripts/test_api.py index 43a3a477..6f9a3192 100644 --- a/scripts/test_api.py +++ b/scripts/test_api.py @@ -1,26 +1,26 @@ +from typing import Optional + import openai from httpx import Timeout -from typing import Optional -from prompting.base.epistula import create_header_hook + from prompting import settings +from prompting.base.epistula import create_header_hook settings.settings = settings.Settings.load(mode="validator") settings = settings.settings def 
setup_miner_client( - port: int = 8004, - api_key: str = "123456", # Default key from your api_keys.json - hotkey: Optional[str] = None + port: int = 8004, api_key: str = "123456", hotkey: Optional[str] = None # Default key from your api_keys.json ) -> openai.AsyncOpenAI: """ Setup an authenticated OpenAI client for the miner. - + Args: port: Port number for the local server api_key: API key for authentication hotkey: Optional wallet hotkey - + Returns: Configured AsyncOpenAI client """ @@ -39,21 +39,14 @@ async def combined_header_hook(request): base_url=f"http://localhost:{port}/v1", max_retries=0, timeout=Timeout(30, connect=10, read=20), - http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [combined_header_hook]} - ), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={"request": [combined_header_hook]}), ) -async def make_completion( - miner: openai.AsyncOpenAI, - prompt: str, - stream: bool = False, - seed: str = "1759348" -) -> str: +async def make_completion(miner: openai.AsyncOpenAI, prompt: str, stream: bool = False, seed: str = "1759348") -> str: """ Make a completion request to the API. - + Args: miner: Configured AsyncOpenAI client prompt: Input prompt @@ -67,13 +60,18 @@ async def make_completion( model=None, messages=[{"role": "user", "content": prompt}], stream=stream, - extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask", "mixture": False} + extra_body={ + "seed": seed, + "sampling_parameters": settings.SAMPLING_PARAMS, + "task": "QuestionAnsweringTask", + "mixture": False, + }, ) - + if not stream: return result else: - print('In the else') + print("In the else") chunks = [] async for chunk in result: print(chunk) @@ -86,18 +84,13 @@ async def main(): PORT = 8004 API_KEY = "0566dbe21ee33bba9419549716cd6f1f" miner = setup_miner_client( - port=PORT, - api_key=API_KEY, - hotkey=settings.WALLET.hotkey if hasattr(settings, 'WALLET') else None - ) - response = await make_completion( - miner=miner, - prompt="Say 10 random numbers between 1 and 100", - stream=True + port=PORT, api_key=API_KEY, hotkey=settings.WALLET.hotkey if hasattr(settings, "WALLET") else None ) + response = await make_completion(miner=miner, prompt="Say 10 random numbers between 1 and 100", stream=True) print(["".join(res.accumulated_chunks) for res in response]) # Run the async main function import asyncio -asyncio.run(main()) \ No newline at end of file + +asyncio.run(main()) From 09e41039256f34e13f62cb5d9b363197107edebf Mon Sep 17 00:00:00 2001 From: richwardle Date: Mon, 2 Dec 2024 11:25:09 +0000 Subject: [PATCH 32/40] sort: fix import formatting --- prompting/utils/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/utils/logging.py b/prompting/utils/logging.py index 5028b0cb..a95d888f 100644 --- a/prompting/utils/logging.py +++ b/prompting/utils/logging.py @@ -5,12 +5,12 @@ from typing import Any, Literal import numpy as np +import wandb from loguru import logger from pydantic import BaseModel, ConfigDict from wandb.wandb_run import Run import prompting -import wandb from prompting.base.dendrite import DendriteResponseEvent from prompting.rewards.reward import WeightedRewardEvent from prompting.settings import settings From e2965fbd48a758c793b36a343c9e84481552eb90 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:26:51 +0000 Subject: [PATCH 33/40] Fix synapse system prompt --- neurons/validator.py | 2 - 
prompting/tasks/task_registry.py | 68 ++++++++++++++------------------ 2 files changed, 30 insertions(+), 40 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index a2e54413..6c455a3c 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -129,8 +129,6 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | return messages: list[dict[str, str]] = [] - if task.synapse_system_prompt: - messages.append({"role": "system", "content": task.synapse_system_prompt}) messages.append({"role": "user", "content": task.query}) body = { diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index 1650f31b..1bdaca0e 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -6,19 +6,10 @@ from pydantic import BaseModel, ConfigDict from prompting.datasets.base import BaseDataset -from prompting.datasets.huggingface_github import HuggingFaceGithubDataset -from prompting.datasets.random_website import DDGDataset from prompting.datasets.sn13 import SN13Dataset -from prompting.datasets.wiki import WikiDataset, WikiDateDataset from prompting.rewards.reward import BaseRewardConfig from prompting.tasks.base_task import BaseTextTask -from prompting.tasks.date_qa import DateQARewardConfig, DateQuestionAnsweringTask from prompting.tasks.inference import InferenceRewardConfig, InferenceTask -from prompting.tasks.multi_choice import MultiChoiceRewardConfig, MultiChoiceTask -from prompting.tasks.programming_task import ProgrammingRewardConfig, ProgrammingTask -from prompting.tasks.qa import QARewardConfig, QuestionAnsweringTask -from prompting.tasks.summarization import SummarizationRewardConfig, SummarizationTask -from prompting.tasks.web_retrieval import WebRetrievalRewardConfig, WebRetrievalTask # from prompting.tasks. 
@@ -37,40 +28,41 @@ def __hash__(self): class TaskRegistry(BaseModel): task_configs: ClassVar[list[TaskConfig]] = [ - TaskConfig(task=QuestionAnsweringTask, probability=0.2, datasets=[WikiDataset], reward_model=QARewardConfig), - TaskConfig( - task=SummarizationTask, probability=0.1, datasets=[WikiDataset], reward_model=SummarizationRewardConfig - ), - TaskConfig( - task=DateQuestionAnsweringTask, - probability=0.1, - datasets=[WikiDateDataset], - reward_model=DateQARewardConfig, - ), + # TaskConfig(task=QuestionAnsweringTask, probability=0.2, datasets=[WikiDataset], reward_model=QARewardConfig), + # TaskConfig( + # task=SummarizationTask, probability=0.1, datasets=[WikiDataset], reward_model=SummarizationRewardConfig + # ), + # TaskConfig( + # task=DateQuestionAnsweringTask, + # probability=0.1, + # datasets=[WikiDateDataset], + # reward_model=DateQARewardConfig, + # ), TaskConfig( task=InferenceTask, - probability=0.16, + # probability=0.16, + probability=1.00, datasets=[SN13Dataset], reward_model=InferenceRewardConfig, ), - TaskConfig( - task=MultiChoiceTask, - probability=0.31, - datasets=[WikiDataset], - reward_model=MultiChoiceRewardConfig, - ), - TaskConfig( - task=ProgrammingTask, - probability=0.1, - datasets=[HuggingFaceGithubDataset], - reward_model=ProgrammingRewardConfig, - ), - TaskConfig( - task=WebRetrievalTask, - probability=0.03, - datasets=[DDGDataset], - reward_model=WebRetrievalRewardConfig, - ), + # TaskConfig( + # task=MultiChoiceTask, + # probability=0.31, + # datasets=[WikiDataset], + # reward_model=MultiChoiceRewardConfig, + # ), + # TaskConfig( + # task=ProgrammingTask, + # probability=0.1, + # datasets=[HuggingFaceGithubDataset], + # reward_model=ProgrammingRewardConfig, + # ), + # TaskConfig( + # task=WebRetrievalTask, + # probability=0.03, + # datasets=[DDGDataset], + # reward_model=WebRetrievalRewardConfig, + # ), ] @classmethod From a7c53c84b4a1812eba6e70d111ebe2629e16cba7 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:17:43 +0000 Subject: [PATCH 34/40] WIP: Move MoA to new API --- prompting/api/gpt_endpoints/api.py | 17 ----- .../api/gpt_endpoints/process_completions.py | 67 ------------------- prompting/api/gpt_endpoints/serialisers.py | 0 .../mixture_of_miners.py | 0 4 files changed, 84 deletions(-) delete mode 100644 prompting/api/gpt_endpoints/api.py delete mode 100644 prompting/api/gpt_endpoints/process_completions.py delete mode 100644 prompting/api/gpt_endpoints/serialisers.py rename {prompting/api/gpt_endpoints => validator_api}/mixture_of_miners.py (100%) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py deleted file mode 100644 index e361b51c..00000000 --- a/prompting/api/gpt_endpoints/api.py +++ /dev/null @@ -1,17 +0,0 @@ -from fastapi import APIRouter, Depends, Request - -from prompting.api.api_managements.api import validate_api_key -from prompting.api.gpt_endpoints.mixture_of_miners import mixture_of_miners -from prompting.api.gpt_endpoints.process_completions import process_completions - -router = APIRouter() - - -@router.post("/v1/chat/completions") -async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): - """OpenAI-style chat completions endpoint.""" - body = await request.json() - if body.get("mixture", False): - return await mixture_of_miners(body) - else: - return await process_completions(body) diff --git a/prompting/api/gpt_endpoints/process_completions.py 
b/prompting/api/gpt_endpoints/process_completions.py deleted file mode 100644 index 129bc46c..00000000 --- a/prompting/api/gpt_endpoints/process_completions.py +++ /dev/null @@ -1,67 +0,0 @@ -import json -import random - -from fastapi import HTTPException -from loguru import logger - -from prompting.base.dendrite import DendriteResponseEvent -from prompting.base.epistula import query_miners -from prompting.miner_availability.miner_availability import miner_availabilities -from prompting.rewards.scoring import task_scorer -from prompting.settings import settings -from prompting.tasks.inference import InferenceTask -from prompting.tasks.task_registry import TaskRegistry - - -async def process_completions(body: dict[str, any]): - task = TaskRegistry.get_task_by_name(body.get("task", InferenceTask.__name__)) - if body.get("task") and not task: - raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") - logger.debug(f"Requested Task: {body.get('task')}, {task}") - - stream = body.get("stream") - body = {k: v for k, v in body.items() if k not in ["task", "stream"]} - body["task"] = task.__class__.__name__ - body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) - logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - - if settings.TEST_MINER_IDS: - available_miners = settings.TEST_MINER_IDS - elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) - ): - raise HTTPException( - status_code=503, - detail=f"No miners available for model: {body.get('model')} and task: {task.__name__}", - ) - - response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) - if stream: - return response - - response = await response - response_event = DendriteResponseEvent( - stream_results=response, - uids=available_miners, - timeout=settings.NEURON_TIMEOUT, - completions=["".join(res.accumulated_chunks) for res in response], - ) - - task = task( - query=body["messages"][-1]["content"], - messages=[message["content"] for message in body["messages"]], - model=body.get("model"), - seed=body.get("seed"), - response=response_event, - ) - - task_scorer.add_to_queue( - task=task, - response=response_event, - dataset_entry=task.dataset_entry, - block=-1, - step=-1, - task_id=task.task_id, - ) - - return [res.model_dump() for res in response] diff --git a/prompting/api/gpt_endpoints/serialisers.py b/prompting/api/gpt_endpoints/serialisers.py deleted file mode 100644 index e69de29b..00000000 diff --git a/prompting/api/gpt_endpoints/mixture_of_miners.py b/validator_api/mixture_of_miners.py similarity index 100% rename from prompting/api/gpt_endpoints/mixture_of_miners.py rename to validator_api/mixture_of_miners.py From 34949f16cf8db50511a149c7ff4ce804adc90c37 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:39:13 +0000 Subject: [PATCH 35/40] Add MoA code --- validator_api/chat_completion.py | 106 +++++++++++++++++++++++++++++ validator_api/gpt_endpoints.py | 99 +++------------------------ validator_api/mixture_of_miners.py | 77 ++++++++++++++------- 3 files changed, 168 insertions(+), 114 deletions(-) create mode 100644 validator_api/chat_completion.py diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py new file mode 100644 index 00000000..0d1470e2 --- /dev/null +++ b/validator_api/chat_completion.py @@ -0,0 +1,106 @@ + +import 
asyncio +import json +import random +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +import httpx +from loguru import logger +from shared.epistula import make_openai_query +from shared.settings import shared_settings +from shared.uids import get_uids + + +async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): + if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default + return + + # if body.get("task") != "InferenceTask": + # logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") + # return + url = f"http://{shared_settings.VALIDATOR_API}/scoring" + payload = {"body": body, "chunks": chunks, "uid": uid} + # headers = { + # "Authorization": f"Bearer {shared_settings.SCORING_KEY}", #Add API key in Authorization header + # "Content-Type": "application/json", + # } + try: + timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, json=payload) # , headers=headers) + if response.status_code == 200: + logger.info(f"Forwarding response completed with status {response.status_code}") + + else: + logger.exception( + f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" + ) + + except Exception as e: + logger.error(f"Tried to forward response to {url} with payload {payload}") + logger.exception(f"Error while forwarding response: {e}") + + +async def stream_response(response, collected_chunks: list[str], body: dict[str, any], uid: int) -> AsyncGenerator[str, None]: + chunks_received = False + try: + async for chunk in response: + chunks_received = True + collected_chunks.append(chunk.choices[0].delta.content) + yield f"data: {json.dumps(chunk.model_dump())}\n\n" + + if not chunks_received: + logger.error("Stream is empty: No chunks were received") + yield 'data: {"error": "502 - Response is empty"}\n\n' + yield "data: [DONE]\n\n" + + # Forward the collected chunks after streaming is complete + asyncio.create_task(forward_response(uid=uid, body=body, chunks=collected_chunks)) + except asyncio.CancelledError: + logger.info("Client disconnected, streaming cancelled") + raise + except Exception as e: + logger.exception(f"Error during streaming: {e}") + yield 'data: {"error": "Internal server Error"}\n\n' + + +async def regular_chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: + """Handle regular chat completion without mixture of miners.""" + if uid is None: + uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + + if uid is None: + logger.error("No available miner found") + raise HTTPException(status_code=503, detail="No available miner found") + + logger.debug(f"Querying uid {uid}") + STREAM = body.get("stream", False) + + collected_chunks: list[str] = [] + + logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") + response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) + + if STREAM: + return StreamingResponse( + stream_response(response, collected_chunks, body, uid), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + else: + asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) + return response[0] + + +async def get_response_from_miner(body: dict[str, any], uid: int) -> tuple: + """Get 
response from a single miner.""" + return await make_openai_query( + shared_settings.METAGRAPH, + shared_settings.WALLET, + body, + uid, + stream=False + ) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index bc3d2e2f..91d5563d 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -1,106 +1,29 @@ -import asyncio -import json import random -import httpx -from fastapi import APIRouter, HTTPException, Request +from fastapi import APIRouter, Request from loguru import logger from starlette.responses import StreamingResponse -from shared.epistula import make_openai_query -from shared.settings import shared_settings -from shared.uids import get_uids +from validator_api import mixture_of_miners +from validator_api.chat_completion import regular_chat_completion router = APIRouter() -async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): - if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default - return - - # if body.get("task") != "InferenceTask": - # logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") - # return - url = f"http://{shared_settings.VALIDATOR_API}/scoring" - payload = {"body": body, "chunks": chunks, "uid": uid} - # headers = { - # "Authorization": f"Bearer {shared_settings.SCORING_KEY}", #Add API key in Authorization header - # "Content-Type": "application/json", - # } - try: - timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) - async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.post(url, json=payload) # , headers=headers) - if response.status_code == 200: - logger.info(f"Forwarding response completed with status {response.status_code}") - - else: - logger.exception( - f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" - ) - - except Exception as e: - logger.error(f"Tried to forward response to {url} with payload {payload}") - logger.exception(f"Error while forwarding response: {e}") - @router.post("/v1/chat/completions") -async def chat_completion(request: Request): # , cbackground_tasks: BackgroundTasks): +async def chat_completion(request: Request): + """Main endpoint that handles both regular and mixture of miners chat completion.""" try: body = await request.json() body["seed"] = int(body.get("seed") or random.randint(0, 1000000)) - STREAM = body.get("stream") or False - logger.debug(f"Streaming: {STREAM}") - # Get random miner from top 100 incentive. 
- uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) - # uid = get_available_miner(task=body.get("task"), model=body.get("model")) - if uid is None: - logger.error("No available miner found") - raise HTTPException(status_code=503, detail="No available miner found") - logger.debug(f"Querying uid {uid}") - - collected_chunks: list[str] = [] - - # Create a wrapper for the streaming response - async def stream_with_error_handling(): - chunks_received = False - try: - async for chunk in response: - chunks_received = True - collected_chunks.append(chunk.choices[0].delta.content) - yield f"data: {json.dumps(chunk.model_dump())}\n\n" - - if not chunks_received: - logger.error("Stream is empty: No chunks were received") - yield 'data: {"error": "502 - Response is empty"}\n\n' - yield "data: [DONE]\n\n" - - # Once the stream is done, forward the collected chunks - asyncio.create_task(forward_response(uid=uid, body=body, chunks=collected_chunks)) - # background_tasks.add_task(forward_response, uid=uid, body=body, chunks=collected_chunks) - except asyncio.CancelledError: - logger.info("Client disconnected, streaming cancelled") - raise - except Exception as e: - logger.exception(f"Error during streaming: {e}") - yield 'data: {"error": "Internal server Error"}\n\n' - - logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") - response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) - - if STREAM: - return StreamingResponse( - stream_with_error_handling(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - }, - ) + + # Choose between regular completion and mixture of miners. + if body.get("mixture", False): + return await mixture_of_miners(body) else: - asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) - return response[0] + return await regular_chat_completion(body) except Exception as e: - logger.exception(f"Error setting up streaming: {e}") + logger.exception(f"Error in chat completion: {e}") return StreamingResponse(content="Internal Server Error", status_code=500) diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 96fc22f2..6349dd41 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -1,45 +1,70 @@ + + import copy +import random + +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +from loguru import logger + +from shared.uids import get_uids +from validator_api.chat_completion import get_response_from_miner, regular_chat_completion + -from prompting.api.gpt_endpoints.process_completions import process_completions +async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: + """Handle chat completion with mixture of miners approach.""" + DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. + Your task is to synthesize these responses into a single, high-quality and concise response. + It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. + Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. + Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. 
+ Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. + Responses from models:""" -DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. -Your task is to synthesize these responses into a single, high-quality and concise response. -It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. -Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. -Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. -Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. -Responses from models:""" + TASK_SYSTEM_PROMPT = { + None: DEFAULT_SYSTEM_PROMPT, + } -TASK_SYSTEM_PROMPT = { - None: DEFAULT_SYSTEM_PROMPT, -} + # Get responses from multiple miners + body_first_step = copy.deepcopy(body) + body_first_step["stream"] = False + # Get multiple miners + miner_uids = get_uids(sampling_mode="top_incentive", k=3) # Get responses from top 3 miners + if not miner_uids: + raise HTTPException(status_code=503, detail="No available miners found") -async def mixture_of_miners( - body: dict[str, any], -): - body_1st_step = copy.deepcopy(body) - body_1st_step["stream"] = False + # Collect responses from all miners + responses = [] + for uid in miner_uids: + try: + response = await get_response_from_miner(body_first_step, uid) + responses.append(response) + except Exception as e: + logger.error(f"Error getting response from miner {uid}: {e}") + continue - # First step: Get initial responses from miners. - responses = await process_completions(body_1st_step) + if not responses: + raise HTTPException(status_code=503, detail="Failed to get responses from miners") - # Extract completions from the responses. - completions = ["".join(res["accumulated_chunks"]) for res in responses] + # Extract completions from the responses + completions = [response[1][0] for response in responses if response and len(response) > 1] task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - # Aggregate responses into one system prompt. + # Aggregate responses into one system prompt agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - # Prepare new messages with the aggregated system prompt. + # Prepare new messages with the aggregated system prompt original_messages = body["messages"] original_user_messages = [msg for msg in original_messages if msg["role"] != "system"] new_messages = [{"role": "system", "content": agg_system_prompt}] + original_user_messages - # Update the body with the new messages. - body["messages"] = new_messages + # Update the body with the new messages + final_body = copy.deepcopy(body) + final_body["messages"] = new_messages - # Second step: Get the final response using the aggregated system prompt. 
- return await process_completions(body) + # Get final response using a random top miner + final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + return await regular_chat_completion(final_body, final_uid) From edf1bfeedacad19be19dd10cc12d1b51e329b217 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:25:39 +0000 Subject: [PATCH 36/40] WIP: Finish MoA --- validator_api/gpt_endpoints.py | 1 - validator_api/mixture_of_miners.py | 104 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 91d5563d..9638a89c 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -10,7 +10,6 @@ router = APIRouter() - @router.post("/v1/chat/completions") async def chat_completion(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 6349dd41..b15f6f7d 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -1,5 +1,4 @@ - - +import asyncio import copy import random @@ -11,60 +10,81 @@ from validator_api.chat_completion import get_response_from_miner, regular_chat_completion -async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: - """Handle chat completion with mixture of miners approach.""" - DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. - Your task is to synthesize these responses into a single, high-quality and concise response. - It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. - Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. - Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. - Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. - Responses from models:""" +DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. +Your task is to synthesize these responses into a single, high-quality and concise response. +It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. +Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. +Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. +Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. +Responses from models:""" + +TASK_SYSTEM_PROMPT = { + None: DEFAULT_SYSTEM_PROMPT, + # Add more task-specific system prompts here. 
+} + +NUM_MIXTURE_MINERS = 5 +TOP_INCENTIVE_POOL = 100 + + + +async def get_miner_response(body: dict, uid: str) -> tuple | None: + """Get response from a single miner with error handling.""" + try: + return await get_response_from_miner(body, uid) + except Exception as e: + logger.error(f"Error getting response from miner {uid}: {e}") + return None - TASK_SYSTEM_PROMPT = { - None: DEFAULT_SYSTEM_PROMPT, - } - # Get responses from multiple miners +async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: + """Handle chat completion with mixture of miners approach. + + Based on Mixture-of-Agents Enhances Large Language Model Capabilities, 2024, Wang et al.: + https://arxiv.org/abs/2406.04692 + + Args: + body: Query parameters: + messages: User prompt. + stream: If True, stream the response. + model: Optional model used for inference, SharedSettings.LLM_MODEL is used by default. + task: Optional task, see prompting/tasks/task_registry.py, InferenceTask is used by default. + """ body_first_step = copy.deepcopy(body) body_first_step["stream"] = False # Get multiple miners - miner_uids = get_uids(sampling_mode="top_incentive", k=3) # Get responses from top 3 miners + miner_uids = get_uids(sampling_mode="top_incentive", k=NUM_MIXTURE_MINERS) if not miner_uids: raise HTTPException(status_code=503, detail="No available miners found") - # Collect responses from all miners - responses = [] - for uid in miner_uids: - try: - response = await get_response_from_miner(body_first_step, uid) - responses.append(response) - except Exception as e: - logger.error(f"Error getting response from miner {uid}: {e}") - continue - - if not responses: + # Concurrently collect responses from all miners. + miner_tasks = [get_miner_response(body_first_step, uid) for uid in miner_uids] + responses = await asyncio.gather(*miner_tasks) + + # Filter out None responses (failed requests). + valid_responses = [r for r in responses if r is not None] + + if not valid_responses: raise HTTPException(status_code=503, detail="Failed to get responses from miners") - # Extract completions from the responses - completions = [response[1][0] for response in responses if response and len(response) > 1] - + # Extract completions from the responses. + completions = [response[1][0] for response in valid_responses if response and len(response) > 1] + task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - - # Aggregate responses into one system prompt + + # Aggregate responses into one system prompt. agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - - # Prepare new messages with the aggregated system prompt - original_messages = body["messages"] - original_user_messages = [msg for msg in original_messages if msg["role"] != "system"] - new_messages = [{"role": "system", "content": agg_system_prompt}] + original_user_messages - - # Update the body with the new messages + + # Prepare new messages with the aggregated system prompt. + new_messages = [{"role": "system", "content": agg_system_prompt}] + new_messages.extend([msg for msg in body["messages"] if msg["role"] != "system"]) + + # Update the body with the new messages. final_body = copy.deepcopy(body) final_body["messages"] = new_messages - - # Get final response using a random top miner - final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + + # Get final response using a random top miner. 
+ final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) return await regular_chat_completion(final_body, final_uid) From 27b035aceb82b7933f022ea2e33bb278866e035c Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:47:31 +0000 Subject: [PATCH 37/40] Finish implementation --- validator_api/chat_completion.py | 10 ++++++++-- validator_api/gpt_endpoints.py | 8 ++++---- validator_api/mixture_of_miners.py | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py index 0d1470e2..337ae45d 100644 --- a/validator_api/chat_completion.py +++ b/validator_api/chat_completion.py @@ -2,6 +2,7 @@ import asyncio import json import random +from typing import AsyncGenerator from fastapi import HTTPException from fastapi.responses import StreamingResponse import httpx @@ -41,7 +42,12 @@ async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): logger.exception(f"Error while forwarding response: {e}") -async def stream_response(response, collected_chunks: list[str], body: dict[str, any], uid: int) -> AsyncGenerator[str, None]: +async def stream_response( + response, + collected_chunks: list[str], + body: dict[str, any], + uid: int + ) -> AsyncGenerator[str, None]: chunks_received = False try: async for chunk in response: @@ -64,7 +70,7 @@ async def stream_response(response, collected_chunks: list[str], body: dict[str, yield 'data: {"error": "Internal server Error"}\n\n' -async def regular_chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: +async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: """Handle regular chat completion without mixture of miners.""" if uid is None: uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 9638a89c..0058f738 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -4,14 +4,14 @@ from loguru import logger from starlette.responses import StreamingResponse -from validator_api import mixture_of_miners -from validator_api.chat_completion import regular_chat_completion +from validator_api.mixture_of_miners import mixture_of_miners +from validator_api.chat_completion import chat_completion router = APIRouter() @router.post("/v1/chat/completions") -async def chat_completion(request: Request): +async def completions(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" try: body = await request.json() @@ -21,7 +21,7 @@ async def chat_completion(request: Request): if body.get("mixture", False): return await mixture_of_miners(body) else: - return await regular_chat_completion(body) + return await chat_completion(body) except Exception as e: logger.exception(f"Error in chat completion: {e}") diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index b15f6f7d..189bfc56 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -7,7 +7,7 @@ from loguru import logger from shared.uids import get_uids -from validator_api.chat_completion import get_response_from_miner, regular_chat_completion +from validator_api.chat_completion import get_response_from_miner, chat_completion DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various 
open-source models to the latest user query. @@ -55,7 +55,7 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Get multiple miners miner_uids = get_uids(sampling_mode="top_incentive", k=NUM_MIXTURE_MINERS) - if not miner_uids: + if len(miner_uids) == 0: raise HTTPException(status_code=503, detail="No available miners found") # Concurrently collect responses from all miners. @@ -87,4 +87,4 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Get final response using a random top miner. final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) - return await regular_chat_completion(final_body, final_uid) + return await chat_completion(final_body, final_uid) From f75d28c03715350f6d65e446cbadc84657b57a84 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:49:14 +0000 Subject: [PATCH 38/40] Clean up code --- api_keys.json.example | 1 - prompting/api/api_managements/api.py | 82 ---------------------------- prompting/tasks/task_registry.py | 5 +- 3 files changed, 3 insertions(+), 85 deletions(-) delete mode 100644 api_keys.json.example delete mode 100644 prompting/api/api_managements/api.py diff --git a/api_keys.json.example b/api_keys.json.example deleted file mode 100644 index fd065361..00000000 --- a/api_keys.json.example +++ /dev/null @@ -1 +0,0 @@ -{"API_KEY_VALUE": {"rate_limit": 10, "usage": 0}} diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py deleted file mode 100644 index 92ccc922..00000000 --- a/prompting/api/api_managements/api.py +++ /dev/null @@ -1,82 +0,0 @@ -import json -import secrets - -from fastapi import APIRouter, Depends, Header, HTTPException -from loguru import logger - -from prompting.settings import settings - -router = APIRouter() - - -# Load and save functions for API keys -def load_api_keys(): - try: - with open(settings.API_KEYS_FILE, "r") as f: - return json.load(f) - except FileNotFoundError: - return {} - - -def save_api_keys(api_keys): - with open(settings.API_KEYS_FILE, "w") as f: - json.dump(api_keys, f) - - -# Use lifespan to initialize API keys -_keys = load_api_keys() -logger.info(f"Loaded API keys: {_keys}") -save_api_keys(_keys) - - -# Dependency to validate the admin key -def validate_admin_key(admin_key: str = Header(...)): - if admin_key != settings.ADMIN_KEY: - raise HTTPException(status_code=403, detail="Invalid admin key") - - -# Dependency to validate API keys -def validate_api_key(api_key: str = Header(...)): - if api_key not in _keys: - raise HTTPException(status_code=403, detail="Invalid API key") - return _keys[api_key] - - -@router.post("/create-api-key/") -def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key)): - """Creates a new API key with a specified rate limit.""" - new_api_key = secrets.token_hex(16) - _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} - save_api_keys(_keys) - return {"message": "API key created", "api_key": new_api_key} - - -@router.put("/modify-api-key/{api_key}") -def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(validate_admin_key)): - """Modifies the rate limit of an existing API key.""" - if api_key not in _keys: - raise HTTPException(status_code=404, detail="API key not found") - _keys[api_key]["rate_limit"] = rate_limit - save_api_keys(_keys) - return {"message": "API key updated", "api_key": api_key} - - -@router.delete("/delete-api-key/{api_key}") -def 
delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): - """Deletes an existing API key.""" - if api_key not in _keys: - raise HTTPException(status_code=404, detail="API key not found") - del _keys[api_key] - save_api_keys(_keys) - return {"message": "API key deleted"} - - -@router.get("/demo-endpoint/") -def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): - """A demo endpoint that requires a valid API key.""" - return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} - - -# # Create FastAPI app and include the router -# app = FastAPI() -# app.include_router(router) diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index 9fbf0d04..0bdf0324 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -8,8 +8,10 @@ from prompting.datasets.huggingface_github import HuggingFaceGithubDataset from prompting.datasets.random_website import DDGDataset from prompting.datasets.sn13 import SN13Dataset +from prompting.datasets.wiki import WikiDataset, WikiDateDataset from prompting.rewards.reward import BaseRewardConfig from prompting.tasks.base_task import BaseTextTask +from prompting.tasks.date_qa import DateQARewardConfig, DateQuestionAnsweringTask from prompting.tasks.inference import InferenceRewardConfig, InferenceTask from prompting.tasks.multi_choice import MultiChoiceRewardConfig, MultiChoiceTask from prompting.tasks.multi_step_reasoning import MultiStepReasoningRewardConfig, MultiStepReasoningTask @@ -48,8 +50,7 @@ class TaskRegistry(BaseModel): ), TaskConfig( task=InferenceTask, - # probability=0.16, - probability=1.00, + probability=0.16, datasets=[SN13Dataset], reward_model=InferenceRewardConfig, ), From bc48fb979ecdfac607c50995395ecc15fe8892f4 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:50:54 +0000 Subject: [PATCH 39/40] Run pre-commit hook --- validator_api/chat_completion.py | 30 +++++++++++------------------- validator_api/gpt_endpoints.py | 4 ++-- validator_api/mixture_of_miners.py | 22 ++++++++++------------ 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py index 337ae45d..668a5e8a 100644 --- a/validator_api/chat_completion.py +++ b/validator_api/chat_completion.py @@ -1,12 +1,13 @@ - import asyncio import json import random from typing import AsyncGenerator + +import httpx from fastapi import HTTPException from fastapi.responses import StreamingResponse -import httpx from loguru import logger + from shared.epistula import make_openai_query from shared.settings import shared_settings from shared.uids import get_uids @@ -43,11 +44,8 @@ async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): async def stream_response( - response, - collected_chunks: list[str], - body: dict[str, any], - uid: int - ) -> AsyncGenerator[str, None]: + response, collected_chunks: list[str], body: dict[str, any], uid: int +) -> AsyncGenerator[str, None]: chunks_received = False try: async for chunk in response: @@ -74,19 +72,19 @@ async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple """Handle regular chat completion without mixture of miners.""" if uid is None: uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) - + if uid is None: logger.error("No available miner found") raise HTTPException(status_code=503, detail="No available miner found") - + logger.debug(f"Querying 
uid {uid}") STREAM = body.get("stream", False) - + collected_chunks: list[str] = [] - + logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) - + if STREAM: return StreamingResponse( stream_response(response, collected_chunks, body, uid), @@ -103,10 +101,4 @@ async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple async def get_response_from_miner(body: dict[str, any], uid: int) -> tuple: """Get response from a single miner.""" - return await make_openai_query( - shared_settings.METAGRAPH, - shared_settings.WALLET, - body, - uid, - stream=False - ) + return await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=False) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 0058f738..34681f0e 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -4,8 +4,8 @@ from loguru import logger from starlette.responses import StreamingResponse -from validator_api.mixture_of_miners import mixture_of_miners from validator_api.chat_completion import chat_completion +from validator_api.mixture_of_miners import mixture_of_miners router = APIRouter() @@ -16,7 +16,7 @@ async def completions(request: Request): try: body = await request.json() body["seed"] = int(body.get("seed") or random.randint(0, 1000000)) - + # Choose between regular completion and mixture of miners. if body.get("mixture", False): return await mixture_of_miners(body) diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 189bfc56..e2aaa05a 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -7,8 +7,7 @@ from loguru import logger from shared.uids import get_uids -from validator_api.chat_completion import get_response_from_miner, chat_completion - +from validator_api.chat_completion import chat_completion, get_response_from_miner DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality and concise response. @@ -27,7 +26,6 @@ TOP_INCENTIVE_POOL = 100 - async def get_miner_response(body: dict, uid: str) -> tuple | None: """Get response from a single miner with error handling.""" try: @@ -39,10 +37,10 @@ async def get_miner_response(body: dict, uid: str) -> tuple | None: async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: """Handle chat completion with mixture of miners approach. - + Based on Mixture-of-Agents Enhances Large Language Model Capabilities, 2024, Wang et al.: https://arxiv.org/abs/2406.04692 - + Args: body: Query parameters: messages: User prompt. @@ -61,30 +59,30 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Concurrently collect responses from all miners. miner_tasks = [get_miner_response(body_first_step, uid) for uid in miner_uids] responses = await asyncio.gather(*miner_tasks) - + # Filter out None responses (failed requests). valid_responses = [r for r in responses if r is not None] - + if not valid_responses: raise HTTPException(status_code=503, detail="Failed to get responses from miners") # Extract completions from the responses. 
completions = [response[1][0] for response in valid_responses if response and len(response) > 1] - + task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - + # Aggregate responses into one system prompt. agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - + # Prepare new messages with the aggregated system prompt. new_messages = [{"role": "system", "content": agg_system_prompt}] new_messages.extend([msg for msg in body["messages"] if msg["role"] != "system"]) - + # Update the body with the new messages. final_body = copy.deepcopy(body) final_body["messages"] = new_messages - + # Get final response using a random top miner. final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) return await chat_completion(final_body, final_uid) From 1445e1f2f527cfad0d8d4ef1d922337d0e994486 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:14:19 +0000 Subject: [PATCH 40/40] Merge with staging --- validator_api/gpt_endpoints.py | 39 ---------------------------------- 1 file changed, 39 deletions(-) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 2ed297c1..34681f0e 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -10,39 +10,6 @@ router = APIRouter() -<<<<<<< HEAD -======= -async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): - uid = int(uid) # sometimes uid is type np.uint64 - logger.info(f"Forwarding response to scoring with body: {body}") - if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default - return - - if body.get("task") != "InferenceTask": - logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") - return - url = f"http://{shared_settings.VALIDATOR_API}/scoring" - payload = {"body": body, "chunks": chunks, "uid": uid} - try: - timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) - async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.post( - url, json=payload, headers={"api-key": shared_settings.SCORING_KEY, "Content-Type": "application/json"} - ) - if response.status_code == 200: - logger.info(f"Forwarding response completed with status {response.status_code}") - - else: - logger.exception( - f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" - ) - - except Exception as e: - logger.error(f"Tried to forward response to {url} with payload {payload}") - logger.exception(f"Error while forwarding response: {e}") - - ->>>>>>> staging @router.post("/v1/chat/completions") async def completions(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" @@ -54,13 +21,7 @@ async def completions(request: Request): if body.get("mixture", False): return await mixture_of_miners(body) else: -<<<<<<< HEAD return await chat_completion(body) -======= - logger.info("Forwarding response to scoring...") - asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) - return response[0] ->>>>>>> staging except Exception as e: logger.exception(f"Error in chat completion: {e}")
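The last patches leave validator_api/gpt_endpoints.py with a single /v1/chat/completions route that dispatches to chat_completion for one miner sampled from the top-incentive pool, or to mixture_of_miners when the request body sets "mixture": true. A minimal client sketch follows; the host and port (localhost:8000) and the absence of an auth header are assumptions made for illustration, not values fixed by these patches.

# Sketch: call the new validator_api chat completions route, once with a single
# miner and once through the mixture-of-miners path. Host/port are assumed.
import asyncio

import httpx

API_URL = "http://localhost:8000/v1/chat/completions"  # assumed deployment address


async def query(mixture: bool) -> dict:
    body = {
        "messages": [{"role": "user", "content": "Name three uses of zinc."}],
        "task": "InferenceTask",  # optional; InferenceTask is the default task
        "stream": False,
        "seed": 42,               # optional; the endpoint picks a random seed if omitted
        "mixture": mixture,       # True routes the request through mixture_of_miners
    }
    async with httpx.AsyncClient(timeout=httpx.Timeout(120.0)) as client:
        response = await client.post(API_URL, json=body)
        response.raise_for_status()
        return response.json()


async def main() -> None:
    single = await query(mixture=False)
    mixed = await query(mixture=True)
    print(single)
    print(mixed)


if __name__ == "__main__":
    asyncio.run(main())

Streaming works the same way from the client side, except the body sets "stream": true and the reply arrives as server-sent events ("data: ..." lines), matching the StreamingResponse produced by stream_response in chat_completion.py.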