From 4457d4e42ac55a133ffe04bdcdc249be430f97cc Mon Sep 17 00:00:00 2001 From: richwardle Date: Tue, 19 Nov 2024 06:17:42 -1000 Subject: [PATCH 01/40] Initial upload --- neurons/miners/epistula_miner/miner.py | 195 +++++++++++++++++ neurons/validator.py | 29 +-- prompting/base/epistula.py | 207 ++++++++++++++++++ .../miner_availability/miner_availability.py | 17 +- 4 files changed, 412 insertions(+), 36 deletions(-) create mode 100644 neurons/miners/epistula_miner/miner.py create mode 100644 prompting/base/epistula.py diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py new file mode 100644 index 00000000..9ff5a4f6 --- /dev/null +++ b/neurons/miners/epistula_miner/miner.py @@ -0,0 +1,195 @@ +# ruff: noqa: E402 +from prompting import settings + +settings.settings = settings.Settings.load(mode="miner") +settings = settings.settings + +import time +import httpx +import netaddr +import uvicorn +import requests +import traceback +import bittensor as bt + +from loguru import logger +from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException +from starlette.background import BackgroundTask +from starlette.responses import StreamingResponse +from bittensor.subtensor import serve_extrinsic +from bittensor.axon import FastAPIThreadedServer +from prompting.base.epistula import verify_signature + + +MODEL_ID: str = "gpt-3.5-turbo" +NEURON_MAX_TOKENS: int = 256 +NEURON_TEMPERATURE: float = 0.7 +NEURON_TOP_K: int = 50 +NEURON_TOP_P: float = 0.95 +NEURON_STREAMING_BATCH_SIZE: int = 12 +NEURON_STOP_ON_FORWARD_EXCEPTION: bool = False + +SYSTEM_PROMPT = """You are a helpful agent that does it's best to answer all questions!""" + + +class OpenAIMiner(): + + def __init__(self): + self.should_exit = False + self.client = httpx.AsyncClient( + base_url="https://api.openai.com/v1", + headers={ + "Authorization": f"Bearer {settings.OPENAI_API_KEY}", + "Content-Type": "application/json", + }, + ) + + def format_headers(self, request: Request): + # Iterate through the headers and only keep the ones that will be used for the openai request + + return request + + async def create_chat_completion(self, request: Request): + request["model"] = MODEL_ID + bt.logging.info( + "\u2713", + f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + ) + req = self.client.build_request( + "POST", "/chat/completions", content=await request.body() + ) + r = await self.client.send(req, stream=True) + return StreamingResponse( + r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers + ) + + async def check_availability(self, request: Request): + # Parse the incoming JSON request + data = await request.json() + task_availabilities = data.get('task_availabilities', {}) + llm_model_availabilities = data.get('llm_model_availabilities', {}) + + # Set all task availabilities to True + task_response = {key: True for key in task_availabilities} + + # Set all model availabilities to False + model_response = {key: False for key in llm_model_availabilities} + + # Construct the response dictionary + response = { + 'task_availabilities': task_response, + 'llm_model_availabilities': model_response + } + + return response + + async def verify_request( + self, + request: Request, + ): + # We do this as early as possible so that now has a lesser chance + # of causing a stale request + now = round(time.time() * 1000) + + # We need to check the signature of the body as bytes + # But use some specific fields from the body + signed_by = 
request.headers.get("Epistula-Signed-By") + signed_for = request.headers.get("Epistula-Signed-For") + if signed_for != self.wallet.hotkey.ss58_address: + raise HTTPException( + status_code=400, detail="Bad Request, message is not intended for self" + ) + if signed_by not in self.metagraph.hotkeys: + raise HTTPException(status_code=401, detail="Signer not in metagraph") + + uid = self.metagraph.hotkeys.index(signed_by) + stake = self.metagraph.S[uid].item() + if not self.config.no_force_validator_permit and stake < 10000: + bt.logging.warning( + f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}" + ) + raise HTTPException(status_code=401, detail="Stake below minimum: {stake}") + + # If anything is returned here, we can throw + body = await request.body() + err = verify_signature( + request.headers.get("Epistula-Request-Signature"), + body, + request.headers.get("Epistula-Timestamp"), + request.headers.get("Epistula-Uuid"), + signed_for, + signed_by, + now, + ) + if err: + bt.logging.error(err) + raise HTTPException(status_code=400, detail=err) + + def run(self): + + external_ip = None #settings.EXTERNAL_IP + if not external_ip or external_ip == "[::]": + try: + external_ip = requests.get("https://checkip.amazonaws.com").text.strip() + netaddr.IPAddress(external_ip) + except Exception: + bt.logging.error("Failed to get external IP") + + bt.logging.info( + f"Serving miner endpoint {external_ip}:{settings.AXON_PORT} on network: {settings.SUBTENSOR_NETWORK} with netuid: {settings.NETUID}" + ) + + serve_success = serve_extrinsic( + subtensor=settings.SUBTENSOR, + wallet=settings.WALLET, + ip=external_ip, + port=settings.AXON_PORT, + protocol=4, + netuid=settings.NETUID, + ) + if not serve_success: + bt.logging.error("Failed to serve endpoint") + return + + # Start starts the miner's endpoint, making it active on the network. + # change the config in the axon + app = FastAPI() + router = APIRouter() + router.add_api_route( + "/chat/completions", + self.create_chat_completion, + dependencies=[Depends(self.verify_request)], + methods=["POST"], + ) + router.add_api_route( + "/availability", + self.check_availability, + methods=["POST"], + ) + app.include_router(router) + fast_config = uvicorn.Config( + app, + host="0.0.0.0", + port=settings.AXON_PORT, + log_level="info", + loop="asyncio", + ) + self.fast_api = FastAPIThreadedServer(config=fast_config) + self.fast_api.start() + + bt.logging.info(f"Miner starting at block: {settings.SUBTENSOR.block}") + + # This loop maintains the miner's operations until intentionally stopped. + try: + while not self.should_exit: + time.sleep(1) + except Exception as e: + bt.logging.error(str(e)) + bt.logging.error(traceback.format_exc()) + self.shutdown() + + +if __name__ == "__main__": + miner = OpenAIMiner() + miner.run() + logger.warning("Ending miner...") diff --git a/neurons/validator.py b/neurons/validator.py index 7e7b0caf..fa580804 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -23,6 +23,7 @@ from prompting.organic.organic_loop import start_organic from prompting.weight_setting.weight_setter import weight_setter from prompting.llms.utils import GPUInfo +from prompting.base.epistula import query_miners NEURON_SAMPLE_SIZE = 100 @@ -136,35 +137,15 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | if len(uids) == 0: logger.warning("No available miners. 
This should already have been caught earlier.") return - axons = [settings.METAGRAPH.axons[uid] for uid in uids] - - # Create the synapse - synapse = StreamPromptingSynapse( - task_name=task.__class__.__name__, - seed=task.seed, - target_model=task.llm_model_id, - roles=["user"], - messages=[task.query], - ) - # Call the synchronous wrapper that includes both DENDRITE and handle_response - stream_results = run_dendrite_and_handle_response_sync( - uids=uids, - axons=axons, - synapse=synapse, - timeout=settings.NEURON_TIMEOUT, - deserialize=False, - streaming=True, - ) - logger.debug( - f"Non-empty responses: {len([r.completion for r in stream_results if len(r.completion) > 0])}\n" - f"Empty responses: {len([r.completion for r in stream_results if len(r.completion) == 0])}" - ) + body = {"seed": task.seed, "model": task.llm_model_id, "roles": ["user"], "messages": [task.query]} + body_bytes = json.dumps(body).encode("utf-8") + stream_results = query_miners(task.__class__.__name__, uids, body) log_stream_results(stream_results) - # Encapsulate the responses in a response event (dataclass + response_event = DendriteResponseEvent( stream_results=stream_results, uids=uids, timeout=settings.NEURON_TIMEOUT ) diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py new file mode 100644 index 00000000..951b3fc0 --- /dev/null +++ b/prompting/base/epistula.py @@ -0,0 +1,207 @@ +import json +from hashlib import sha256 +from uuid import uuid4 +from math import ceil +import time +from prompting.utils.timer import Timer +from substrateinterface import Keypair +import asyncio +import bittensor as bt +import math +from os import urandom +import time +import traceback +from typing import Dict, List, Optional, Tuple, Any, Annotated +from prompting.base.dendrite import SynapseStreamResult +from httpx import Timeout +import httpx +import openai +import requests +from prompting.settings import settings + +def verify_signature( + signature, body: bytes, timestamp, uuid, signed_for, signed_by, now +) -> Optional[Annotated[str, "Error Message"]]: + if not isinstance(signature, str): + return "Invalid Signature" + timestamp = int(timestamp) + if not isinstance(timestamp, int): + return "Invalid Timestamp" + if not isinstance(signed_by, str): + return "Invalid Sender key" + if not isinstance(signed_for, str): + return "Invalid receiver key" + if not isinstance(uuid, str): + return "Invalid uuid" + if not isinstance(body, bytes): + return "Body is not of type bytes" + ALLOWED_DELTA_MS = 8000 + keypair = Keypair(ss58_address=signed_by) + if timestamp + ALLOWED_DELTA_MS < now: + return "Request is too stale" + message = f"{sha256(body).hexdigest()}.{uuid}.{timestamp}.{signed_for}" + verified = keypair.verify(message, signature) + if not verified: + return "Signature Mismatch" + return None + +def generate_header( + hotkey: Keypair, + body_bytes: Dict[str, Any], + signed_for: Optional[str] = None, +) -> Dict[str, Any]: + timestamp = round(time.time() * 1000) + timestampInterval = ceil(timestamp / 1e4) * 1e4 + uuid = str(uuid4()) + headers = { + "Epistula-Version": "2", + "Epistula-Timestamp": str(timestamp), + "Epistula-Uuid": uuid, + "Epistula-Signed-By": hotkey.ss58_address, + "Epistula-Request-Signature": "0x" + + hotkey.sign( + f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}" + ).hex(), + } + if signed_for: + headers["Epistula-Signed-For"] = signed_for + headers["Epistula-Secret-Signature-0"] = ( + "0x" + hotkey.sign(str(timestampInterval - 1) + "." 
+ signed_for).hex() + ) + headers["Epistula-Secret-Signature-1"] = ( + "0x" + hotkey.sign(str(timestampInterval) + "." + signed_for).hex() + ) + headers["Epistula-Secret-Signature-2"] = ( + "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() + ) + return headers + +def create_header_hook(hotkey, axon_hotkey, task): + async def add_headers(request: httpx.Request): + for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): + request.headers[key] = header + request.headers["Task"] = task + + return add_headers + +async def query_miners(task, uids, body): + try: + tasks = [] + for uid in uids: + tasks.append( + asyncio.create_task( + handle_inference( + settings.METAGRAPH, settings.WALLET, task, body, uid, + ) + ) + ) + responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) + return responses + except Exception as e: + bt.logging.error(f"Error in forward for: {e}") + bt.logging.error(traceback.format_exc()) + return [] + +async def query_availabilities(uids, task_config, model_config): + """ Query the availability of the miners """ + availability_dict = {'task_availabilities': task_config, 'llm_model_availabilities': model_config} + # Query the availability of the miners + try: + tasks = [] + for uid in uids: + tasks.append( + asyncio.create_task( + handle_availability( + settings.METAGRAPH, availability_dict, uid, + ) + ) + ) + responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) + return responses + + except Exception as e: + bt.logging.error(f"Error in availability call: {e}") + bt.logging.error(traceback.format_exc()) + return [] + +async def handle_availability( + metagraph: "bt.NonTorchMetagraph", + request: Dict[str, Any], + uid: int, +) -> Dict[str, bool]: + try: + axon_info = metagraph.axons[uid] + url = f"http://{axon_info.ip}:{axon_info.port}/availability" + + timeout = httpx.Timeout(settings.NEURON_TIMEOUT, connect=5, read=5) + + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, json=request) + + response.raise_for_status() + return response.json() + + except Exception as e: + # If the miner is not available, we will return a failure response + bt.logging.error(f"Miner {uid} failed request: {e}") + return {} + + +async def handle_inference( + metagraph: "bt.NonTorchMetagraph", + wallet: "bt.wallet", + task: str, + body: Dict[str, Any], + uid: int, +) -> SynapseStreamResult: + + try: + with Timer() as timer: + axon_info = metagraph.axons[uid] + miner = openai.AsyncOpenAI( + base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", #Maybe need to change this? 
+ api_key="Apex", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={ + "request": [ + create_header_hook( + wallet.hotkey, axon_info.hotkey, task + ) + ] + }), + ) + try: + chunk_timings = [] + chunks = [] + chat = await miner.chat.completions.create(**generate_header(wallet.hotkey, body, signed_for=axon_info.hotkey)) + async for chunk in chat: + if chunk.choices[0].delta is None: + continue + if ( + chunk.choices[0].delta.content == "" + or chunk.choices[0].delta.content is None + ) and len(chunks) == 0: + continue + + chunks.append(chunk.choices[0].delta.content) + chunk_timings.append(timer.elapsed_time()) + + except openai.APIConnectionError as e: + bt.logging.trace(f"Miner {uid} failed request: {e}") + + except Exception as e: + bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + + except Exception as e: + exception = e + bt.logging.error(f"{uid}: Error in forward for: {e}") + bt.logging.error(traceback.format_exc()) + finally: + return SynapseStreamResult( + accumulated_chunks=chunks, + accumulated_chunks_timings=chunk_timings, + synapse=None, + uid=uid, + exception=exception, + ) \ No newline at end of file diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index f349c7c5..d5dfc84b 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -10,6 +10,8 @@ import random import asyncio import numpy as np +from prompting.base.epistula import query_availabilities +from typing import Dict task_config: dict[str, bool] = {str(task_config.task.__name__): True for task_config in TaskRegistry.task_configs} # task_config: dict[str, bool] = { @@ -74,22 +76,13 @@ async def run_step(self): if any(uid >= len(settings.METAGRAPH.axons) for uid in uids_to_query): raise ValueError("Some UIDs are out of bounds. Make sure all the TEST_MINER_IDS are valid.") + responses: list[Dict[str, bool]] = await query_availabilities(uids_to_query, task_config, model_config) - axons = [settings.METAGRAPH.axons[uid] for uid in uids_to_query] - responses: list[AvailabilitySynapse] = await settings.DENDRITE( - axons=axons, - synapse=AvailabilitySynapse(task_availabilities=task_config, llm_model_availabilities=model_config), - timeout=settings.NEURON_TIMEOUT, - deserialize=False, - streaming=False, - ) logger.debug(f"Availability responses: {responses}") - for response, uid in zip(responses, uids_to_query): - if response.is_failure: - logger.warning(f"Miner {uid} failed to respond. 
Response is timeout: {response.timeout}") - continue for response, uid in zip(responses, uids_to_query): + if not response: + continue miner_availabilities.miners[uid] = MinerAvailability( task_availabilities=response.task_availabilities, llm_model_availabilities=response.llm_model_availabilities, From 62ae30cfa14bdc3b4886cefc0b9929f295536f09 Mon Sep 17 00:00:00 2001 From: richwardle Date: Tue, 19 Nov 2024 13:22:34 -1000 Subject: [PATCH 02/40] Get everything working --- neurons/miners/epistula_miner/miner.py | 94 +++++++++++++++++-- neurons/validator.py | 7 +- prompting/base/dendrite.py | 26 +++-- prompting/base/epistula.py | 92 +++++++++--------- prompting/base/forward.py | 6 +- .../miner_availability/miner_availability.py | 4 +- prompting/rewards/reward.py | 2 + 7 files changed, 156 insertions(+), 75 deletions(-) diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 9ff5a4f6..06fb1c0f 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -5,13 +5,15 @@ settings = settings.settings import time +import asyncio +import json import httpx import netaddr import uvicorn import requests import traceback import bittensor as bt - +from starlette.responses import JSONResponse from loguru import logger from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException from starlette.background import BackgroundTask @@ -43,27 +45,99 @@ def __init__(self): "Content-Type": "application/json", }, ) + print("OpenAI Key: ", settings.OPENAI_API_KEY) - def format_headers(self, request: Request): - # Iterate through the headers and only keep the ones that will be used for the openai request - - return request - + async def format_openai_query(self, request: Request): + # Read the JSON data once + data = await request.json() + + # Extract the required fields + openai_request = {} + for key in ["messages", "model", "stream"]: + if key in data: + openai_request[key] = data[key] + openai_request["model"] = MODEL_ID + + return openai_request + async def create_chat_completion(self, request: Request): - request["model"] = MODEL_ID bt.logging.info( "\u2713", f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", ) req = self.client.build_request( - "POST", "/chat/completions", content=await request.body() + "POST", "chat/completions", json = await self.format_openai_query(request) ) r = await self.client.send(req, stream=True) return StreamingResponse( r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers ) + # async def create_chat_completion(self, request: Request): + # bt.logging.info( + # "\u2713", + # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + # ) + # openai_request_body = await self.format_openai_query(request) + # try: + # req = self.client.build_request( + # "POST", "chat/completions", json=openai_request_body + # ) + # r = await self.client.send(req, stream=True) + # # Check for non-200 status code + # if r.status_code != 200: + # error_content = await r.aread() + # bt.logging.error(f"OpenAI API Error {r.status_code}: {error_content}") + # return JSONResponse( + # content=json.loads(error_content), + # status_code=r.status_code + # ) + # except Exception as e: + # bt.logging.error(f"Exception during OpenAI API call: {str(e)}") + # return JSONResponse( + # content={"error": str(e)}, + # status_code=500 + # ) + + # async def create_chat_completion(self, request: Request): + # bt.logging.info( + # "\u2713", + 
# f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", + # ) + + # async def word_stream(): + # words = "This is a test stream".split() + # for word in words: + # # Simulate the OpenAI streaming response format + # data = { + # "choices": [ + # { + # "delta": {"content": word + ' '}, + # "index": 0, + # "finish_reason": None + # } + # ] + # } + # # Yield the data in SSE (Server-Sent Events) format + # yield f"data: {json.dumps(data)}\n\n" + # await asyncio.sleep(0.1) # Simulate a delay between words + # # Indicate the end of the stream + # data = { + # "choices": [ + # { + # "delta": {}, + # "index": 0, + # "finish_reason": "stop" + # } + # ] + # } + # yield f"data: {json.dumps(data)}\n\n" + # yield "data: [DONE]\n\n" + + # return StreamingResponse(word_stream(), media_type='text/event-stream') + async def check_availability(self, request: Request): + print("Checking availability") # Parse the incoming JSON request data = await request.json() task_availabilities = data.get('task_availabilities', {}) @@ -156,9 +230,9 @@ def run(self): app = FastAPI() router = APIRouter() router.add_api_route( - "/chat/completions", + "/v1/chat/completions", self.create_chat_completion, - dependencies=[Depends(self.verify_request)], + #dependencies=[Depends(self.verify_request)], methods=["POST"], ) router.add_api_route( diff --git a/neurons/validator.py b/neurons/validator.py index fa580804..e7021343 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -1,6 +1,7 @@ # ruff: noqa: E402 import asyncio import time +import json from prompting import settings from prompting.utils.profiling import profiler @@ -10,7 +11,7 @@ from loguru import logger from prompting.base.validator import BaseValidatorNeuron from prompting.base.forward import log_stream_results, handle_response -from prompting.base.dendrite import DendriteResponseEvent, StreamPromptingSynapse +from prompting.base.dendrite import DendriteResponseEvent from prompting.tasks.task_creation import task_loop from prompting.utils.logging import ValidatorLoggingEvent, ErrorLoggingEvent from prompting.rewards.scoring import task_scorer @@ -139,9 +140,9 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | return - body = {"seed": task.seed, "model": task.llm_model_id, "roles": ["user"], "messages": [task.query]} + body = {"seed": task.seed, "model": task.llm_model_id, "messages": [{'role': 'user', 'content': task.query},]} body_bytes = json.dumps(body).encode("utf-8") - stream_results = query_miners(task.__class__.__name__, uids, body) + stream_results = await query_miners(task.__class__.__name__, uids, body_bytes) log_stream_results(stream_results) diff --git a/prompting/base/dendrite.py b/prompting/base/dendrite.py index 046b737e..5e62284f 100644 --- a/prompting/base/dendrite.py +++ b/prompting/base/dendrite.py @@ -6,21 +6,21 @@ class SynapseStreamResult(BaseModel): - exception: BaseException | None = None + exception: str | None = None uid: int | None = None accumulated_chunks: list[str] | None = None accumulated_chunks_timings: list[float] | None = None tokens_per_chunk: list[int] | None = None - synapse: StreamPromptingSynapse | None = None + status_code: int = 200 + status_message: str = "" model_config = ConfigDict(arbitrary_types_allowed=True) @property def completion(self) -> str: - if not self.synapse: - logger.warning("Synapse is None") - return - return self.synapse.completion + if not self.accumulated_chunks: + return "" + return "".join(self.accumulated_chunks) def 
model_dump(self): # without a custom model dump, this leads to serialization errors in DendriteResponseEvent... @@ -31,7 +31,6 @@ def model_dump(self): "accumulated_chunks": self.accumulated_chunks, "accumulated_chunks_timings": self.accumulated_chunks_timings, "tokens_per_chunk": self.tokens_per_chunk, - "synapse": self.synapse.model_dump() if self.synapse is not None else None, } @@ -59,19 +58,16 @@ def process_stream_results(self) -> "DendriteResponseEvent": return self for stream_result in self.stream_results: # for some reason the language server needs this line to understand the type of stream_result - stream_result: SynapseStreamResult - synapse = stream_result.synapse + self.completions.append(stream_result.completion) + self.status_messages.append(stream_result.status_message) + status_code = stream_result.status_code - self.completions.append(synapse.completion) - self.status_messages.append(synapse.dendrite.status_message) - status_code = synapse.dendrite.status_code - - if len(synapse.completion) == 0 and status_code == 200: + if len(stream_result.completion) == 0 and status_code == 200: status_code = 204 self.status_codes.append(status_code) - process_time = synapse.dendrite.process_time or 0 + process_time = stream_result.accumulated_chunks_timings[-1] if stream_result.accumulated_chunks_timings else 0 if status_code == 200 or status_code == 204: self.timings.append(process_time) elif status_code == 408: diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 951b3fc0..ccf8d460 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -74,13 +74,14 @@ def generate_header( headers["Epistula-Secret-Signature-2"] = ( "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() ) - return headers + return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey, task): +def create_header_hook(hotkey, axon_hotkey): async def add_headers(request: httpx.Request): for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): - request.headers[key] = header - request.headers["Task"] = task + if key not in ['messages', 'model', 'stream']: + request.headers[key] = header + return request return add_headers @@ -142,8 +143,6 @@ async def handle_availability( return response.json() except Exception as e: - # If the miner is not available, we will return a failure response - bt.logging.error(f"Miner {uid} failed request: {e}") return {} @@ -154,54 +153,63 @@ async def handle_inference( body: Dict[str, Any], uid: int, ) -> SynapseStreamResult: - + exception = None + chunks = [] + chunk_timings = [] try: - with Timer() as timer: - axon_info = metagraph.axons[uid] - miner = openai.AsyncOpenAI( - base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", #Maybe need to change this? 
- api_key="Apex", - max_retries=0, - timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient(event_hooks={ - "request": [ - create_header_hook( - wallet.hotkey, axon_info.hotkey, task - ) - ] - }), - ) - try: - chunk_timings = [] - chunks = [] - chat = await miner.chat.completions.create(**generate_header(wallet.hotkey, body, signed_for=axon_info.hotkey)) - async for chunk in chat: - if chunk.choices[0].delta is None: - continue - if ( - chunk.choices[0].delta.content == "" - or chunk.choices[0].delta.content is None - ) and len(chunks) == 0: - continue - + start_time = time.time() + axon_info = metagraph.axons[uid] + miner = openai.AsyncOpenAI( + base_url=f"http://{axon_info.ip}:{axon_info.port}/v1", + api_key="Apex", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={ + "request": [ + create_header_hook( + wallet.hotkey, axon_info.hotkey + ) + ] + }), + ) + try: + payload = json.loads(body) + chat = await miner.chat.completions.create(messages=payload["messages"], model=payload["model"], stream=True) + async for chunk in chat: + if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) - chunk_timings.append(timer.elapsed_time()) + chunk_timings.append(time.time() - start_time) - except openai.APIConnectionError as e: - bt.logging.trace(f"Miner {uid} failed request: {e}") + except openai.APIConnectionError as e: + bt.logging.trace(f"Miner {uid} failed request: {e}") + exception = e - except Exception as e: - bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + except Exception as e: + bt.logging.trace(f"Unknown Error when sending to miner {uid}: {e}") + exception = e except Exception as e: exception = e bt.logging.error(f"{uid}: Error in forward for: {e}") bt.logging.error(traceback.format_exc()) finally: + if exception: + exception = str(exception) + if exception is None: + status_code = 200 + status_message = "Success" + elif isinstance(exception, openai.APIConnectionError): + status_code = 502 + status_message = str(exception) + else: + status_code = 500 + status_message = str(exception) + return SynapseStreamResult( accumulated_chunks=chunks, accumulated_chunks_timings=chunk_timings, - synapse=None, uid=uid, exception=exception, - ) \ No newline at end of file + status_code=status_code, + status_message=status_message, + ) diff --git a/prompting/base/forward.py b/prompting/base/forward.py index e3ee6254..25596bb6 100644 --- a/prompting/base/forward.py +++ b/prompting/base/forward.py @@ -86,13 +86,13 @@ async def generate_reference(task: BaseTextTask, pipeline: BasePipeline) -> str: def log_stream_results(stream_results: List[SynapseStreamResult]): failed_responses = [ - response for response in stream_results if response.exception is not None or response.synapse is None + response for response in stream_results if response.exception is not None or response.completion is None ] empty_responses = [ - response for response in stream_results if response.exception is None and response.synapse.completion == "" + response for response in stream_results if response.exception is None and response.completion == "" ] non_empty_responses = [ - response for response in stream_results if response.exception is None and response.synapse.completion != "" + response for response in stream_results if response.exception is None and response.completion != "" ] logger.debug(f"Total of non_empty responses: 
({len(non_empty_responses)})") diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index d5dfc84b..b3ab8f64 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -84,8 +84,8 @@ async def run_step(self): if not response: continue miner_availabilities.miners[uid] = MinerAvailability( - task_availabilities=response.task_availabilities, - llm_model_availabilities=response.llm_model_availabilities, + task_availabilities=response['task_availabilities'], + llm_model_availabilities=response['llm_model_availabilities'], ) logger.debug("Miner availabilities updated.") diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py index 1564d425..f95ebf2f 100644 --- a/prompting/rewards/reward.py +++ b/prompting/rewards/reward.py @@ -50,6 +50,8 @@ class BatchRewardOutput(BaseModel): @property def rewards_normalized(self) -> np.ndarray: + if self.rewards.size == 0: + return np.array([]) if self.rewards.shape != self.timings.shape: raise ValueError(f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}") if self.rewards.min() == self.rewards.max(): From 8e25f33a267c85da6581e69401404f086ad28f33 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Wed, 20 Nov 2024 17:26:16 +0000 Subject: [PATCH 03/40] SN1-331: Adding initial draft for endpoints --- neurons/miners/epistula_miner/miner.py | 75 ++++++++----------- neurons/validator.py | 14 +++- prompting/api/api.py | 15 ++++ prompting/api/gpt_endpoints/api.py | 45 +++++++++++ prompting/api/gpt_endpoints/serialisers.py | 0 prompting/api/miner_availabilities/api.py | 19 +++++ prompting/base/epistula.py | 69 ++++++++--------- .../miner_availability/miner_availability.py | 7 +- 8 files changed, 157 insertions(+), 87 deletions(-) create mode 100644 prompting/api/api.py create mode 100644 prompting/api/gpt_endpoints/api.py create mode 100644 prompting/api/gpt_endpoints/serialisers.py create mode 100644 prompting/api/miner_availabilities/api.py diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 06fb1c0f..52a53819 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -5,17 +5,14 @@ settings = settings.settings import time -import asyncio -import json import httpx import netaddr import uvicorn import requests import traceback import bittensor as bt -from starlette.responses import JSONResponse from loguru import logger -from fastapi import APIRouter, Depends, FastAPI, Request, HTTPException +from fastapi import APIRouter, FastAPI, Request, HTTPException from starlette.background import BackgroundTask from starlette.responses import StreamingResponse from bittensor.subtensor import serve_extrinsic @@ -34,44 +31,41 @@ SYSTEM_PROMPT = """You are a helpful agent that does it's best to answer all questions!""" -class OpenAIMiner(): - +class OpenAIMiner: def __init__(self): self.should_exit = False self.client = httpx.AsyncClient( - base_url="https://api.openai.com/v1", - headers={ - "Authorization": f"Bearer {settings.OPENAI_API_KEY}", - "Content-Type": "application/json", - }, - ) + base_url="https://api.openai.com/v1", + headers={ + "Authorization": f"Bearer {settings.OPENAI_API_KEY}", + "Content-Type": "application/json", + }, + ) print("OpenAI Key: ", settings.OPENAI_API_KEY) async def format_openai_query(self, request: Request): # Read the JSON data once data = await request.json() - + # Extract the required fields 
openai_request = {} for key in ["messages", "model", "stream"]: if key in data: openai_request[key] = data[key] openai_request["model"] = MODEL_ID - + return openai_request - + async def create_chat_completion(self, request: Request): bt.logging.info( "\u2713", f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", ) - req = self.client.build_request( - "POST", "chat/completions", json = await self.format_openai_query(request) - ) + logger.debug("Starting chat completion request...") + req = self.client.build_request("POST", "chat/completions", json=await self.format_openai_query(request)) r = await self.client.send(req, stream=True) - return StreamingResponse( - r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers - ) + logger.debug("Chat completion request returning...") + return StreamingResponse(r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers) # async def create_chat_completion(self, request: Request): # bt.logging.info( @@ -104,7 +98,7 @@ async def create_chat_completion(self, request: Request): # "\u2713", # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", # ) - + # async def word_stream(): # words = "This is a test stream".split() # for word in words: @@ -133,30 +127,27 @@ async def create_chat_completion(self, request: Request): # } # yield f"data: {json.dumps(data)}\n\n" # yield "data: [DONE]\n\n" - + # return StreamingResponse(word_stream(), media_type='text/event-stream') async def check_availability(self, request: Request): print("Checking availability") # Parse the incoming JSON request data = await request.json() - task_availabilities = data.get('task_availabilities', {}) - llm_model_availabilities = data.get('llm_model_availabilities', {}) - + task_availabilities = data.get("task_availabilities", {}) + llm_model_availabilities = data.get("llm_model_availabilities", {}) + # Set all task availabilities to True task_response = {key: True for key in task_availabilities} - + # Set all model availabilities to False model_response = {key: False for key in llm_model_availabilities} - + # Construct the response dictionary - response = { - 'task_availabilities': task_response, - 'llm_model_availabilities': model_response - } - + response = {"task_availabilities": task_response, "llm_model_availabilities": model_response} + return response - + async def verify_request( self, request: Request, @@ -170,18 +161,14 @@ async def verify_request( signed_by = request.headers.get("Epistula-Signed-By") signed_for = request.headers.get("Epistula-Signed-For") if signed_for != self.wallet.hotkey.ss58_address: - raise HTTPException( - status_code=400, detail="Bad Request, message is not intended for self" - ) + raise HTTPException(status_code=400, detail="Bad Request, message is not intended for self") if signed_by not in self.metagraph.hotkeys: raise HTTPException(status_code=401, detail="Signer not in metagraph") uid = self.metagraph.hotkeys.index(signed_by) stake = self.metagraph.S[uid].item() if not self.config.no_force_validator_permit and stake < 10000: - bt.logging.warning( - f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}" - ) + bt.logging.warning(f"Blacklisting request from {signed_by} [uid={uid}], not enough stake -- {stake}") raise HTTPException(status_code=401, detail="Stake below minimum: {stake}") # If anything is returned here, we can throw @@ -200,8 +187,7 @@ async def verify_request( raise HTTPException(status_code=400, 
detail=err) def run(self): - - external_ip = None #settings.EXTERNAL_IP + external_ip = None # settings.EXTERNAL_IP if not external_ip or external_ip == "[::]": try: external_ip = requests.get("https://checkip.amazonaws.com").text.strip() @@ -232,7 +218,7 @@ def run(self): router.add_api_route( "/v1/chat/completions", self.create_chat_completion, - #dependencies=[Depends(self.verify_request)], + # dependencies=[Depends(self.verify_request)], methods=["POST"], ) router.add_api_route( @@ -244,7 +230,8 @@ def run(self): fast_config = uvicorn.Config( app, host="0.0.0.0", - port=settings.AXON_PORT, + # port=settings.AXON_PORT, + port=8008, log_level="info", loop="asyncio", ) diff --git a/neurons/validator.py b/neurons/validator.py index e7021343..892649d8 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -25,6 +25,7 @@ from prompting.weight_setting.weight_setter import weight_setter from prompting.llms.utils import GPUInfo from prompting.base.epistula import query_miners +from prompting.api.api import start_api NEURON_SAMPLE_SIZE = 100 @@ -139,14 +140,18 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | logger.warning("No available miners. This should already have been caught earlier.") return - - body = {"seed": task.seed, "model": task.llm_model_id, "messages": [{'role': 'user', 'content': task.query},]} + body = { + "seed": task.seed, + "model": task.llm_model_id, + "messages": [ + {"role": "user", "content": task.query}, + ], + } body_bytes = json.dumps(body).encode("utf-8") stream_results = await query_miners(task.__class__.__name__, uids, body_bytes) log_stream_results(stream_results) - response_event = DendriteResponseEvent( stream_results=stream_results, uids=uids, timeout=settings.NEURON_TIMEOUT ) @@ -202,6 +207,9 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): + # start api + asyncio.create_task(start_api()) + GPUInfo.log_gpu_info() # start profiling asyncio.create_task(profiler.print_stats()) diff --git a/prompting/api/api.py b/prompting/api/api.py new file mode 100644 index 00000000..1f9da1f0 --- /dev/null +++ b/prompting/api/api.py @@ -0,0 +1,15 @@ +from fastapi import FastAPI +import uvicorn +from prompting.api.gpt_endpoints.api import router as gpt_router +from prompting.api.miner_availabilities.api import router as miner_availabilities_router +from loguru import logger + +app = FastAPI() + +app.include_router(gpt_router) +app.include_router(miner_availabilities_router) + + +async def start_api(): + logger.info("Starting API") + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py new file mode 100644 index 00000000..061f7cdb --- /dev/null +++ b/prompting/api/gpt_endpoints/api.py @@ -0,0 +1,45 @@ +from fastapi import APIRouter, Request +import openai +from prompting.settings import settings +from httpx import Timeout +from prompting.base.epistula import create_header_hook +from fastapi.responses import StreamingResponse +import json + +router = APIRouter() + + +async def process_stream(stream): + async for chunk in stream: + if hasattr(chunk, "choices") and chunk.choices: + # Extract the delta content from the chunk + delta = chunk.choices[0].delta + if hasattr(delta, "content") and delta.content is not None: + # Format as SSE data + yield f"data: {json.dumps(chunk.model_dump())}\n\n" + yield "data: [DONE]\n\n" + + +@router.post("/v1/chat/completions") +async def proxy_chat_completions(request: Request): + # Get the request body + 
body = await request.json() + + # Ensure streaming is enabled + body["stream"] = True + + # TODO: Forward to actual miners + miner = openai.AsyncOpenAI( + base_url="http://localhost:8008/v1", + max_retries=0, + timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, None)]} + ), + ) + + # Create streaming request to OpenAI + response = await miner.chat.completions.create(**body) + + # Return a streaming response with properly formatted chunks + return StreamingResponse(process_stream(response), media_type="text/event-stream") diff --git a/prompting/api/gpt_endpoints/serialisers.py b/prompting/api/gpt_endpoints/serialisers.py new file mode 100644 index 00000000..e69de29b diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py new file mode 100644 index 00000000..f45b058b --- /dev/null +++ b/prompting/api/miner_availabilities/api.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter +from prompting.miner_availability.miner_availability import miner_availabilities +from loguru import logger + +router = APIRouter() + + +@router.post("/miner_availabilities") +async def get_miner_availabilities(uids: list[int] | None = None): + if uids: + return {uid: miner_availabilities.miners.get(uid) for uid in uids} + logger.info(f"Returning all miner availabilities for {len(miner_availabilities.miners)} miners") + return miner_availabilities.miners + + +@router.get("/get_available_miners") +async def get_available_miners(task: str | None = None, model: str | None = None, k: int = 10): + logger.info(f"Getting {k} available miners for task {task} and model {model}") + return miner_availabilities.get_available_miners(task=task, model=model, k=k) diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index ccf8d460..4fbca51c 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -3,22 +3,18 @@ from uuid import uuid4 from math import ceil import time -from prompting.utils.timer import Timer from substrateinterface import Keypair import asyncio import bittensor as bt -import math -from os import urandom -import time import traceback -from typing import Dict, List, Optional, Tuple, Any, Annotated +from typing import Dict, List, Optional, Any, Annotated from prompting.base.dendrite import SynapseStreamResult from httpx import Timeout import httpx import openai -import requests from prompting.settings import settings + def verify_signature( signature, body: bytes, timestamp, uuid, signed_for, signed_by, now ) -> Optional[Annotated[str, "Error Message"]]: @@ -45,6 +41,7 @@ def verify_signature( return "Signature Mismatch" return None + def generate_header( hotkey: Keypair, body_bytes: Dict[str, Any], @@ -59,32 +56,26 @@ def generate_header( "Epistula-Uuid": uuid, "Epistula-Signed-By": hotkey.ss58_address, "Epistula-Request-Signature": "0x" - + hotkey.sign( - f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}" - ).hex(), + + hotkey.sign(f"{sha256(body_bytes).hexdigest()}.{uuid}.{timestamp}.{signed_for or ''}").hex(), } if signed_for: headers["Epistula-Signed-For"] = signed_for - headers["Epistula-Secret-Signature-0"] = ( - "0x" + hotkey.sign(str(timestampInterval - 1) + "." + signed_for).hex() - ) - headers["Epistula-Secret-Signature-1"] = ( - "0x" + hotkey.sign(str(timestampInterval) + "." 
+ signed_for).hex() - ) - headers["Epistula-Secret-Signature-2"] = ( - "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() - ) + headers["Epistula-Secret-Signature-0"] = "0x" + hotkey.sign(str(timestampInterval - 1) + "." + signed_for).hex() + headers["Epistula-Secret-Signature-1"] = "0x" + hotkey.sign(str(timestampInterval) + "." + signed_for).hex() + headers["Epistula-Secret-Signature-2"] = "0x" + hotkey.sign(str(timestampInterval + 1) + "." + signed_for).hex() return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey): + +def create_header_hook(hotkey, axon_hotkey=None): async def add_headers(request: httpx.Request): for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): - if key not in ['messages', 'model', 'stream']: + if key not in ["messages", "model", "stream"]: request.headers[key] = header return request return add_headers + async def query_miners(task, uids, body): try: tasks = [] @@ -92,7 +83,11 @@ async def query_miners(task, uids, body): tasks.append( asyncio.create_task( handle_inference( - settings.METAGRAPH, settings.WALLET, task, body, uid, + settings.METAGRAPH, + settings.WALLET, + task, + body, + uid, ) ) ) @@ -102,10 +97,11 @@ async def query_miners(task, uids, body): bt.logging.error(f"Error in forward for: {e}") bt.logging.error(traceback.format_exc()) return [] - + + async def query_availabilities(uids, task_config, model_config): - """ Query the availability of the miners """ - availability_dict = {'task_availabilities': task_config, 'llm_model_availabilities': model_config} + """Query the availability of the miners""" + availability_dict = {"task_availabilities": task_config, "llm_model_availabilities": model_config} # Query the availability of the miners try: tasks = [] @@ -113,18 +109,21 @@ async def query_availabilities(uids, task_config, model_config): tasks.append( asyncio.create_task( handle_availability( - settings.METAGRAPH, availability_dict, uid, + settings.METAGRAPH, + availability_dict, + uid, ) ) ) responses: List[SynapseStreamResult] = await asyncio.gather(*tasks) return responses - + except Exception as e: bt.logging.error(f"Error in availability call: {e}") bt.logging.error(traceback.format_exc()) return [] - + + async def handle_availability( metagraph: "bt.NonTorchMetagraph", request: Dict[str, Any], @@ -142,7 +141,7 @@ async def handle_availability( response.raise_for_status() return response.json() - except Exception as e: + except Exception: return {} @@ -164,17 +163,15 @@ async def handle_inference( api_key="Apex", max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient(event_hooks={ - "request": [ - create_header_hook( - wallet.hotkey, axon_info.hotkey - ) - ] - }), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [create_header_hook(wallet.hotkey, axon_info.hotkey)]} + ), ) try: payload = json.loads(body) - chat = await miner.chat.completions.create(messages=payload["messages"], model=payload["model"], stream=True) + chat = await miner.chat.completions.create( + messages=payload["messages"], model=payload["model"], stream=True + ) async for chunk in chat: if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index b3ab8f64..1a7648b4 100644 --- a/prompting/miner_availability/miner_availability.py +++ 
b/prompting/miner_availability/miner_availability.py @@ -3,7 +3,6 @@ from prompting.tasks.base_task import BaseTask from prompting.llms.model_zoo import ModelZoo from prompting.base.loop_runner import AsyncLoopRunner -from prompting.base.protocol import AvailabilitySynapse from prompting.settings import settings from prompting.tasks.task_registry import TaskRegistry from prompting.utils.uids import get_uids @@ -53,7 +52,7 @@ def get_available_miners( available = [uid for uid in available if self.miners[uid].is_model_available(model)] if k: available = random.sample(available, min(len(available), k)) - return available + return list(map(int, available)) class CheckMinerAvailability(AsyncLoopRunner): @@ -84,8 +83,8 @@ async def run_step(self): if not response: continue miner_availabilities.miners[uid] = MinerAvailability( - task_availabilities=response['task_availabilities'], - llm_model_availabilities=response['llm_model_availabilities'], + task_availabilities=response["task_availabilities"], + llm_model_availabilities=response["llm_model_availabilities"], ) logger.debug("Miner availabilities updated.") From ee5351fc5709746973a53574034af9283e80058a Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Fri, 22 Nov 2024 15:45:20 +0000 Subject: [PATCH 04/40] SN1-331: Adding API keys --- prompting/api/api.py | 25 ++++++-- prompting/api/api_keys.json | 1 + prompting/api/api_managements/api.py | 77 +++++++++++++++++++++++ prompting/api/gpt_endpoints/api.py | 11 +++- prompting/api/miner_availabilities/api.py | 12 +++- 5 files changed, 117 insertions(+), 9 deletions(-) create mode 100644 prompting/api/api_keys.json create mode 100644 prompting/api/api_managements/api.py diff --git a/prompting/api/api.py b/prompting/api/api.py index 1f9da1f0..e60e1d6d 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -1,15 +1,28 @@ from fastapi import FastAPI +from loguru import logger + +# This ensures uvicorn is imported first import uvicorn -from prompting.api.gpt_endpoints.api import router as gpt_router + +# Now we can safely import the rest +from prompting.api.api_managements.api import router as api_management_router from prompting.api.miner_availabilities.api import router as miner_availabilities_router -from loguru import logger +from prompting.api.gpt_endpoints.api import router as gpt_router app = FastAPI() -app.include_router(gpt_router) +# Add routers at the application level +app.include_router(api_management_router) app.include_router(miner_availabilities_router) +app.include_router(gpt_router) + + +@app.get("/health") +def health(): + logger.info("Health endpoint accessed.") + return {"status": "healthy"} -async def start_api(): - logger.info("Starting API") - uvicorn.run(app, host="0.0.0.0", port=8000) +if __name__ == "__main__": + logger.info("Starting API...") + uvicorn.run("api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=True) diff --git a/prompting/api/api_keys.json b/prompting/api/api_keys.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/prompting/api/api_keys.json @@ -0,0 +1 @@ +{} diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py new file mode 100644 index 00000000..07c757a1 --- /dev/null +++ b/prompting/api/api_managements/api.py @@ -0,0 +1,77 @@ +from fastapi import APIRouter, FastAPI, HTTPException, Header, Depends +import json +import secrets + +from prompting.settings import settings + + +router = APIRouter() + + +# Load and save functions for API keys +def load_api_keys(): + try: + with 
open(settings.API_KEYS_FILE, "r") as f: + return json.load(f) + except FileNotFoundError: + return {} + + +def save_api_keys(api_keys): + with open(settings.API_KEYS_FILE, "w") as f: + json.dump(api_keys, f) + + +# Use lifespan to initialize API keys +_keys = load_api_keys() +save_api_keys(_keys) + + +# Dependency to validate the admin key +def validate_admin_key(admin_key: str = Header(...)): + if admin_key != settings.ADMIN_KEY: + raise HTTPException(status_code=403, detail="Invalid admin key") + + +# Dependency to validate API keys +def validate_api_key(api_key: str = Header(...)): + if api_key not in _keys: + raise HTTPException(status_code=403, detail="Invalid API key") + return _keys[api_key] + + +@router.post("/create-api-key/") +def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key)): + """Creates a new API key with a specified rate limit.""" + new_api_key = secrets.token_hex(16) + _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} + return {"message": "API key created", "api_key": new_api_key} + + +@router.put("/modify-api-key/{api_key}") +def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(validate_admin_key)): + """Modifies the rate limit of an existing API key.""" + if api_key not in _keys: + raise HTTPException(status_code=404, detail="API key not found") + _keys[api_key]["rate_limit"] = rate_limit + return {"message": "API key updated", "api_key": api_key} + + +@router.delete("/delete-api-key/{api_key}") +def delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): + """Deletes an existing API key.""" + if api_key not in _keys: + raise HTTPException(status_code=404, detail="API key not found") + del _keys[api_key] + return {"message": "API key deleted"} + + +@router.get("/demo-endpoint/") +def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): + """A demo endpoint that requires a valid API key.""" + return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} + + +# Create FastAPI app and include the router +app = FastAPI() +app.include_router(router) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 061f7cdb..5d9735fc 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -5,6 +5,7 @@ from prompting.base.epistula import create_header_hook from fastapi.responses import StreamingResponse import json +from prompting.miner_availability.miner_availability import miner_availabilities router = APIRouter() @@ -27,10 +28,16 @@ async def proxy_chat_completions(request: Request): # Ensure streaming is enabled body["stream"] = True + if not settings.mode == "mock" and not ( + available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) + ): + return "No miners available" + axon_info = settings.METAGRAPH.axons[available_miners[0]] + base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" # TODO: Forward to actual miners miner = openai.AsyncOpenAI( - base_url="http://localhost:8008/v1", + base_url=base_url, max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), http_client=openai.DefaultAsyncHttpxClient( @@ -41,5 +48,7 @@ async def proxy_chat_completions(request: Request): # Create streaming request to OpenAI response = await miner.chat.completions.create(**body) + # TODO: Add final response to scoring_queue + # Return a streaming response with properly formatted chunks return 
StreamingResponse(process_stream(response), media_type="text/event-stream") diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py index f45b058b..d8f43a63 100644 --- a/prompting/api/miner_availabilities/api.py +++ b/prompting/api/miner_availabilities/api.py @@ -1,6 +1,8 @@ from fastapi import APIRouter from prompting.miner_availability.miner_availability import miner_availabilities from loguru import logger +from prompting.tasks.task_registry import TaskRegistry +from typing import Literal router = APIRouter() @@ -14,6 +16,12 @@ async def get_miner_availabilities(uids: list[int] | None = None): @router.get("/get_available_miners") -async def get_available_miners(task: str | None = None, model: str | None = None, k: int = 10): +async def get_available_miners( + task: Literal[tuple([config.task.__name__ for config in TaskRegistry.task_configs])] | None = None, + model: str | None = None, + k: int = 10, +): logger.info(f"Getting {k} available miners for task {task} and model {model}") - return miner_availabilities.get_available_miners(task=task, model=model, k=k) + task_configs = [config for config in TaskRegistry.task_configs if config.task.__name__ == task] + task_config = task_configs[0] if task_configs else None + return miner_availabilities.get_available_miners(task=task_config, model=model, k=k) From 4ebe74d32d327e0b5fc2c55fb96cfbdd4ad5f6d6 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Fri, 22 Nov 2024 15:51:56 +0000 Subject: [PATCH 05/40] Adding test miner ids --- prompting/api/gpt_endpoints/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 5d9735fc..10f742e0 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -28,7 +28,9 @@ async def proxy_chat_completions(request: Request): # Ensure streaming is enabled body["stream"] = True - if not settings.mode == "mock" and not ( + if settings.TEST_MINER_IDS: + available_miners = settings.TEST_MINER_IDS + elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) ): return "No miners available" From 20e5376473ed3299bbafffb130b17fe0ea4434bb Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Sun, 24 Nov 2024 16:41:03 +0000 Subject: [PATCH 06/40] Adding tasks to scoring queue --- api_keys.json | 1 + neurons/miners/epistula_miner/miner.py | 6 +-- neurons/miners/inference_miner/miner.py | 2 +- neurons/validator.py | 13 +++-- prompting/api/api.py | 5 +- prompting/api/api_managements/api.py | 8 +-- prompting/api/gpt_endpoints/api.py | 66 +++++++++++++++++++++---- prompting/datasets/base.py | 2 +- prompting/llms/vllm_llm.py | 2 +- prompting/settings.py | 34 ++++++++++--- prompting/utils/timer.py | 5 +- 11 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 api_keys.json diff --git a/api_keys.json b/api_keys.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/api_keys.json @@ -0,0 +1 @@ +{} diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 52a53819..5f93154d 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -160,10 +160,10 @@ async def verify_request( # But use some specific fields from the body signed_by = request.headers.get("Epistula-Signed-By") signed_for = request.headers.get("Epistula-Signed-For") - if signed_for != self.wallet.hotkey.ss58_address: - raise 
HTTPException(status_code=400, detail="Bad Request, message is not intended for self") + if signed_for and signed_for != self.wallet.hotkey.ss58_address: + raise HTTPException(status_code=400, detail="EpistulaError: The message is not signed for this hotkey") if signed_by not in self.metagraph.hotkeys: - raise HTTPException(status_code=401, detail="Signer not in metagraph") + raise HTTPException(status_code=401, detail="EpistulaError: Signer not in metagraph") uid = self.metagraph.hotkeys.index(signed_by) stake = self.metagraph.S[uid].item() diff --git a/neurons/miners/inference_miner/miner.py b/neurons/miners/inference_miner/miner.py index 0af511bb..0b180bce 100644 --- a/neurons/miners/inference_miner/miner.py +++ b/neurons/miners/inference_miner/miner.py @@ -143,7 +143,7 @@ async def _forward( init_time, timeout_threshold, ) - logger.info(f"Time for complete response: {timer.elapsed_time}") + logger.info(f"Time for complete response: {timer.final_time}") return synapse.create_streaming_response(token_streamer) def check_availability(self, synapse: AvailabilitySynapse) -> AvailabilitySynapse: diff --git a/neurons/validator.py b/neurons/validator.py index 892649d8..fc435f98 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -105,7 +105,7 @@ async def run_step(self, k: int, timeout: float) -> ValidatorLoggingEvent | Erro if response_event is None: logger.warning("No response event collected. This should not be happening.") return - logger.debug(f"Collected responses in {timer.elapsed_time:.2f} seconds") + logger.debug(f"Collected responses in {timer.final_time:.2f} seconds") # scoring_manager will score the responses as and when the correct model is loaded task_scorer.add_to_queue( @@ -121,7 +121,7 @@ async def run_step(self, k: int, timeout: float) -> ValidatorLoggingEvent | Erro return ValidatorLoggingEvent( block=self.estimate_block, step=self.step, - step_time=timer.elapsed_time, + step_time=timer.final_time, response_event=response_event, task_id=task.task_id, ) @@ -174,7 +174,7 @@ async def forward(self): if not event: return - event.forward_time = timer.elapsed_time + event.forward_time = timer.final_time def __enter__(self): if settings.NO_BACKGROUND_THREAD: @@ -207,7 +207,6 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): - # start api asyncio.create_task(start_api()) GPUInfo.log_gpu_info() @@ -228,9 +227,9 @@ async def main(): # start scoring tasks in separate loop asyncio.create_task(task_scorer.start()) - # TODO: Think about whether we want to store the task queue locally in case of a crash - # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating - # TODO: Make weight setting happen as specific intervals as we load/unload models + # # TODO: Think about whether we want to store the task queue locally in case of a crash + # # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating + # # TODO: Make weight setting happen as specific intervals as we load/unload models with Validator() as v: while True: logger.info( diff --git a/prompting/api/api.py b/prompting/api/api.py index e60e1d6d..b797273b 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -23,6 +23,7 @@ def health(): return {"status": "healthy"} -if __name__ == "__main__": +# if __name__ == "__main__": +async def start_api(): logger.info("Starting API...") - uvicorn.run("api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=True) + 
uvicorn.run("prompting.api.api:app", host="0.0.0.0", port=8004, loop="asyncio", reload=False) diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 07c757a1..40d21577 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,4 +1,4 @@ -from fastapi import APIRouter, FastAPI, HTTPException, Header, Depends +from fastapi import APIRouter, HTTPException, Header, Depends import json import secrets @@ -72,6 +72,6 @@ def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} -# Create FastAPI app and include the router -app = FastAPI() -app.include_router(router) +# # Create FastAPI app and include the router +# app = FastAPI() +# app.include_router(router) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 10f742e0..f7162da8 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -1,4 +1,6 @@ from fastapi import APIRouter, Request +from loguru import logger +import random import openai from prompting.settings import settings from httpx import Timeout @@ -6,18 +8,50 @@ from fastapi.responses import StreamingResponse import json from prompting.miner_availability.miner_availability import miner_availabilities +from prompting.tasks.inference import InferenceTask +from typing import AsyncGenerator +from prompting.rewards.scoring import task_scorer +from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult +from prompting.utils.timer import Timer router = APIRouter() -async def process_stream(stream): - async for chunk in stream: - if hasattr(chunk, "choices") and chunk.choices: - # Extract the delta content from the chunk - delta = chunk.choices[0].delta - if hasattr(delta, "content") and delta.content is not None: - # Format as SSE data +async def process_and_collect_stream(miner_id: int, request: dict, response: AsyncGenerator): + collected_content = [] + collected_chunks_timings = [] + with Timer() as timer: + async for chunk in response: + logger.debug(f"Chunk: {chunk}") + if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): + collected_content.append(chunk.choices[0].delta.content) + collected_chunks_timings.append(timer.elapsed_time()) + # Format in SSE format yield f"data: {json.dumps(chunk.model_dump())}\n\n" + # After streaming is complete, put the response in the queue + task = InferenceTask( + query=request["messages"][-1]["content"], + model=request.get("model"), + seed=request.get("seed"), + response="".join(collected_content), + ) + logger.debug(f"Adding Organic Request to scoring queue: {task}") + response_event = DendriteResponseEvent( + stream_results=[ + SynapseStreamResult( + uid=miner_id, + accumulated_chunks=collected_content, + accumulated_chunks_timings=collected_chunks_timings, + ) + ], + uids=[miner_id], + timeout=settings.NEURON_TIMEOUT, + ) + + # TODO: Estimate block and step + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + ) yield "data: [DONE]\n\n" @@ -25,17 +59,28 @@ async def process_stream(stream): async def proxy_chat_completions(request: Request): # Get the request body body = await request.json() + body["seed"] = body.get("seed") or str( + random.randint(0, 1_000_000) + ) # for some reason needs to be passed as string... it seems? 
# Ensure streaming is enabled - body["stream"] = True + # body["stream"] = True if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task="Inference", model=None) + available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): return "No miners available" axon_info = settings.METAGRAPH.axons[available_miners[0]] + + # TODO: Remove this/build better testing mechanism base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # base_url = "http://localhost:8008/v1" + # available_miners = [-1] + + miner_id = available_miners[0] + + logger.debug(f"Using base_url: {base_url}") # TODO: Forward to actual miners miner = openai.AsyncOpenAI( @@ -53,4 +98,5 @@ async def proxy_chat_completions(request: Request): # TODO: Add final response to scoring_queue # Return a streaming response with properly formatted chunks - return StreamingResponse(process_stream(response), media_type="text/event-stream") + # return await process_and_collect_stream(process_stream(response)) + return StreamingResponse(process_and_collect_stream(miner_id, body, response), media_type="text/event-stream") diff --git a/prompting/datasets/base.py b/prompting/datasets/base.py index 2a92c39b..aabd33ed 100644 --- a/prompting/datasets/base.py +++ b/prompting/datasets/base.py @@ -81,7 +81,7 @@ def next(self, method: Literal["random", "search", "get"] = "random", **kwargs) context.source = self.__class__.__name__ context.stats = { - "fetch_time": timer.elapsed_time, + "fetch_time": timer.final_time, "num_tries": tries, "fetch_method": method, "next_kwargs": kwargs, diff --git a/prompting/llms/vllm_llm.py b/prompting/llms/vllm_llm.py index 2ffb1c43..2d04726d 100644 --- a/prompting/llms/vllm_llm.py +++ b/prompting/llms/vllm_llm.py @@ -270,7 +270,7 @@ def generate(self, prompts, sampling_params=None): for output in outputs: results.append(output.outputs[0].text.strip()) logger.debug( - f"PROMPT: {prompts}\n\nRESPONSES: {results}\n\nSAMPLING PARAMS: {sampling_params}\n\nTIME FOR RESPONSE: {timer.elapsed_time}" + f"PROMPT: {prompts}\n\nRESPONSES: {results}\n\nSAMPLING PARAMS: {sampling_params}\n\nTIME FOR RESPONSE: {timer.final_time}" ) return results if len(results) > 1 else results[0] diff --git a/prompting/settings.py b/prompting/settings.py index 2d00a011..da85f9e2 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -1,14 +1,11 @@ import os from functools import cached_property from typing import Any, Literal, Optional - -import bittensor as bt -import dotenv -import torch from loguru import logger +import dotenv from pydantic import Field, model_validator from pydantic_settings import BaseSettings - +import bittensor as bt from prompting.utils.config import config @@ -30,7 +27,7 @@ class Settings(BaseSettings): # Neuron. NEURON_EPOCH_LENGTH: int = Field(100, env="NEURON_EPOCH_LENGTH") - NEURON_DEVICE: str = Field("cuda" if torch.cuda.is_available() else "cpu", env="NEURON_DEVICE") + NEURON_DEVICE: str | None = Field(None, env="NEURON_DEVICE") NEURON_GPUS: int = Field(1, env="NEURON_GPUS") # Logging. @@ -66,6 +63,10 @@ class Settings(BaseSettings): SCORING_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="SCORING_QUEUE_LENGTH_THRESHOLD") HF_TOKEN: Optional[str] = Field(None, env="HF_TOKEN") + # API Management. 
+    API_KEYS_FILE: str = Field("api_keys.json", env="API_KEYS_FILE")
+    ADMIN_KEY: str | None = Field(None, env="ADMIN_KEY")
+
     # Additional Fields.
     NETUID: Optional[int] = Field(61, env="NETUID")
     TEST: bool = False
@@ -127,8 +128,9 @@ def load_env_file(cls, mode: Literal["miner", "validator", "mock"]):
             dotenv_file = ".env.miner"
         elif mode == "validator":
             dotenv_file = ".env.validator"
+        # For mock testing, still make validator env vars available where possible.
         elif mode == "mock":
-            dotenv_file = None
+            dotenv_file = ".env.validator"
         else:
             raise ValueError(f"Invalid mode: {mode}")
@@ -154,15 +156,23 @@ def load(cls, mode: Literal["miner", "validator", "mock"]) -> "Settings":
     def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]:
         mode = values["mode"]
         netuid = values.get("NETUID", 61)
+
         if netuid is None:
             raise ValueError("NETUID must be specified")
         values["TEST"] = netuid != 1
         if mode == "mock":
             values["MOCK"] = True
+            values["NEURON_DEVICE"] = "cpu"
             logger.info("Running in mock mode. Bittensor objects will not be initialized.")
             return values
+        # load slow packages only if not in mock mode
+        import torch
+
+        if not values.get("NEURON_DEVICE"):
+            values["NEURON_DEVICE"] = "cuda" if torch.cuda.is_available() else "cpu"
+
         # Ensure SAVE_PATH exists.
         save_path = values.get("SAVE_PATH", "./storage")
         if not os.path.exists(save_path):
@@ -177,6 +187,8 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]:
             raise Exception(
                 "You must provide an OpenAI API key as a backup. It is recommended to also provide an SN19 API key + url to avoid incurring API costs."
             )
+        if mode == "validator" and values.get("ADMIN_KEY") is None:
+            raise Exception("You must provide an admin key to access the API.")
         return values
 
     @cached_property
@@ -208,4 +220,12 @@ def DENDRITE(self) -> bt.dendrite:
         return bt.dendrite(wallet=self.WALLET)
 
 
+logger.info("Settings class instantiated.")
 settings: Optional[Settings] = None
+try:
+    settings: Optional[Settings] = Settings.load(mode="mock")
+except Exception as e:
+    logger.exception(f"Error loading settings: {e}")
+    settings = None
+logger.info("Settings loaded.")
diff --git a/prompting/utils/timer.py b/prompting/utils/timer.py
index 039e5bad..4fa05496 100644
--- a/prompting/utils/timer.py
+++ b/prompting/utils/timer.py
@@ -6,6 +6,9 @@ def __enter__(self):
         self.start_time = time.perf_counter()
         return self
 
+    def elapsed_time(self):
+        # Seconds elapsed since the timer was entered.
+        return time.perf_counter() - self.start_time
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.end_time = time.perf_counter()
-        self.elapsed_time = self.end_time - self.start_time
+        self.final_time = self.end_time - self.start_time

From 37a8874ecc15824f7424bfe796dd6907affbf52b Mon Sep 17 00:00:00 2001
From: Hollyqui
Date: Mon, 25 Nov 2024 15:55:06 +0000
Subject: [PATCH 07/40] Enabling non-streaming response + bug fixes

---
 prompting/api/gpt_endpoints/api.py | 64 ++++++++++++++++++++++--------
 prompting/settings.py              |  5 +--
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py
index f7162da8..58d664bf 100644
--- a/prompting/api/gpt_endpoints/api.py
+++ b/prompting/api/gpt_endpoints/api.py
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, Request, HTTPException
 from loguru import logger
 import random
 import openai
@@ -31,6 +31,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy
     # After streaming is complete, put the response in the queue
     task = InferenceTask(
query=request["messages"][-1]["content"], + messages=[message["content"] for message in request["messages"]], model=request.get("model"), seed=request.get("seed"), response="".join(collected_content), @@ -46,6 +47,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy ], uids=[miner_id], timeout=settings.NEURON_TIMEOUT, + completions=["".join(collected_content)], ) # TODO: Estimate block and step @@ -55,34 +57,34 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy yield "data: [DONE]\n\n" +@router.post("/mixture_of_agents") +async def mixture_of_agents(request: Request): + # body = await request.json() + # return {"message": "Mixture of Agents"} + return {"message": "Mixture of Agents"} + + @router.post("/v1/chat/completions") async def proxy_chat_completions(request: Request): - # Get the request body body = await request.json() body["seed"] = body.get("seed") or str( random.randint(0, 1_000_000) ) # for some reason needs to be passed as string... it seems? + logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - # Ensure streaming is enabled - # body["stream"] = True if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): return "No miners available" - axon_info = settings.METAGRAPH.axons[available_miners[0]] - # TODO: Remove this/build better testing mechanism + axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" # base_url = "http://localhost:8008/v1" - # available_miners = [-1] - miner_id = available_miners[0] - logger.debug(f"Using base_url: {base_url}") - # TODO: Forward to actual miners miner = openai.AsyncOpenAI( base_url=base_url, max_retries=0, @@ -92,11 +94,39 @@ async def proxy_chat_completions(request: Request): ), ) - # Create streaming request to OpenAI - response = await miner.chat.completions.create(**body) - - # TODO: Add final response to scoring_queue + try: + with Timer() as timer: + # Create request to OpenAI + response = await miner.chat.completions.create(**body) + if body.get("stream"): + # If streaming is requested, return streaming response + return StreamingResponse( + process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" + ) + except Exception as e: + logger.exception(f"Error coming from Miner: {e}") + raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - # Return a streaming response with properly formatted chunks - # return await process_and_collect_stream(process_stream(response)) - return StreamingResponse(process_and_collect_stream(miner_id, body, response), media_type="text/event-stream") + response_event = DendriteResponseEvent( + stream_results=[ + SynapseStreamResult( + uid=miner_id, + accumulated_chunks=[response.choices[0].message.content], + accumulated_chunks_timings=[timer.final_time], + ) + ], + completions=[response.choices[0].message.content], + uids=[miner_id], + timeout=settings.NEURON_TIMEOUT, + ) + task = InferenceTask( + query=body["messages"][-1]["content"], + messages=[message["content"] for message in body["messages"]], + model=body.get("model"), + seed=body.get("seed"), + response=response_event, + ) + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, 
block=-1, step=-1, task_id=task.task_id + ) + return response diff --git a/prompting/settings.py b/prompting/settings.py index da85f9e2..6bbc47ad 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -160,7 +160,8 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]: if netuid is None: raise ValueError("NETUID must be specified") values["TEST"] = netuid != 1 - + if values.get("TEST_MINER_IDS"): + values["TEST_MINER_IDS"] = str(values["TEST_MINER_IDS"]).split(",") if mode == "mock": values["MOCK"] = True values["NEURON_DEVICE"] = "cpu" @@ -177,8 +178,6 @@ def complete_settings(cls, values: dict[str, Any]) -> dict[str, Any]: save_path = values.get("SAVE_PATH", "./storage") if not os.path.exists(save_path): os.makedirs(save_path) - if values.get("TEST_MINER_IDS"): - values["TEST_MINER_IDS"] = str(values["TEST_MINER_IDS"]).split(",") if values.get("SN19_API_KEY") is None or values.get("SN19_API_URL") is None: logger.warning( "It is strongly recommended to provide an SN19 API KEY + URL to avoid incurring OpenAI API costs." From a09cd9bcbe0ecda25dcc695adf08917de8140002 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Mon, 25 Nov 2024 16:40:46 +0000 Subject: [PATCH 08/40] Making model loading non-blocking --- prompting/llms/model_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prompting/llms/model_manager.py b/prompting/llms/model_manager.py index 55fa5e23..0adf032c 100644 --- a/prompting/llms/model_manager.py +++ b/prompting/llms/model_manager.py @@ -182,7 +182,8 @@ async def run_step(self): logger.debug(f"Active models: {model_manager.active_models.keys()}") # Load the selected model - model_manager.load_model(selected_model) + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, self.llm_model_manager.load_model, selected_model) await asyncio.sleep(0.01) From 566dc770fde2035dca61c85cfdcda853f455a29c Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 11:21:30 +0000 Subject: [PATCH 09/40] Protecting endpoints with API key --- prompting/api/gpt_endpoints/api.py | 24 ++++++++++-------------- prompting/base/epistula.py | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 58d664bf..54111122 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -13,6 +13,8 @@ from prompting.rewards.scoring import task_scorer from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.utils.timer import Timer +from prompting.api.api_managements.api import validate_api_key +from fastapi import Depends router = APIRouter() @@ -26,9 +28,8 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): collected_content.append(chunk.choices[0].delta.content) collected_chunks_timings.append(timer.elapsed_time()) - # Format in SSE format yield f"data: {json.dumps(chunk.model_dump())}\n\n" - # After streaming is complete, put the response in the queue + task = InferenceTask( query=request["messages"][-1]["content"], messages=[message["content"] for message in request["messages"]], @@ -50,7 +51,6 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy completions=["".join(collected_content)], ) - # TODO: Estimate block and step task_scorer.add_to_queue( task=task, response=response_event, 
dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) @@ -58,18 +58,14 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy @router.post("/mixture_of_agents") -async def mixture_of_agents(request: Request): - # body = await request.json() - # return {"message": "Mixture of Agents"} +async def mixture_of_agents(request: Request, api_key_data: dict = Depends(validate_api_key)): return {"message": "Mixture of Agents"} @router.post("/v1/chat/completions") -async def proxy_chat_completions(request: Request): +async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): body = await request.json() - body["seed"] = body.get("seed") or str( - random.randint(0, 1_000_000) - ) # for some reason needs to be passed as string... it seems? + body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") if settings.TEST_MINER_IDS: @@ -77,11 +73,10 @@ async def proxy_chat_completions(request: Request): elif not settings.mode == "mock" and not ( available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) ): - return "No miners available" + raise HTTPException(status_code=503, detail="No miners available") axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -96,10 +91,8 @@ async def proxy_chat_completions(request: Request): try: with Timer() as timer: - # Create request to OpenAI response = await miner.chat.completions.create(**body) if body.get("stream"): - # If streaming is requested, return streaming response return StreamingResponse( process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" ) @@ -119,6 +112,7 @@ async def proxy_chat_completions(request: Request): uids=[miner_id], timeout=settings.NEURON_TIMEOUT, ) + task = InferenceTask( query=body["messages"][-1]["content"], messages=[message["content"] for message in body["messages"]], @@ -126,7 +120,9 @@ async def proxy_chat_completions(request: Request): seed=body.get("seed"), response=response_event, ) + task_scorer.add_to_queue( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) + return response diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 4fbca51c..afe4fa92 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -66,11 +66,29 @@ def generate_header( return {**headers, **json.loads(body_bytes)} -def create_header_hook(hotkey, axon_hotkey=None): +def create_header_hook(hotkey, axon_hotkey=None, api_key=None): + """ + Creates a header hook function that adds authentication headers including API key. 
+ + Args: + hotkey: The wallet hotkey + axon_hotkey: Optional axon hotkey + api_key: Optional API key for endpoint authentication + + Returns: + Async function that adds headers to the request + """ + async def add_headers(request: httpx.Request): + # Add standard headers for key, header in generate_header(hotkey, request.read(), axon_hotkey).items(): if key not in ["messages", "model", "stream"]: request.headers[key] = header + + # Add API key if provided + if api_key: + request.headers["api-key"] = api_key + return request return add_headers From 9d810ce3d4b7d1cdf77b3701690eecd92120c212 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 14:43:05 +0000 Subject: [PATCH 10/40] Improving error messages + improving API key saving --- api_keys.json | 2 +- neurons/miners/epistula_miner/miner.py | 63 -------------------------- prompting/api/api_managements/api.py | 5 ++ prompting/api/gpt_endpoints/api.py | 6 ++- 4 files changed, 10 insertions(+), 66 deletions(-) diff --git a/api_keys.json b/api_keys.json index 0967ef42..1e41dd87 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{} +{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}} diff --git a/neurons/miners/epistula_miner/miner.py b/neurons/miners/epistula_miner/miner.py index 5f93154d..42c302e8 100644 --- a/neurons/miners/epistula_miner/miner.py +++ b/neurons/miners/epistula_miner/miner.py @@ -67,69 +67,6 @@ async def create_chat_completion(self, request: Request): logger.debug("Chat completion request returning...") return StreamingResponse(r.aiter_raw(), background=BackgroundTask(r.aclose), headers=r.headers) - # async def create_chat_completion(self, request: Request): - # bt.logging.info( - # "\u2713", - # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", - # ) - # openai_request_body = await self.format_openai_query(request) - # try: - # req = self.client.build_request( - # "POST", "chat/completions", json=openai_request_body - # ) - # r = await self.client.send(req, stream=True) - # # Check for non-200 status code - # if r.status_code != 200: - # error_content = await r.aread() - # bt.logging.error(f"OpenAI API Error {r.status_code}: {error_content}") - # return JSONResponse( - # content=json.loads(error_content), - # status_code=r.status_code - # ) - # except Exception as e: - # bt.logging.error(f"Exception during OpenAI API call: {str(e)}") - # return JSONResponse( - # content={"error": str(e)}, - # status_code=500 - # ) - - # async def create_chat_completion(self, request: Request): - # bt.logging.info( - # "\u2713", - # f"Getting Chat Completion request from {request.headers.get('Epistula-Signed-By', '')[:8]}!", - # ) - - # async def word_stream(): - # words = "This is a test stream".split() - # for word in words: - # # Simulate the OpenAI streaming response format - # data = { - # "choices": [ - # { - # "delta": {"content": word + ' '}, - # "index": 0, - # "finish_reason": None - # } - # ] - # } - # # Yield the data in SSE (Server-Sent Events) format - # yield f"data: {json.dumps(data)}\n\n" - # await asyncio.sleep(0.1) # Simulate a delay between words - # # Indicate the end of the stream - # data = { - # "choices": [ - # { - # "delta": {}, - # "index": 0, - # "finish_reason": "stop" - # } - # ] - # } - # yield f"data: {json.dumps(data)}\n\n" - # yield "data: [DONE]\n\n" - - # return StreamingResponse(word_stream(), media_type='text/event-stream') - async def check_availability(self, request: Request): print("Checking availability") # Parse the 
incoming JSON request diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 40d21577..5538dc96 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, HTTPException, Header, Depends import json +from loguru import logger import secrets from prompting.settings import settings @@ -24,6 +25,7 @@ def save_api_keys(api_keys): # Use lifespan to initialize API keys _keys = load_api_keys() +logger.info(f"Loaded API keys: {_keys}") save_api_keys(_keys) @@ -45,6 +47,7 @@ def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key) """Creates a new API key with a specified rate limit.""" new_api_key = secrets.token_hex(16) _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} + save_api_keys(_keys) return {"message": "API key created", "api_key": new_api_key} @@ -54,6 +57,7 @@ def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(valid if api_key not in _keys: raise HTTPException(status_code=404, detail="API key not found") _keys[api_key]["rate_limit"] = rate_limit + save_api_keys(_keys) return {"message": "API key updated", "api_key": api_key} @@ -63,6 +67,7 @@ def delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): if api_key not in _keys: raise HTTPException(status_code=404, detail="API key not found") del _keys[api_key] + save_api_keys(_keys) return {"message": "API key deleted"} diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 54111122..010d7f2b 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -71,9 +71,11 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=None) + available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=body.get("model")) ): - raise HTTPException(status_code=503, detail="No miners available") + raise HTTPException( + status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" + ) axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" From 685290a270a95c39b5c50c156d1f5f7134fd10d2 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 16:03:07 +0000 Subject: [PATCH 11/40] Signing epistula properly for recipient --- api_keys.json | 2 +- prompting/api/api.py | 6 +++--- prompting/api/gpt_endpoints/api.py | 7 +++++-- prompting/tasks/task_registry.py | 6 ++++++ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/api_keys.json b/api_keys.json index 1e41dd87..93b0e261 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}} +{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} diff --git a/prompting/api/api.py b/prompting/api/api.py index b797273b..7a528ece 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -12,9 +12,9 @@ app = FastAPI() # Add routers at the application level -app.include_router(api_management_router) -app.include_router(miner_availabilities_router) -app.include_router(gpt_router) 
+app.include_router(api_management_router, prefix="/api_management", tags=["api_management"]) +app.include_router(miner_availabilities_router, prefix="/miner_availabilities", tags=["miner_availabilities"]) +app.include_router(gpt_router, tags=["gpt"]) @app.get("/health") diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 010d7f2b..f43770b5 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -9,6 +9,7 @@ import json from prompting.miner_availability.miner_availability import miner_availabilities from prompting.tasks.inference import InferenceTask +from prompting.tasks.task_registry import TaskRegistry from typing import AsyncGenerator from prompting.rewards.scoring import task_scorer from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult @@ -67,11 +68,12 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( body = await request.json() body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") + task = TaskRegistry.get_task_by_name(body.get("task")) if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=InferenceTask(), model=body.get("model")) + available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" @@ -79,6 +81,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -87,7 +90,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( max_retries=0, timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, None)]} + event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} ), ) diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index f311057a..32320030 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -75,6 +75,12 @@ class TaskRegistry(BaseModel): ), ] + @classmethod + def get_task_by_name(cls, task_name: str) -> BaseTextTask: + if matching_tasks := [t.task for t in cls.task_configs if t.task.__name__ == task_name]: + return matching_tasks[0] + return None + @classmethod def get_task_config(cls, task: BaseTextTask.__class__ | BaseTextTask) -> TaskConfig: task = task.__class__ if isinstance(task, BaseTextTask) else task From 7296fe77a18822a6429469486dc33bf13b3cdf1e Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Tue, 26 Nov 2024 17:17:45 +0000 Subject: [PATCH 12/40] Passing task type --- prompting/api/gpt_endpoints/api.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index f43770b5..40aa0844 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -66,9 +66,14 @@ 
async def mixture_of_agents(request: Request, api_key_data: dict = Depends(valid @router.post("/v1/chat/completions") async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): body = await request.json() + task = TaskRegistry.get_task_by_name(body.get("task")) + if body.get("task") and not task: + raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") + logger.debug(f"Requested Task: {body.get('task')}, {task}") + + body = {k: v for k, v in body.items() if k != "task"} body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - task = TaskRegistry.get_task_by_name(body.get("task")) if settings.TEST_MINER_IDS: available_miners = settings.TEST_MINER_IDS @@ -81,7 +86,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( axon_info = settings.METAGRAPH.axons[available_miners[0]] base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # base_url = "http://localhost:8008/v1" + base_url = "http://localhost:8008/v1" miner_id = available_miners[0] logger.debug(f"Using base_url: {base_url}") @@ -96,7 +101,10 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( try: with Timer() as timer: - response = await miner.chat.completions.create(**body) + if task: + response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) + else: + response = await miner.chat.completions.create(**body) if body.get("stream"): return StreamingResponse( process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" From 120a90a84c8104b1252fb5be90af9db55898b1bb Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Tue, 26 Nov 2024 13:52:18 -1000 Subject: [PATCH 13/40] Move streaming of miners into query_miners function --- neurons/validator.py | 6 +- prompting/api/api.py | 7 +- prompting/api/api_managements/api.py | 6 +- prompting/api/gpt_endpoints/api.py | 21 ++--- prompting/api/miner_availabilities/api.py | 6 +- prompting/base/epistula.py | 90 +++++++++++++------ .../miner_availability/miner_availability.py | 5 -- prompting/settings.py | 5 +- 8 files changed, 89 insertions(+), 57 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 734585bc..312b88c1 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -2,7 +2,7 @@ import asyncio import json import time -import json + from prompting import settings from prompting.utils.profiling import profiler @@ -12,6 +12,7 @@ from loguru import logger from prompting import mutable_globals +from prompting.api.api import start_api from prompting.base.dendrite import DendriteResponseEvent from prompting.base.epistula import query_miners from prompting.base.forward import log_stream_results @@ -26,9 +27,6 @@ from prompting.utils.logging import ErrorLoggingEvent, ValidatorLoggingEvent from prompting.utils.timer import Timer from prompting.weight_setting.weight_setter import weight_setter -from prompting.llms.utils import GPUInfo -from prompting.base.epistula import query_miners -from prompting.api.api import start_api NEURON_SAMPLE_SIZE = 100 diff --git a/prompting/api/api.py b/prompting/api/api.py index 7a528ece..dff0d2bb 100644 --- a/prompting/api/api.py +++ b/prompting/api/api.py @@ -1,13 +1,12 @@ -from fastapi import FastAPI -from loguru import logger - # This ensures uvicorn is imported first import uvicorn +from 
fastapi import FastAPI +from loguru import logger # Now we can safely import the rest from prompting.api.api_managements.api import router as api_management_router -from prompting.api.miner_availabilities.api import router as miner_availabilities_router from prompting.api.gpt_endpoints.api import router as gpt_router +from prompting.api.miner_availabilities.api import router as miner_availabilities_router app = FastAPI() diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py index 5538dc96..92ccc922 100644 --- a/prompting/api/api_managements/api.py +++ b/prompting/api/api_managements/api.py @@ -1,10 +1,10 @@ -from fastapi import APIRouter, HTTPException, Header, Depends import json -from loguru import logger import secrets -from prompting.settings import settings +from fastapi import APIRouter, Depends, Header, HTTPException +from loguru import logger +from prompting.settings import settings router = APIRouter() diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 40aa0844..2fe64ced 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -1,21 +1,22 @@ -from fastapi import APIRouter, Request, HTTPException -from loguru import logger +import json import random +from typing import AsyncGenerator + import openai -from prompting.settings import settings +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import StreamingResponse from httpx import Timeout +from loguru import logger + +from prompting.api.api_managements.api import validate_api_key +from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.base.epistula import create_header_hook -from fastapi.responses import StreamingResponse -import json from prompting.miner_availability.miner_availability import miner_availabilities +from prompting.rewards.scoring import task_scorer +from prompting.settings import settings from prompting.tasks.inference import InferenceTask from prompting.tasks.task_registry import TaskRegistry -from typing import AsyncGenerator -from prompting.rewards.scoring import task_scorer -from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult from prompting.utils.timer import Timer -from prompting.api.api_managements.api import validate_api_key -from fastapi import Depends router = APIRouter() diff --git a/prompting/api/miner_availabilities/api.py b/prompting/api/miner_availabilities/api.py index d8f43a63..44bee346 100644 --- a/prompting/api/miner_availabilities/api.py +++ b/prompting/api/miner_availabilities/api.py @@ -1,8 +1,10 @@ +from typing import Literal + from fastapi import APIRouter -from prompting.miner_availability.miner_availability import miner_availabilities from loguru import logger + +from prompting.miner_availability.miner_availability import miner_availabilities from prompting.tasks.task_registry import TaskRegistry -from typing import Literal router = APIRouter() diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py index 73534c2d..cd8607f6 100644 --- a/prompting/base/epistula.py +++ b/prompting/base/epistula.py @@ -79,7 +79,7 @@ async def add_headers(request: httpx.Request): return add_headers -async def query_miners(uids, body): +async def query_miners(uids: list = [], body: bytes = b"", stream: bool = False): try: tasks = [] for uid in uids: @@ -90,13 +90,53 @@ async def query_miners(uids, body): settings.WALLET, body, uid, + stream=stream, ) ) ) - responses: 
List[SynapseStreamResult] = await asyncio.gather(*tasks) - return responses + responses = await asyncio.gather(*tasks, return_exceptions=True) + + # Filter out exceptions from responses + exceptions = [resp for resp in responses if isinstance(resp, Exception)] + if exceptions: + for exc in exceptions: + logger.error(f"Error in handle_inference: {exc}") + # Handle exceptions as needed + + if stream: + # 'responses' is a list of async iterators (chat objects) + async def merged_stream(): + streams = [response.__aiter__() for response in responses if not isinstance(response, Exception)] + pending = {} + for stream in streams: + try: + task = asyncio.create_task(stream.__anext__()) + pending[task] = stream + except StopAsyncIteration: + continue # Skip empty streams + + while pending: + done, _ = await asyncio.wait(pending.keys(), return_when=asyncio.FIRST_COMPLETED) + for task in done: + stream = pending.pop(task) + try: + result = task.result() + yield result + # Schedule the next item from the same stream + next_task = asyncio.create_task(stream.__anext__()) + pending[next_task] = stream + except StopAsyncIteration: + # Stream is exhausted + pass + except Exception as e: + logger.error(f"Error while streaming: {e}") + + return merged_stream() + else: + # 'responses' is a list of SynapseStreamResult objects + return [resp for resp in responses if not isinstance(resp, Exception)] except Exception as e: - logger.error(f"Error in forward for: {e}") + logger.error(f"Error in query_miners: {e}") return [] @@ -150,6 +190,7 @@ async def handle_inference( wallet: "bt.wallet", body: Dict[str, Any], uid: int, + stream: bool = False, ) -> SynapseStreamResult: exception = None chunks = [] @@ -166,43 +207,38 @@ async def handle_inference( event_hooks={"request": [create_header_hook(wallet.hotkey, axon_info.hotkey)]} ), ) - try: - payload = json.loads(body) - chat = await miner.chat.completions.create( - messages=payload["messages"], - model=payload["model"], - stream=True, - extra_body={k: v for k, v in payload.items() if k not in ["messages", "model"]}, - ) + payload = json.loads(body) + chat = await miner.chat.completions.create( + messages=payload["messages"], + model=payload["model"], + stream=True, + extra_body={k: v for k, v in payload.items() if k not in ["messages", "model"]}, + ) + if not stream: async for chunk in chat: if chunk.choices[0].delta and chunk.choices[0].delta.content: chunks.append(chunk.choices[0].delta.content) chunk_timings.append(time.time() - start_time) - - except openai.APIConnectionError as e: - logger.trace(f"Miner {uid} failed request: {e}") - exception = e - - except Exception as e: - logger.trace(f"Unknown Error when sending to miner {uid}: {e}") - exception = e - + except openai.APIConnectionError as e: + logger.trace(f"Miner {uid} failed request: {e}") + exception = str(e) except Exception as e: - exception = e - logger.error(f"{uid}: Error in forward for: {e}") + logger.trace(f"Unknown Error when sending to miner {uid}: {e}") + exception = str(e) finally: - if exception: - exception = str(exception) if exception is None: status_code = 200 status_message = "Success" elif isinstance(exception, openai.APIConnectionError): status_code = 502 - status_message = str(exception) + status_message = exception else: status_code = 500 - status_message = str(exception) + status_message = exception + if stream: + return chat + else: return SynapseStreamResult( accumulated_chunks=chunks, accumulated_chunks_timings=chunk_timings, diff --git 
a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index dee65660..29bbc7f9 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -13,11 +13,6 @@ from prompting.tasks.base_task import BaseTask from prompting.tasks.task_registry import TaskRegistry from prompting.utils.uids import get_uids -import random -import asyncio -import numpy as np -from prompting.base.epistula import query_availabilities -from typing import Dict task_config: dict[str, bool] = {str(task_config.task.__name__): True for task_config in TaskRegistry.task_configs} # task_config: dict[str, bool] = { diff --git a/prompting/settings.py b/prompting/settings.py index 08890df5..04d3a4cd 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -1,11 +1,12 @@ import os from functools import cached_property from typing import Any, Literal, Optional -from loguru import logger + +import bittensor as bt import dotenv +from loguru import logger from pydantic import Field, model_validator from pydantic_settings import BaseSettings -import bittensor as bt from transformers import AwqConfig # from prompting.utils.config import config From 7200833fc451e95b684a053e7ea45147ff14e9ad Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 27 Nov 2024 03:58:30 +0000 Subject: [PATCH 14/40] WIP: Add system prompt --- neurons/validator.py | 9 ++++++--- prompting/api/gpt_endpoints/api.py | 2 ++ prompting/base/epistula.py | 2 +- prompting/tasks/base_task.py | 3 ++- prompting/tasks/inference.py | 4 ++-- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index fc435f98..bdf6cd6f 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -140,12 +140,15 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | logger.warning("No available miners. 
This should already have been caught earlier.")
             return
 
+        messages: list[dict[str, str]] = []
+        if task.synapse_system_prompt:
+            messages.append({"role": "system", "content": task.synapse_system_prompt})
+        messages.append({"role": "user", "content": task.query})
+
         body = {
             "seed": task.seed,
             "model": task.llm_model_id,
-            "messages": [
-                {"role": "user", "content": task.query},
-            ],
+            "messages": messages,
         }
         body_bytes = json.dumps(body).encode("utf-8")
         stream_results = await query_miners(task.__class__.__name__, uids, body_bytes)
diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py
index f43770b5..62a00401 100644
--- a/prompting/api/gpt_endpoints/api.py
+++ b/prompting/api/gpt_endpoints/api.py
@@ -34,6 +34,7 @@ async def process_and_collect_stream(miner_id: int, request: dict, response: Asy
     task = InferenceTask(
         query=request["messages"][-1]["content"],
         messages=[message["content"] for message in request["messages"]],
+        roles=request.get("roles", None),
         model=request.get("model"),
         seed=request.get("seed"),
         response="".join(collected_content),
@@ -121,6 +122,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(
     task = InferenceTask(
         query=body["messages"][-1]["content"],
         messages=[message["content"] for message in body["messages"]],
+        roles=roles,
         model=body.get("model"),
         seed=body.get("seed"),
         response=response_event,
diff --git a/prompting/base/epistula.py b/prompting/base/epistula.py
index afe4fa92..fe1d675c 100644
--- a/prompting/base/epistula.py
+++ b/prompting/base/epistula.py
@@ -94,7 +94,7 @@ async def add_headers(request: httpx.Request):
     return add_headers
 
 
-async def query_miners(task, uids, body):
+async def query_miners(task: str, uids: list[int], body: dict[str, any]):
diff --git a/prompting/tasks/base_task.py b/prompting/tasks/base_task.py
index 56024817..f7414776 100644
--- a/prompting/tasks/base_task.py
+++ b/prompting/tasks/base_task.py
@@ -49,6 +49,7 @@ def make_reference(self, **kwargs):
 
 class BaseTextTask(BaseTask):
     query: str | None = None
+    roles: list[str] | None = None
     messages: list[str] | None = None
     reference: str | None = None
     llm_model: ModelConfig = None
@@ -93,7 +94,7 @@ def generate_query(
         """Generates a query to be used for generating the challenge"""
         logger.info("🤖 Generating query...")
         llm_messages = [LLMMessage(role="system", content=self.query_system_prompt)] if self.query_system_prompt else []
-        llm_messages += [LLMMessage(role="user", content=message) for message in messages]
+        llm_messages.extend([LLMMessage(role="user", content=message) for message in messages])
 
         self.query = LLMWrapper.chat_complete(messages=LLMMessages(*llm_messages))
 
diff --git a/prompting/tasks/inference.py b/prompting/tasks/inference.py
index fefa5b7a..e1cd602c 100644
--- a/prompting/tasks/inference.py
+++ b/prompting/tasks/inference.py
@@ -57,8 +57,8 @@ def make_query(self, dataset_entry: ChatEntry) -> str:
 
     def make_reference(self, dataset_entry: ChatEntry) -> str:
         self.reference = model_manager.generate(
-            messages=[self.messages[-1]],
-            roles=["user"],
+            messages=[self.messages[-1]] if self.roles is None else self.messages,
+            roles=["user"] if self.roles is None else self.roles,
             model=self.llm_model,
             sampling_params=SamplingParams(seed=self.seed),
         )[0]

From 6cf7aa5e0a8748c680864efabf018c183850cd98 Mon Sep 17 00:00:00 2001
From: bkb2135
Date: Wed, 27 Nov 2024 02:36:08 -1000
Subject: [PATCH 15/40] Use query_miners in api

---
 prompting/api/gpt_endpoints/api.py | 139 
+++++++++++++++++------------ 1 file changed, 83 insertions(+), 56 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 2fe64ced..fd9e91b1 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -10,7 +10,7 @@ from prompting.api.api_managements.api import validate_api_key from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult -from prompting.base.epistula import create_header_hook +from prompting.base.epistula import create_header_hook, query_miners from prompting.miner_availability.miner_availability import miner_availabilities from prompting.rewards.scoring import task_scorer from prompting.settings import settings @@ -18,6 +18,7 @@ from prompting.tasks.task_registry import TaskRegistry from prompting.utils.timer import Timer + router = APIRouter() @@ -72,7 +73,9 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") logger.debug(f"Requested Task: {body.get('task')}, {task}") - body = {k: v for k, v in body.items() if k != "task"} + stream = body.get("stream") + body = {k: v for k, v in body.items() if k not in ["task", "stream"]} + body['task'] = task.__class__.__name__ body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") @@ -82,61 +85,85 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( - status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {InferenceTask()}" + status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" ) - axon_info = settings.METAGRAPH.axons[available_miners[0]] - base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - base_url = "http://localhost:8008/v1" - miner_id = available_miners[0] - logger.debug(f"Using base_url: {base_url}") - - miner = openai.AsyncOpenAI( - base_url=base_url, - max_retries=0, - timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} - ), - ) - - try: - with Timer() as timer: - if task: - response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) - else: - response = await miner.chat.completions.create(**body) - if body.get("stream"): - return StreamingResponse( - process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" - ) - except Exception as e: - logger.exception(f"Error coming from Miner: {e}") - raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - - response_event = DendriteResponseEvent( - stream_results=[ - SynapseStreamResult( - uid=miner_id, - accumulated_chunks=[response.choices[0].message.content], - accumulated_chunks_timings=[timer.final_time], - ) - ], - completions=[response.choices[0].message.content], - uids=[miner_id], - timeout=settings.NEURON_TIMEOUT, - ) - - task = InferenceTask( - query=body["messages"][-1]["content"], - messages=[message["content"] for message in body["messages"]], - model=body.get("model"), - seed=body.get("seed"), - response=response_event, - ) + 
response = query_miners(available_miners, body, stream = stream) + if stream: + return response + else: + response_event = DendriteResponseEvent( + stream_results = response, + uids = available_miners, + timeout = settings.NEURON_TIMEOUT, + completions = ["".join(res.accumulated_chunks) for res in response] + ) - task_scorer.add_to_queue( - task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id - ) + task = task( + query=body["messages"][-1]["content"], + messages=[message["content"] for message in body["messages"]], + model=body.get("model"), + seed=body.get("seed"), + response=response_event, + ) - return response + task_scorer.add_to_queue( + task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + ) + return response + + + # axon_info = settings.METAGRAPH.axons[available_miners[0]] + # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" + # miner_id = available_miners[0] + # logger.debug(f"Using base_url: {base_url}") + + # miner = openai.AsyncOpenAI( + # base_url=base_url, + # max_retries=0, + # timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), + # http_client=openai.DefaultAsyncHttpxClient( + # event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} + # ), + # ) + + # try: + # with Timer() as timer: + # if task: + # response = await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) + # else: + # response = await miner.chat.completions.create(**body) + # if body.get("stream"): + # return StreamingResponse( + # process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" + # ) + # except Exception as e: + # logger.exception(f"Error coming from Miner: {e}") + # raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") + + # response_event = DendriteResponseEvent( + # stream_results=[ + # SynapseStreamResult( + # uid=miner_id, + # accumulated_chunks=[response.choices[0].message.content], + # accumulated_chunks_timings=[timer.final_time], + # ) + # ], + # completions=[response.choices[0].message.content], + # uids=[miner_id], + # timeout=settings.NEURON_TIMEOUT, + # ) + + # task = InferenceTask( + # query=body["messages"][-1]["content"], + # messages=[message["content"] for message in body["messages"]], + # model=body.get("model"), + # seed=body.get("seed"), + # response=response_event, + # ) + + # task_scorer.add_to_queue( + # task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + # ) + + # return response From 07620fd76632927f696635f8af52b5d206672a79 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 03:17:23 -1000 Subject: [PATCH 16/40] Fix syntax errors --- prompting/api/gpt_endpoints/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index fd9e91b1..7b6d539d 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -88,10 +88,11 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" ) - response = query_miners(available_miners, body, stream = stream) + response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream = stream) if stream: return response 
else: + response = await response response_event = DendriteResponseEvent( stream_results = response, uids = available_miners, @@ -110,6 +111,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( task_scorer.add_to_queue( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) + return response From 21c223681fd24f4d94a17ec51e84c8e9e44ca279 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 07:02:46 -1000 Subject: [PATCH 17/40] Manually dump models --- prompting/api/gpt_endpoints/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 7b6d539d..e25d4f21 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -112,7 +112,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id ) - return response + return [res.model_dump() for res in response] # axon_info = settings.METAGRAPH.axons[available_miners[0]] From ba900a2e19dd911ec21da11e65b5b29bf7192588 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:14:49 +0000 Subject: [PATCH 18/40] Use autoawq 0.2.0 --- poetry.lock | 340 ++++++++++++++++++++++++++----------------------- pyproject.toml | 2 +- 2 files changed, 185 insertions(+), 157 deletions(-) diff --git a/poetry.lock b/poetry.lock index fc6bc9a8..0d35d7f4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "accelerate" @@ -345,29 +345,54 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "autoawq" -version = "0.2.7.post2" +version = "0.2.0" description = "AutoAWQ implements the AWQ algorithm for 4-bit quantization with a 2x speedup during inference." 
optional = false python-versions = ">=3.8.0" files = [ - {file = "autoawq-0.2.7.post2-py3-none-any.whl", hash = "sha256:cef26e2b21a812e298f1752326545cf1ea0456af4c54a92e1941b2d018b92815"}, + {file = "autoawq-0.2.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:4c9c4db6fbf23cd625a9cb5b5495777555659dc12aa7e0aba733f20c51f10005"}, + {file = "autoawq-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cfefc8e8c4d92b9b78f2f1bff61d6bb413138d2ab221029587251344d65007c"}, + {file = "autoawq-0.2.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:ee68699fec949c4440374b402558400efe83c359e7f85a5a7979608c5eec0da3"}, + {file = "autoawq-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:4d6080539bb386a5754cc76b5081b112a93df1ee38f4c2f82e2773e9f098470b"}, + {file = "autoawq-0.2.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:74d2c49780aaa7c7ba0fa4e1f196ac2dc4bdceba27e780115e7dfb32f1ba3c0a"}, + {file = "autoawq-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:43651382592e348c8f44bdc6796b9fa6fc5bd398f58908410376f0b7aaa2b3b3"}, + {file = "autoawq-0.2.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:a40c12fc4ddeabec6f04a2179e720e79563bfe29646ddf9c130bce0bcb51a760"}, + {file = "autoawq-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:3c5dd45bcf23d8a0de2d79a04baf65fb2208249babeb729274c97df6218d48ae"}, ] [package.dependencies] accelerate = "*" -datasets = ">=2.20" +autoawq-kernels = "*" +datasets = "*" tokenizers = ">=0.12.1" -torch = ">=2.2.0" +torch = ">=2.0.1" transformers = ">=4.35.0" -triton = "*" typing-extensions = ">=4.8.0" zstandard = "*" [package.extras] -cpu = ["intel-extension-for-pytorch (>=2.4.0)"] dev = ["black", "griffe-typingdoc", "mkdocs-material", "mkdocstrings-python"] -eval = ["evaluate", "lm-eval (==0.4.1)", "protobuf", "scipy", "tabulate"] -kernels = ["autoawq-kernels"] +eval = ["evaluate", "lm-eval (>=0.4.0)", "protobuf", "scipy", "tabulate"] + +[[package]] +name = "autoawq-kernels" +version = "0.0.9" +description = "AutoAWQ Kernels implements the AWQ kernels." 
+optional = false +python-versions = ">=3.8.0" +files = [ + {file = "autoawq_kernels-0.0.9-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:ed8f4d744df21beae445efb1de54061bffc5fccbfefc8ae65c1dc10d08f90052"}, + {file = "autoawq_kernels-0.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:cd7d3db501068b3a12094a07921d985a57e640725cdda1252d4b135ed6aeaa65"}, + {file = "autoawq_kernels-0.0.9-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:fe800a6538691afaa77abe7c8b2b0a121351843f048d54e11d617d604dcba48f"}, + {file = "autoawq_kernels-0.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:8c7f2404b3aa448ff77872dd6ba2963ce8b685d8aa73ef65fd1b8bc85d92b17d"}, + {file = "autoawq_kernels-0.0.9-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:4c41a71af1d5a75e52c9833b9c48237b04d3b0eee26d712fc1b074af9135afc8"}, + {file = "autoawq_kernels-0.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:f259e7c60b11fa0689bb337dd4456319787256cbd2a8e4a491f01b51bb6c43d1"}, + {file = "autoawq_kernels-0.0.9-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:b6baf039c22deb02f2ae194fdd77551b3c85c8f8a77b749f7caa17dacf986adb"}, + {file = "autoawq_kernels-0.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:6ad12dd68b0932182678f2f9fbee87452707b81f0e8dad242d23af018358f030"}, +] + +[package.dependencies] +torch = ">=2.5.1" [[package]] name = "babel" @@ -1338,18 +1363,18 @@ six = ">=1.4.0" [[package]] name = "duckduckgo-search" -version = "6.3.6" +version = "6.3.7" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.3.6-py3-none-any.whl", hash = "sha256:0fb9e05df335619797828d0520fe5a84e43009600836b2eb61e034a645d2379c"}, - {file = "duckduckgo_search-6.3.6.tar.gz", hash = "sha256:58e020270e6a1515ead2ba386a86f9c5187c886654ddc7db62e3ddbc65489ff1"}, + {file = "duckduckgo_search-6.3.7-py3-none-any.whl", hash = "sha256:6a831a27977751e8928222f04c99a5d069ff80e2a7c78b699c9b9ac6cb48c41b"}, + {file = "duckduckgo_search-6.3.7.tar.gz", hash = "sha256:53d84966429a6377647e2a1ea7224b657575c7a4d506729bdb837e4ee12915ed"}, ] [package.dependencies] click = ">=8.1.7" -primp = ">=0.8.0" +primp = ">=0.8.1" [package.extras] dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] @@ -1849,13 +1874,13 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t [[package]] name = "identify" -version = "2.6.2" +version = "2.6.3" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.2-py2.py3-none-any.whl", hash = "sha256:c097384259f49e372f4ea00a19719d95ae27dd5ff0fd77ad630aa891306b82f3"}, - {file = "identify-2.6.2.tar.gz", hash = "sha256:fab5c716c24d7a789775228823797296a2994b075fb6080ac83a102772a98cbd"}, + {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"}, + {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"}, ] [package.extras] @@ -2009,84 +2034,86 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.7.1" +version = "0.8.0" description = "Fast iterable JSON parser." 
optional = false python-versions = ">=3.8" files = [ - {file = "jiter-0.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:262e96d06696b673fad6f257e6a0abb6e873dc22818ca0e0600f4a1189eb334f"}, - {file = "jiter-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be6de02939aac5be97eb437f45cfd279b1dc9de358b13ea6e040e63a3221c40d"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935f10b802bc1ce2b2f61843e498c7720aa7f4e4bb7797aa8121eab017293c3d"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9cd3cccccabf5064e4bb3099c87bf67db94f805c1e62d1aefd2b7476e90e0ee2"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aa919ebfc5f7b027cc368fe3964c0015e1963b92e1db382419dadb098a05192"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ae2d01e82c94491ce4d6f461a837f63b6c4e6dd5bb082553a70c509034ff3d4"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f9568cd66dbbdab67ae1b4c99f3f7da1228c5682d65913e3f5f95586b3cb9a9"}, - {file = "jiter-0.7.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ecbf4e20ec2c26512736284dc1a3f8ed79b6ca7188e3b99032757ad48db97dc"}, - {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b1a0508fddc70ce00b872e463b387d49308ef02b0787992ca471c8d4ba1c0fa1"}, - {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f84c9996664c460f24213ff1e5881530abd8fafd82058d39af3682d5fd2d6316"}, - {file = "jiter-0.7.1-cp310-none-win32.whl", hash = "sha256:c915e1a1960976ba4dfe06551ea87063b2d5b4d30759012210099e712a414d9f"}, - {file = "jiter-0.7.1-cp310-none-win_amd64.whl", hash = "sha256:75bf3b7fdc5c0faa6ffffcf8028a1f974d126bac86d96490d1b51b3210aa0f3f"}, - {file = "jiter-0.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ad04a23a91f3d10d69d6c87a5f4471b61c2c5cd6e112e85136594a02043f462c"}, - {file = "jiter-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e47a554de88dff701226bb5722b7f1b6bccd0b98f1748459b7e56acac2707a5"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e44fff69c814a2e96a20b4ecee3e2365e9b15cf5fe4e00869d18396daa91dab"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df0a1d05081541b45743c965436f8b5a1048d6fd726e4a030113a2699a6046ea"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f22cf8f236a645cb6d8ffe2a64edb5d2b66fb148bf7c75eea0cb36d17014a7bc"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8589f50b728ea4bf22e0632eefa125c8aa9c38ed202a5ee6ca371f05eeb3ff"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f20de711224f2ca2dbb166a8d512f6ff48c9c38cc06b51f796520eb4722cc2ce"}, - {file = "jiter-0.7.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8a9803396032117b85ec8cbf008a54590644a062fedd0425cbdb95e4b2b60479"}, - {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d8bae77c82741032e9d89a4026479061aba6e646de3bf5f2fc1ae2bbd9d06e0"}, - {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3dc9939e576bbc68c813fc82f6620353ed68c194c7bcf3d58dc822591ec12490"}, - {file = "jiter-0.7.1-cp311-none-win32.whl", hash = 
"sha256:f7605d24cd6fab156ec89e7924578e21604feee9c4f1e9da34d8b67f63e54892"}, - {file = "jiter-0.7.1-cp311-none-win_amd64.whl", hash = "sha256:f3ea649e7751a1a29ea5ecc03c4ada0a833846c59c6da75d747899f9b48b7282"}, - {file = "jiter-0.7.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ad36a1155cbd92e7a084a568f7dc6023497df781adf2390c345dd77a120905ca"}, - {file = "jiter-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7ba52e6aaed2dc5c81a3d9b5e4ab95b039c4592c66ac973879ba57c3506492bb"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b7de0b6f6728b678540c7927587e23f715284596724be203af952418acb8a2d"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9463b62bd53c2fb85529c700c6a3beb2ee54fde8bef714b150601616dcb184a6"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:627164ec01d28af56e1f549da84caf0fe06da3880ebc7b7ee1ca15df106ae172"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25d0e5bf64e368b0aa9e0a559c3ab2f9b67e35fe7269e8a0d81f48bbd10e8963"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c244261306f08f8008b3087059601997016549cb8bb23cf4317a4827f07b7d74"}, - {file = "jiter-0.7.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7ded4e4b75b68b843b7cea5cd7c55f738c20e1394c68c2cb10adb655526c5f1b"}, - {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:80dae4f1889b9d09e5f4de6b58c490d9c8ce7730e35e0b8643ab62b1538f095c"}, - {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5970cf8ec943b51bce7f4b98d2e1ed3ada170c2a789e2db3cb484486591a176a"}, - {file = "jiter-0.7.1-cp312-none-win32.whl", hash = "sha256:701d90220d6ecb3125d46853c8ca8a5bc158de8c49af60fd706475a49fee157e"}, - {file = "jiter-0.7.1-cp312-none-win_amd64.whl", hash = "sha256:7824c3ecf9ecf3321c37f4e4d4411aad49c666ee5bc2a937071bdd80917e4533"}, - {file = "jiter-0.7.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:097676a37778ba3c80cb53f34abd6943ceb0848263c21bf423ae98b090f6c6ba"}, - {file = "jiter-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3298af506d4271257c0a8f48668b0f47048d69351675dd8500f22420d4eec378"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12fd88cfe6067e2199964839c19bd2b422ca3fd792949b8f44bb8a4e7d21946a"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dacca921efcd21939123c8ea8883a54b9fa7f6545c8019ffcf4f762985b6d0c8"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de3674a5fe1f6713a746d25ad9c32cd32fadc824e64b9d6159b3b34fd9134143"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65df9dbae6d67e0788a05b4bad5706ad40f6f911e0137eb416b9eead6ba6f044"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ba9a358d59a0a55cccaa4957e6ae10b1a25ffdabda863c0343c51817610501d"}, - {file = "jiter-0.7.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:576eb0f0c6207e9ede2b11ec01d9c2182973986514f9c60bc3b3b5d5798c8f50"}, - {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:e550e29cdf3577d2c970a18f3959e6b8646fd60ef1b0507e5947dc73703b5627"}, - {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:81d968dbf3ce0db2e0e4dec6b0a0d5d94f846ee84caf779b07cab49f5325ae43"}, - {file = "jiter-0.7.1-cp313-none-win32.whl", hash = "sha256:f892e547e6e79a1506eb571a676cf2f480a4533675f834e9ae98de84f9b941ac"}, - {file = "jiter-0.7.1-cp313-none-win_amd64.whl", hash = "sha256:0302f0940b1455b2a7fb0409b8d5b31183db70d2b07fd177906d83bf941385d1"}, - {file = "jiter-0.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:c65a3ce72b679958b79d556473f192a4dfc5895e8cc1030c9f4e434690906076"}, - {file = "jiter-0.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e80052d3db39f9bb8eb86d207a1be3d9ecee5e05fdec31380817f9609ad38e60"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a497859c4f3f7acd71c8bd89a6f9cf753ebacacf5e3e799138b8e1843084e3"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c1288bc22b9e36854a0536ba83666c3b1fb066b811019d7b682c9cf0269cdf9f"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b096ca72dd38ef35675e1d3b01785874315182243ef7aea9752cb62266ad516f"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbd52c50b605af13dbee1a08373c520e6fcc6b5d32f17738875847fea4e2cd"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af29c5c6eb2517e71ffa15c7ae9509fa5e833ec2a99319ac88cc271eca865519"}, - {file = "jiter-0.7.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f114a4df1e40c03c0efbf974b376ed57756a1141eb27d04baee0680c5af3d424"}, - {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:191fbaee7cf46a9dd9b817547bf556facde50f83199d07fc48ebeff4082f9df4"}, - {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e2b445e5ee627fb4ee6bbceeb486251e60a0c881a8e12398dfdff47c56f0723"}, - {file = "jiter-0.7.1-cp38-none-win32.whl", hash = "sha256:47ac4c3cf8135c83e64755b7276339b26cd3c7ddadf9e67306ace4832b283edf"}, - {file = "jiter-0.7.1-cp38-none-win_amd64.whl", hash = "sha256:60b49c245cd90cde4794f5c30f123ee06ccf42fb8730a019a2870cd005653ebd"}, - {file = "jiter-0.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8f212eeacc7203256f526f550d105d8efa24605828382cd7d296b703181ff11d"}, - {file = "jiter-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d9e247079d88c00e75e297e6cb3a18a039ebcd79fefc43be9ba4eb7fb43eb726"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0aacaa56360139c53dcf352992b0331f4057a0373bbffd43f64ba0c32d2d155"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc1b55314ca97dbb6c48d9144323896e9c1a25d41c65bcb9550b3e0c270ca560"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f281aae41b47e90deb70e7386558e877a8e62e1693e0086f37d015fa1c102289"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93c20d2730a84d43f7c0b6fb2579dc54335db742a59cf9776d0b80e99d587382"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e81ccccd8069110e150613496deafa10da2f6ff322a707cbec2b0d52a87b9671"}, - {file = "jiter-0.7.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a7d5e85766eff4c9be481d77e2226b4c259999cb6862ccac5ef6621d3c8dcce"}, - {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:f52ce5799df5b6975439ecb16b1e879d7655e1685b6e3758c9b1b97696313bfb"}, - {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0c91a0304373fdf97d56f88356a010bba442e6d995eb7773cbe32885b71cdd8"}, - {file = "jiter-0.7.1-cp39-none-win32.whl", hash = "sha256:5c08adf93e41ce2755970e8aa95262298afe2bf58897fb9653c47cd93c3c6cdc"}, - {file = "jiter-0.7.1-cp39-none-win_amd64.whl", hash = "sha256:6592f4067c74176e5f369228fb2995ed01400c9e8e1225fb73417183a5e635f0"}, - {file = "jiter-0.7.1.tar.gz", hash = "sha256:448cf4f74f7363c34cdef26214da527e8eeffd88ba06d0b80b485ad0667baf5d"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dee4eeb293ffcd2c3b31ebab684dbf7f7b71fe198f8eddcdf3a042cc6e10205a"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aad1e6e9b01cf0304dcee14db03e92e0073287a6297caf5caf2e9dbfea16a924"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:504099fb7acdbe763e10690d560a25d4aee03d918d6a063f3a761d8a09fb833f"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2373487caad7fe39581f588ab5c9262fc1ade078d448626fec93f4ffba528858"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c341ecc3f9bccde952898b0c97c24f75b84b56a7e2f8bbc7c8e38cab0875a027"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e48e7a336529b9419d299b70c358d4ebf99b8f4b847ed3f1000ec9f320e8c0c"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ee157a8afd2943be690db679f82fafb8d347a8342e8b9c34863de30c538d55"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7dceae3549b80087f913aad4acc2a7c1e0ab7cb983effd78bdc9c41cabdcf18"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e29e9ecce53d396772590438214cac4ab89776f5e60bd30601f1050b34464019"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fa1782f22d5f92c620153133f35a9a395d3f3823374bceddd3e7032e2fdfa0b1"}, + {file = "jiter-0.8.0-cp310-none-win32.whl", hash = "sha256:f754ef13b4e4f67a3bf59fe974ef4342523801c48bf422f720bd37a02a360584"}, + {file = "jiter-0.8.0-cp310-none-win_amd64.whl", hash = "sha256:796f750b65f5d605f5e7acaccc6b051675e60c41d7ac3eab40dbd7b5b81a290f"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f6f4e645efd96b4690b9b6091dbd4e0fa2885ba5c57a0305c1916b75b4f30ff6"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f61cf6d93c1ade9b8245c9f14b7900feadb0b7899dbe4aa8de268b705647df81"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0396bc5cb1309c6dab085e70bb3913cdd92218315e47b44afe9eace68ee8adaa"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62d0e42ec5dc772bd8554a304358220be5d97d721c4648b23f3a9c01ccc2cb26"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec4b711989860705733fc59fb8c41b2def97041cea656b37cf6c8ea8dee1c3f4"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859cc35bf304ab066d88f10a44a3251a9cd057fb11ec23e00be22206db878f4f"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5000195921aa293b39b9b5bc959d7fa658e7f18f938c0e52732da8e3cc70a278"}, + 
{file = "jiter-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36050284c0abde57aba34964d3920f3d6228211b65df7187059bb7c7f143759a"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a88f608e050cfe45c48d771e86ecdbf5258314c883c986d4217cc79e1fb5f689"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:646cf4237665b2e13b4159d8f26d53f59bc9f2e6e135e3a508a2e5dd26d978c6"}, + {file = "jiter-0.8.0-cp311-none-win32.whl", hash = "sha256:21fe5b8345db1b3023052b2ade9bb4d369417827242892051244af8fae8ba231"}, + {file = "jiter-0.8.0-cp311-none-win_amd64.whl", hash = "sha256:30c2161c5493acf6b6c3c909973fb64ae863747def01cc7574f3954e0a15042c"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d91a52d8f49ada2672a4b808a0c5c25d28f320a2c9ca690e30ebd561eb5a1002"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c38cf25cf7862f61410b7a49684d34eb3b5bcbd7ddaf4773eea40e0bd43de706"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6189beb5c4b3117624be6b2e84545cff7611f5855d02de2d06ff68e316182be"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e13fa849c0e30643554add089983caa82f027d69fad8f50acadcb21c462244ab"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d7765ca159d0a58e8e0f8ca972cd6d26a33bc97b4480d0d2309856763807cd28"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b0befe7c6e9fc867d5bed21bab0131dfe27d1fa5cd52ba2bced67da33730b7d"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d6363d4c6f1052b1d8b494eb9a72667c3ef5f80ebacfe18712728e85327000"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a873e57009863eeac3e3969e4653f07031d6270d037d6224415074ac17e5505c"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2582912473c0d9940791479fe1bf2976a34f212eb8e0a82ee9e645ac275c5d16"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:646163201af42f55393ee6e8f6136b8df488253a6533f4230a64242ecbfe6048"}, + {file = "jiter-0.8.0-cp312-none-win32.whl", hash = "sha256:96e75c9abfbf7387cba89a324d2356d86d8897ac58c956017d062ad510832dae"}, + {file = "jiter-0.8.0-cp312-none-win_amd64.whl", hash = "sha256:ed6074552b4a32e047b52dad5ab497223721efbd0e9efe68c67749f094a092f7"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:dd5e351cb9b3e676ec3360a85ea96def515ad2b83c8ae3a251ce84985a2c9a6f"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba9f12b0f801ecd5ed0cec29041dc425d1050922b434314c592fc30d51022467"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7ba461c3681728d556392e8ae56fb44a550155a24905f01982317b367c21dd4"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a15ed47ab09576db560dbc5c2c5a64477535beb056cd7d997d5dd0f2798770e"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cef55042816d0737142b0ec056c0356a5f681fb8d6aa8499b158e87098f4c6f8"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:549f170215adeb5e866f10617c3d019d8eb4e6d4e3c6b724b3b8c056514a3487"}, + {file = 
"jiter-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f867edeb279d22020877640d2ea728de5817378c60a51be8af731a8a8f525306"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aef8845f463093799db4464cee2aa59d61aa8edcb3762aaa4aacbec3f478c929"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:d0d6e22e4062c3d3c1bf3594baa2f67fc9dcdda8275abad99e468e0c6540bc54"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:079e62e64696241ac3f408e337aaac09137ed760ccf2b72b1094b48745c13641"}, + {file = "jiter-0.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74d2b56ed3da5760544df53b5f5c39782e68efb64dc3aa0bba4cc08815e6fae8"}, + {file = "jiter-0.8.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:798dafe108cba58a7bb0a50d4d5971f98bb7f3c974e1373e750de6eb21c1a329"}, + {file = "jiter-0.8.0-cp313-none-win32.whl", hash = "sha256:ca6d3064dfc743eb0d3d7539d89d4ba886957c717567adc72744341c1e3573c9"}, + {file = "jiter-0.8.0-cp313-none-win_amd64.whl", hash = "sha256:38caedda64fe1f04b06d7011fc15e86b3b837ed5088657bf778656551e3cd8f9"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:bb5c8a0a8d081c338db22e5b8d53a89a121790569cbb85f7d3cfb1fe0fbe9836"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:202dbe8970bfb166fab950eaab8f829c505730a0b33cc5e1cfb0a1c9dd56b2f9"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9046812e5671fdcfb9ae02881fff1f6a14d484b7e8b3316179a372cdfa1e8026"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6ac56425023e52d65150918ae25480d0a1ce2a6bf5ea2097f66a2cc50f6d692"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7dfcf97210c6eab9d2a1c6af15dd39e1d5154b96a7145d0a97fa1df865b7b834"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4e3c8444d418686f78c9a547b9b90031faf72a0a1a46bfec7fb31edbd889c0d"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6507011a299b7f578559084256405a8428875540d8d13530e00b688e41b09493"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0aae4738eafdd34f0f25c2d3668ce9e8fa0d7cb75a2efae543c9a69aebc37323"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5d782e790396b13f2a7b36bdcaa3736a33293bdda80a4bf1a3ce0cd5ef9f15"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cc7f993bc2c4e03015445adbb16790c303282fce2e8d9dc3a3905b1d40e50564"}, + {file = "jiter-0.8.0-cp38-none-win32.whl", hash = "sha256:d4a8a6eda018a991fa58ef707dd51524055d11f5acb2f516d70b1be1d15ab39c"}, + {file = "jiter-0.8.0-cp38-none-win_amd64.whl", hash = "sha256:4cca948a3eda8ea24ed98acb0ee19dc755b6ad2e570ec85e1527d5167f91ff67"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ef89663678d8257063ce7c00d94638e05bd72f662c5e1eb0e07a172e6c1a9a9f"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c402ddcba90b4cc71db3216e8330f4db36e0da2c78cf1d8a9c3ed8f272602a94"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6dfe795b7a173a9f8ba7421cdd92193d60c1c973bbc50dc3758a9ad0fa5eb6"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:8ec29a31b9abd6be39453a2c45da067138a3005d65d2c0507c530e0f1fdcd9a4"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a488f8c54bddc3ddefaf3bfd6de4a52c97fc265d77bc2dcc6ee540c17e8c342"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aeb5561adf4d26ca0d01b5811b4d7b56a8986699a473d700757b4758ef787883"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab961858d7ad13132328517d29f121ae1b2d94502191d6bcf96bddcc8bb5d1c"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a207e718d114d23acf0850a2174d290f42763d955030d9924ffa4227dbd0018f"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:733bc9dc8ff718a0ae4695239e9268eb93e88b73b367dfac3ec227d8ce2f1e77"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1ec27299e22d05e13a06e460bf7f75f26f9aaa0e0fb7d060f40e88df1d81faa"}, + {file = "jiter-0.8.0-cp39-none-win32.whl", hash = "sha256:e8dbfcb46553e6661d3fc1f33831598fcddf73d0f67834bce9fc3e9ebfe5c439"}, + {file = "jiter-0.8.0-cp39-none-win_amd64.whl", hash = "sha256:af2ce2487b3a93747e2cb5150081d4ae1e5874fce5924fc1a12e9e768e489ad8"}, + {file = "jiter-0.8.0.tar.gz", hash = "sha256:86fee98b569d4cc511ff2e3ec131354fafebd9348a487549c31ad371ae730310"}, ] [[package]] @@ -3158,13 +3185,13 @@ files = [ [[package]] name = "openai" -version = "1.55.0" +version = "1.55.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.55.0-py3-none-any.whl", hash = "sha256:446e08918f8dd70d8723274be860404c8c7cc46b91b93bbc0ef051f57eb503c1"}, - {file = "openai-1.55.0.tar.gz", hash = "sha256:6c0975ac8540fe639d12b4ff5a8e0bf1424c844c4a4251148f59f06c4b2bd5db"}, + {file = "openai-1.55.2-py3-none-any.whl", hash = "sha256:3027c7fa4a33ed759f4a3d076093fcfa1c55658660c889bec33f651e2dc77922"}, + {file = "openai-1.55.2.tar.gz", hash = "sha256:5cc0b1162b65dcdf670b4b41448f18dd470d2724ca04821ab1e86b6b4e88650b"}, ] [package.dependencies] @@ -3536,19 +3563,20 @@ tests = ["pytest", "pytest-cov", "pytest-lazy-fixtures"] [[package]] name = "primp" -version = "0.8.0" +version = "0.8.1" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" optional = false python-versions = ">=3.8" files = [ - {file = "primp-0.8.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:5cb4d1db83d92a95fb4506d4605484b389a988fb962e80089caa73c035185f58"}, - {file = "primp-0.8.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:135e6350a6c509fcc3d1cc03d2025edd54783bca671a39a2d4f240ce5d406576"}, - {file = "primp-0.8.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609f4363fb591bde351e6372ba0caaf1ac963d38cbf942bc42dc3284575b4cdf"}, - {file = "primp-0.8.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e341c821fa265f2eaf2a0de80924e465f7bc20a84e9ce28e65cee350ad2cc300"}, - {file = "primp-0.8.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6672554a653f4ef5e672f3985481bc4afff9bfbeaf2bc7b70b9230b7672d49d6"}, - {file = "primp-0.8.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ea18ebc1f664898beac62cfa092ff38ad70c7eb0b3120aecd18ab7a776b6b3fb"}, - {file = "primp-0.8.0-cp38-abi3-win_amd64.whl", hash = "sha256:bcf9895f8dd97d49843adbed635d713e3a1c2dc0a4b08ac0879292be83f1e447"}, - {file = "primp-0.8.0.tar.gz", hash = 
"sha256:6472651b8270247b3121f728b613e312301b8f7e9170944a4e71771dd58eaa8b"}, + {file = "primp-0.8.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8294db817701ad76b6a186c16e22cc49d36fac5986647a83657ad4a58ddeee42"}, + {file = "primp-0.8.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e8117531dcdb0dbcf9855fdbac73febdde5967ca0332a2c05b5961d2fbcfe749"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:993cc4284e8c5c858254748f078e872ba250c9339d64398dc000a8f9cffadda3"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4a27ac642be5c616fc5f139a5ad391dcd0c5964ace56fe6cf31cbffb972a7480"}, + {file = "primp-0.8.1-cp38-abi3-manylinux_2_34_armv7l.whl", hash = "sha256:e8483b8d9eec9fc43d77bb448555466030f29cdd99d9375eb75155e9f832e5bd"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:92f5f8267216252cfb27f2149811e14682bb64f0c5d37f00d218d1592e02f0b9"}, + {file = "primp-0.8.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:98f7f3a9481c55c56e7eff9024f29e16379a87d5b0a1b683e145dd8fcbdcc46b"}, + {file = "primp-0.8.1-cp38-abi3-win_amd64.whl", hash = "sha256:6f0018a26be787431504e32548b296a278abbe85da43bcbaf2d4982ac3dcd332"}, + {file = "primp-0.8.1.tar.gz", hash = "sha256:ddf05754a7b70d59df8a014a8585e418f9c04e0b69065bab6633f4a9b92bad93"}, ] [package.extras] @@ -4025,53 +4053,53 @@ files = [ [[package]] name = "pyarrow" -version = "18.0.0" +version = "18.1.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" files = [ - {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, - {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"}, - {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"}, - {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"}, - {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"}, - {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"}, - {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"}, - {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"}, - {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"}, - {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"}, - {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"}, - {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"}, - {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"}, - {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"}, - {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"}, - {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"}, - {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"}, - {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"}, - {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"}, - {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"}, - {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = 
"sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"}, - {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"}, - {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"}, - {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"}, + {file = "pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c"}, + {file = "pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470"}, + {file = "pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56"}, + {file = "pyarrow-18.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812"}, + {file = "pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854"}, + {file = "pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe"}, + {file = "pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0"}, + {file = "pyarrow-18.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a"}, + {file = "pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d"}, + {file = "pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33"}, + {file = "pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30"}, + {file = "pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99"}, + {file = "pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b"}, + {file = "pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c"}, + {file = "pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c"}, + {file = "pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181"}, + {file = "pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc"}, + {file = "pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9"}, + {file = "pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba"}, + {file = "pyarrow-18.1.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:0b331e477e40f07238adc7ba7469c36b908f07c89b95dd4bd3a0ec84a3d1e21e"}, + {file = "pyarrow-18.1.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c4dd0c9010a25ba03e198fe743b1cc03cd33c08190afff371749c52ccbbaf76"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f97b31b4c4e21ff58c6f330235ff893cc81e23da081b1a4b1c982075e0ed4e9"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a4813cb8ecf1809871fd2d64a8eff740a1bd3691bbe55f01a3cf6c5ec869754"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:05a5636ec3eb5cc2a36c6edb534a38ef57b2ab127292a716d00eabb887835f1e"}, + {file = "pyarrow-18.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:73eeed32e724ea3568bb06161cad5fa7751e45bc2228e33dcb10c614044165c7"}, + {file = "pyarrow-18.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:a1880dd6772b685e803011a6b43a230c23b566859a6e0c9a276c1e0faf4f4052"}, + {file = "pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73"}, ] [package.extras] @@ -4142,13 +4170,13 @@ files = [ [[package]] name = "pydantic" -version = "2.10.1" +version = "2.10.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.10.1-py3-none-any.whl", hash = "sha256:a8d20db84de64cf4a7d59e899c2caf0fe9d660c7cfc482528e7020d7dd189a7e"}, - {file = "pydantic-2.10.1.tar.gz", hash = "sha256:a4daca2dc0aa429555e0656d6bf94873a7dc5f54ee42b1f5873d666fb3f35560"}, + {file = "pydantic-2.10.2-py3-none-any.whl", hash = "sha256:cfb96e45951117c3024e6b67b25cdc33a3cb7b2fa62e239f7af1378358a1d99e"}, + {file = "pydantic-2.10.2.tar.gz", hash = "sha256:2bc2d7f17232e0841cbba4641e65ba1eb6fafb3a08de3a091ff3ce14a197c4fa"}, ] [package.dependencies] @@ -5820,20 +5848,20 @@ files = [ [[package]] name = "tqdm" -version = "4.67.0" +version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, - {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] @@ -6063,13 +6091,13 @@ standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", [[package]] name = "virtualenv" -version = "20.27.1" +version = "20.28.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"}, - {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"}, + {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, + {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, ] [package.dependencies] @@ -6688,4 +6716,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10 <3.11" -content-hash = "31da2759578c6c7e68b892ec067ac9db8309ba9c75132cb52fbd36bc85ceb4d6" +content-hash = "de00aa9b7294d8f0766f98ff424f176dc80f07d71feb4d3ab84ed396fd92cc50" diff --git a/pyproject.toml b/pyproject.toml index 73c7f20e..e0fcec0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -149,7 +149,7 @@ black = "23.7.0" pytest = "^8.3.1" angle-emb = "0.4.3" organic-scoring = {git = "https://github.com/macrocosm-os/organic-scoring.git", rev = "main"} -autoawq = "^0.2.5" +autoawq = "0.2.0" loguru = 
"^0.7.2" duckduckgo-search = "^6.2.12" trafilatura = "^1.12.1" From 26d1db13f065e50d71a644073cd8d68e25848cc3 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:36:37 +0000 Subject: [PATCH 19/40] Support delta or message in sn19 response --- prompting/llms/apis/sn19_wrapper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/prompting/llms/apis/sn19_wrapper.py b/prompting/llms/apis/sn19_wrapper.py index a227d2ab..798c9852 100644 --- a/prompting/llms/apis/sn19_wrapper.py +++ b/prompting/llms/apis/sn19_wrapper.py @@ -6,6 +6,7 @@ from prompting.llms.apis.llm_messages import LLMMessages from prompting.settings import settings +from test_sn19 import response # TODO: key error in response.json() when response is 500 @@ -39,6 +40,9 @@ def chat_complete( response = requests.post(url, headers=headers, data=json.dumps(data)) try: response_json = response.json() - return response_json["choices"][0]["message"].get("content") + try: + return response_json["choices"][0]["message"].get("content") + except KeyError: + return response_json["choices"][0]["delta"].get("content") except Exception as e: logger.exception(f"Error in chat_complete: {e}") From 0389d1bdf64dc8afa1ab8f2c04fae80e83be11c3 Mon Sep 17 00:00:00 2001 From: richwardle Date: Wed, 27 Nov 2024 18:42:01 +0000 Subject: [PATCH 20/40] Remove Unecessary Line --- prompting/llms/apis/sn19_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prompting/llms/apis/sn19_wrapper.py b/prompting/llms/apis/sn19_wrapper.py index 798c9852..e8445146 100644 --- a/prompting/llms/apis/sn19_wrapper.py +++ b/prompting/llms/apis/sn19_wrapper.py @@ -6,7 +6,6 @@ from prompting.llms.apis.llm_messages import LLMMessages from prompting.settings import settings -from test_sn19 import response # TODO: key error in response.json() when response is 500 From 351c14c70ebefbf8ce32e7ddb85915735dea526d Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 10:28:17 -1000 Subject: [PATCH 21/40] Formatting --- prompting/api/gpt_endpoints/api.py | 31 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index e25d4f21..33f77e3f 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -2,15 +2,12 @@ import random from typing import AsyncGenerator -import openai from fastapi import APIRouter, Depends, HTTPException, Request -from fastapi.responses import StreamingResponse -from httpx import Timeout from loguru import logger from prompting.api.api_managements.api import validate_api_key from prompting.base.dendrite import DendriteResponseEvent, SynapseStreamResult -from prompting.base.epistula import create_header_hook, query_miners +from prompting.base.epistula import query_miners from prompting.miner_availability.miner_availability import miner_availabilities from prompting.rewards.scoring import task_scorer from prompting.settings import settings @@ -18,7 +15,6 @@ from prompting.tasks.task_registry import TaskRegistry from prompting.utils.timer import Timer - router = APIRouter() @@ -75,7 +71,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( stream = body.get("stream") body = {k: v for k, v in body.items() if k not in ["task", "stream"]} - body['task'] = task.__class__.__name__ + body["task"] = task.__class__.__name__ body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- 
Using seed: {body.get('seed')}") @@ -85,19 +81,20 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) ): raise HTTPException( - status_code=503, detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}" + status_code=503, + detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}", ) - response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream = stream) + response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) if stream: return response else: response = await response response_event = DendriteResponseEvent( - stream_results = response, - uids = available_miners, - timeout = settings.NEURON_TIMEOUT, - completions = ["".join(res.accumulated_chunks) for res in response] + stream_results=response, + uids=available_miners, + timeout=settings.NEURON_TIMEOUT, + completions=["".join(res.accumulated_chunks) for res in response], ) task = task( @@ -109,11 +106,15 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ) task_scorer.add_to_queue( - task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id + task=task, + response=response_event, + dataset_entry=task.dataset_entry, + block=-1, + step=-1, + task_id=task.task_id, ) - - return [res.model_dump() for res in response] + return [res.model_dump() for res in response] # axon_info = settings.METAGRAPH.axons[available_miners[0]] # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" From 0f6bfd7476090a3941830f9dfe6ba73bc1db1214 Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:11:24 -0500 Subject: [PATCH 22/40] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e0fcec0f..429d30b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "prompting" -version = "2.13.1" +version = "2.13.2" description = "Subnetwork 1 runs on Bittensor and is maintained by Macrocosmos. It's an effort to create decentralised AI" authors = ["Kalei Brady, Dmytro Bobrenko, Felix Quinque, Steffen Cruz"] readme = "README.md" From 9a037cc36dc3e629a036f943630c03244bd1f634 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Wed, 27 Nov 2024 17:00:35 -1000 Subject: [PATCH 23/40] Add test_api to scripts --- scripts/test_api.py | 102 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 scripts/test_api.py diff --git a/scripts/test_api.py b/scripts/test_api.py new file mode 100644 index 00000000..147d850c --- /dev/null +++ b/scripts/test_api.py @@ -0,0 +1,102 @@ +import openai +from httpx import Timeout +from typing import Optional +from prompting.base.epistula import create_header_hook +from prompting import settings + +settings.settings = settings.Settings.load(mode="validator") +settings = settings.settings + + +def setup_miner_client( + port: int = 8004, + api_key: str = "123456", # Default key from your api_keys.json + hotkey: Optional[str] = None +) -> openai.AsyncOpenAI: + """ + Setup an authenticated OpenAI client for the miner. 
+ + Args: + port: Port number for the local server + api_key: API key for authentication + hotkey: Optional wallet hotkey + + Returns: + Configured AsyncOpenAI client + """ + + # Create headers with both API key and hotkey + async def combined_header_hook(request): + # Add API key header + request.headers["api-key"] = api_key + # Add any additional headers from the original header hook + if hotkey: + original_hook = create_header_hook(hotkey, None) + await original_hook(request) + return request + + return openai.AsyncOpenAI( + base_url=f"http://localhost:{port}/v1", + max_retries=0, + timeout=Timeout(15, connect=5, read=10), + http_client=openai.DefaultAsyncHttpxClient( + event_hooks={"request": [combined_header_hook]} + ), + ) + + +async def make_completion( + miner: openai.AsyncOpenAI, + prompt: str, + stream: bool = False, + seed: str = "1759348" +) -> str: + """ + Make a completion request to the API. + + Args: + miner: Configured AsyncOpenAI client + prompt: Input prompt + stream: Whether to stream the response + seed: Random seed for reproducibility + + Returns: + Generated completion text + """ + result = await miner.chat.completions.create( + model="Test-Model", + messages=[{"role": "user", "content": prompt}], + stream=stream, + extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask"} + ) + + if not stream: + return result + else: + chunks = [] + async for chunk in result: + print(chunk) + if chunk.choices[0].delta.content: + chunks.append(chunk.choices[0].delta.content) + return "".join(chunks) + + +async def main(): + PORT = 8004 + API_KEY = "YOUR_API_KEY_HERE" + miner = setup_miner_client( + port=PORT, + api_key=API_KEY, + hotkey=settings.WALLET.hotkey if hasattr(settings, 'WALLET') else None + ) + response = await make_completion( + miner=miner, + prompt="Say 10 random numbers between 1 and 100", + stream=False + ) + print(response) + + +# Run the async main function +import asyncio +asyncio.run(main()) \ No newline at end of file From 72fed859e2ec51fa3de66e26d681b00bfe12ba87 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:51:34 +0000 Subject: [PATCH 24/40] SN1-327: Clean up, link system prompt ticket --- .../api/gpt_endpoints/process_completions.py | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/prompting/api/gpt_endpoints/process_completions.py b/prompting/api/gpt_endpoints/process_completions.py index 4234227f..79eb6ed4 100644 --- a/prompting/api/gpt_endpoints/process_completions.py +++ b/prompting/api/gpt_endpoints/process_completions.py @@ -70,41 +70,3 @@ async def process_completions(body: dict[str, any]): ) return [res.model_dump() for res in response] - - -# async def process_and_collect_stream(miner_id: int, request: dict, response: AsyncGenerator): -# collected_content = [] -# collected_chunks_timings = [] -# with Timer() as timer: -# async for chunk in response: -# logger.debug(f"Chunk: {chunk}") -# if hasattr(chunk, "choices") and chunk.choices and isinstance(chunk.choices[0].delta.content, str): -# collected_content.append(chunk.choices[0].delta.content) -# collected_chunks_timings.append(timer.elapsed_time()) -# yield f"data: {json.dumps(chunk.model_dump())}\n\n" - -# task = InferenceTask( -# query=request["messages"][-1]["content"], -# messages=[message["content"] for message in request["messages"]], -# model=request.get("model"), -# seed=request.get("seed"), -# response="".join(collected_content), -# ) -# 
logger.debug(f"Adding Organic Request to scoring queue: {task}") -# response_event = DendriteResponseEvent( -# stream_results=[ -# SynapseStreamResult( -# uid=miner_id, -# accumulated_chunks=collected_content, -# accumulated_chunks_timings=collected_chunks_timings, -# ) -# ], -# uids=[miner_id], -# timeout=settings.NEURON_TIMEOUT, -# completions=["".join(collected_content)], -# ) - -# task_scorer.add_to_queue( -# task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id -# ) -# yield "data: [DONE]\n\n" From fd476cedb7efb1b550412cf6600e9be3f831da89 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:22:33 +0000 Subject: [PATCH 25/40] Fix syntax --- prompting/tasks/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/tasks/base_task.py b/prompting/tasks/base_task.py index 26ab86f2..51563a25 100644 --- a/prompting/tasks/base_task.py +++ b/prompting/tasks/base_task.py @@ -94,7 +94,7 @@ def generate_query( """Generates a query to be used for generating the challenge""" logger.info("🤖 Generating query...") llm_messages = [LLMMessage(role="system", content=self.query_system_prompt)] if self.query_system_prompt else [] - llm_messages.extemd([LLMMessage(role="user", content=message) for message in messages]) + llm_messages.extend([LLMMessage(role="user", content=message) for message in messages]) self.query = LLMWrapper.chat_complete(messages=LLMMessages(*llm_messages)) From 0b375183b93bcf3c478228a4eb6a4b79f3b24cf6 Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:26:21 -0500 Subject: [PATCH 26/40] Update api_keys.json Co-authored-by: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> --- api_keys.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api_keys.json b/api_keys.json index 93b0e261..0967ef42 100644 --- a/api_keys.json +++ b/api_keys.json @@ -1 +1 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} +{} From 9485e563380041132011ee089579e3ed6bb79d4e Mon Sep 17 00:00:00 2001 From: bkb2135 <98138173+bkb2135@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:26:35 -0500 Subject: [PATCH 27/40] Update prompting/api/gpt_endpoints/api.py Co-authored-by: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> --- prompting/api/gpt_endpoints/api.py | 54 ------------------------------ 1 file changed, 54 deletions(-) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index 33f77e3f..a4adb613 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -116,57 +116,3 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( return [res.model_dump() for res in response] - # axon_info = settings.METAGRAPH.axons[available_miners[0]] - # base_url = "http://localhost:8008/v1" if settings.mode == "mock" else f"http://{axon_info.ip}:{axon_info.port}/v1" - # miner_id = available_miners[0] - # logger.debug(f"Using base_url: {base_url}") - - # miner = openai.AsyncOpenAI( - # base_url=base_url, - # max_retries=0, - # timeout=Timeout(settings.NEURON_TIMEOUT, connect=5, read=5), - # http_client=openai.DefaultAsyncHttpxClient( - # event_hooks={"request": [create_header_hook(settings.WALLET.hotkey, axon_info.hotkey)]} - # ), - # ) - - # try: - # with Timer() as timer: - # if task: - # response 
= await miner.chat.completions.create(**body, extra_body={"task": task.__name__}) - # else: - # response = await miner.chat.completions.create(**body) - # if body.get("stream"): - # return StreamingResponse( - # process_and_collect_stream(miner_id, body, response), media_type="text/event-stream" - # ) - # except Exception as e: - # logger.exception(f"Error coming from Miner: {e}") - # raise HTTPException(status_code=500, detail=f"Error coming from Miner: {e}") - - # response_event = DendriteResponseEvent( - # stream_results=[ - # SynapseStreamResult( - # uid=miner_id, - # accumulated_chunks=[response.choices[0].message.content], - # accumulated_chunks_timings=[timer.final_time], - # ) - # ], - # completions=[response.choices[0].message.content], - # uids=[miner_id], - # timeout=settings.NEURON_TIMEOUT, - # ) - - # task = InferenceTask( - # query=body["messages"][-1]["content"], - # messages=[message["content"] for message in body["messages"]], - # model=body.get("model"), - # seed=body.get("seed"), - # response=response_event, - # ) - - # task_scorer.add_to_queue( - # task=task, response=response_event, dataset_entry=task.dataset_entry, block=-1, step=-1, task_id=task.task_id - # ) - - # return response From f17682171055db962a0e63c9be4fea5457a5cfcc Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 28 Nov 2024 13:37:05 +0000 Subject: [PATCH 28/40] Add keys example --- api_keys.json | 1 - api_keys.json.example | 1 + neurons/validator.py | 6 +++--- prompting/api/api_keys.json | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) delete mode 100644 api_keys.json create mode 100644 api_keys.json.example delete mode 100644 prompting/api/api_keys.json diff --git a/api_keys.json b/api_keys.json deleted file mode 100644 index 93b0e261..00000000 --- a/api_keys.json +++ /dev/null @@ -1 +0,0 @@ -{"0566dbe21ee33bba9419549716cd6f1f": {"rate_limit": 10, "usage": 0}, "e03da67439c0b7e7a622dde4fa3cf857": {"rate_limit": 10, "usage": 0}} diff --git a/api_keys.json.example b/api_keys.json.example new file mode 100644 index 00000000..fd065361 --- /dev/null +++ b/api_keys.json.example @@ -0,0 +1 @@ +{"API_KEY_VALUE": {"rate_limit": 10, "usage": 0}} diff --git a/neurons/validator.py b/neurons/validator.py index 3da470b6..eae65a92 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -220,9 +220,9 @@ async def main(): # start scoring tasks in separate loop asyncio.create_task(task_scorer.start()) - # # TODO: Think about whether we want to store the task queue locally in case of a crash - # # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating - # # TODO: Make weight setting happen as specific intervals as we load/unload models + # TODO: Think about whether we want to store the task queue locally in case of a crash + # TODO: Possibly run task scorer & model scheduler with a lock so I don't unload a model whilst it's generating + # TODO: Make weight setting happen as specific intervals as we load/unload models with Validator() as v: while True: logger.info( diff --git a/prompting/api/api_keys.json b/prompting/api/api_keys.json deleted file mode 100644 index 0967ef42..00000000 --- a/prompting/api/api_keys.json +++ /dev/null @@ -1 +0,0 @@ -{} From 1bf399633ebe5c76486669459bdd1cef6288e325 Mon Sep 17 00:00:00 2001 From: richwardle Date: Thu, 28 Nov 2024 14:57:06 +0000 Subject: [PATCH 29/40] Push Working Changes --- .gitignore | 2 ++ prompting/api/gpt_endpoints/api.py | 2 +- 
prompting/miner_availability/miner_availability.py | 9 ++++++++- scripts/test_api.py | 13 +++++++------ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 5834a8af..c9dedde8 100644 --- a/.gitignore +++ b/.gitignore @@ -177,3 +177,5 @@ core app.config.js wandb .vscode +api_keys.json +prompting/api/api_keys.json \ No newline at end of file diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index a4adb613..ac7445e6 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -82,7 +82,7 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ): raise HTTPException( status_code=503, - detail=f"No miners available for model: {body.get('model')} and task: {task.__class__.__name__}", + detail=f"No miners available for model: {body.get('model')} and task: {task.__name__}", ) response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index 29bbc7f9..28d50fa5 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -35,7 +35,14 @@ def is_model_available(self, model: str) -> bool: return self.llm_model_availabilities[model] def is_task_available(self, task: BaseTask) -> bool: - return self.task_availabilities[task.__class__.__name__] + if isinstance(task, BaseTask): + try: + return self.task_availabilities[task.__class__.__name__] + except Exception as e: + logger.error(f"Error in is_task_available: {e}") + return False + else: + return self.task_availabilities[task.__name__] class MinerAvailabilities(BaseModel): diff --git a/scripts/test_api.py b/scripts/test_api.py index 147d850c..43a3a477 100644 --- a/scripts/test_api.py +++ b/scripts/test_api.py @@ -38,7 +38,7 @@ async def combined_header_hook(request): return openai.AsyncOpenAI( base_url=f"http://localhost:{port}/v1", max_retries=0, - timeout=Timeout(15, connect=5, read=10), + timeout=Timeout(30, connect=10, read=20), http_client=openai.DefaultAsyncHttpxClient( event_hooks={"request": [combined_header_hook]} ), @@ -64,15 +64,16 @@ async def make_completion( Generated completion text """ result = await miner.chat.completions.create( - model="Test-Model", + model=None, messages=[{"role": "user", "content": prompt}], stream=stream, - extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask"} + extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask", "mixture": False} ) if not stream: return result else: + print('In the else') chunks = [] async for chunk in result: print(chunk) @@ -83,7 +84,7 @@ async def make_completion( async def main(): PORT = 8004 - API_KEY = "YOUR_API_KEY_HERE" + API_KEY = "0566dbe21ee33bba9419549716cd6f1f" miner = setup_miner_client( port=PORT, api_key=API_KEY, @@ -92,9 +93,9 @@ async def main(): response = await make_completion( miner=miner, prompt="Say 10 random numbers between 1 and 100", - stream=False + stream=True ) - print(response) + print(["".join(res.accumulated_chunks) for res in response]) # Run the async main function From 6bab37e2035401c388bfc54592859b19ac397dfe Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 28 Nov 2024 10:13:03 -1000 Subject: [PATCH 30/40] Add Optional Api Deployment --- neurons/validator.py | 3 ++- prompting/settings.py | 1 + 2 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/neurons/validator.py b/neurons/validator.py index 312b88c1..c615d0db 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -197,7 +197,8 @@ def __exit__(self, exc_type, exc_value, traceback): async def main(): - asyncio.create_task(start_api()) + if settings.DEPLOY_API: + asyncio.create_task(start_api()) GPUInfo.log_gpu_info() # start profiling diff --git a/prompting/settings.py b/prompting/settings.py index 04d3a4cd..6d007ab3 100644 --- a/prompting/settings.py +++ b/prompting/settings.py @@ -65,6 +65,7 @@ class Settings(BaseSettings): TASK_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="TASK_QUEUE_LENGTH_THRESHOLD") SCORING_QUEUE_LENGTH_THRESHOLD: int = Field(10, env="SCORING_QUEUE_LENGTH_THRESHOLD") HF_TOKEN: Optional[str] = Field(None, env="HF_TOKEN") + DEPLOY_API: bool = Field(False, env="DEPLOY_API") # API Management. API_KEYS_FILE: str = Field("api_keys.json", env="API_KEYS_FILE") From bb115cf5409f410a82aff06588e232966ccc0c33 Mon Sep 17 00:00:00 2001 From: Hollyqui Date: Mon, 2 Dec 2024 10:32:47 +0000 Subject: [PATCH 31/40] Fixing formatting --- .gitignore | 2 +- prompting/api/gpt_endpoints/api.py | 1 - .../miner_availability/miner_availability.py | 2 +- prompting/utils/logging.py | 2 +- scripts/test_api.py | 51 ++++++++----------- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index c9dedde8..6afd93cb 100644 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,4 @@ app.config.js wandb .vscode api_keys.json -prompting/api/api_keys.json \ No newline at end of file +prompting/api/api_keys.json diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py index ac7445e6..db871a3f 100644 --- a/prompting/api/gpt_endpoints/api.py +++ b/prompting/api/gpt_endpoints/api.py @@ -115,4 +115,3 @@ async def proxy_chat_completions(request: Request, api_key_data: dict = Depends( ) return [res.model_dump() for res in response] - diff --git a/prompting/miner_availability/miner_availability.py b/prompting/miner_availability/miner_availability.py index 28d50fa5..64f912a1 100644 --- a/prompting/miner_availability/miner_availability.py +++ b/prompting/miner_availability/miner_availability.py @@ -38,7 +38,7 @@ def is_task_available(self, task: BaseTask) -> bool: if isinstance(task, BaseTask): try: return self.task_availabilities[task.__class__.__name__] - except Exception as e: + except Exception as e: logger.error(f"Error in is_task_available: {e}") return False else: diff --git a/prompting/utils/logging.py b/prompting/utils/logging.py index a95d888f..5028b0cb 100644 --- a/prompting/utils/logging.py +++ b/prompting/utils/logging.py @@ -5,12 +5,12 @@ from typing import Any, Literal import numpy as np -import wandb from loguru import logger from pydantic import BaseModel, ConfigDict from wandb.wandb_run import Run import prompting +import wandb from prompting.base.dendrite import DendriteResponseEvent from prompting.rewards.reward import WeightedRewardEvent from prompting.settings import settings diff --git a/scripts/test_api.py b/scripts/test_api.py index 43a3a477..6f9a3192 100644 --- a/scripts/test_api.py +++ b/scripts/test_api.py @@ -1,26 +1,26 @@ +from typing import Optional + import openai from httpx import Timeout -from typing import Optional -from prompting.base.epistula import create_header_hook + from prompting import settings +from prompting.base.epistula import create_header_hook settings.settings = settings.Settings.load(mode="validator") settings = settings.settings def 
setup_miner_client( - port: int = 8004, - api_key: str = "123456", # Default key from your api_keys.json - hotkey: Optional[str] = None + port: int = 8004, api_key: str = "123456", hotkey: Optional[str] = None # Default key from your api_keys.json ) -> openai.AsyncOpenAI: """ Setup an authenticated OpenAI client for the miner. - + Args: port: Port number for the local server api_key: API key for authentication hotkey: Optional wallet hotkey - + Returns: Configured AsyncOpenAI client """ @@ -39,21 +39,14 @@ async def combined_header_hook(request): base_url=f"http://localhost:{port}/v1", max_retries=0, timeout=Timeout(30, connect=10, read=20), - http_client=openai.DefaultAsyncHttpxClient( - event_hooks={"request": [combined_header_hook]} - ), + http_client=openai.DefaultAsyncHttpxClient(event_hooks={"request": [combined_header_hook]}), ) -async def make_completion( - miner: openai.AsyncOpenAI, - prompt: str, - stream: bool = False, - seed: str = "1759348" -) -> str: +async def make_completion(miner: openai.AsyncOpenAI, prompt: str, stream: bool = False, seed: str = "1759348") -> str: """ Make a completion request to the API. - + Args: miner: Configured AsyncOpenAI client prompt: Input prompt @@ -67,13 +60,18 @@ async def make_completion( model=None, messages=[{"role": "user", "content": prompt}], stream=stream, - extra_body={"seed": seed, "sampling_parameters": settings.SAMPLING_PARAMS, "task": "QuestionAnsweringTask", "mixture": False} + extra_body={ + "seed": seed, + "sampling_parameters": settings.SAMPLING_PARAMS, + "task": "QuestionAnsweringTask", + "mixture": False, + }, ) - + if not stream: return result else: - print('In the else') + print("In the else") chunks = [] async for chunk in result: print(chunk) @@ -86,18 +84,13 @@ async def main(): PORT = 8004 API_KEY = "0566dbe21ee33bba9419549716cd6f1f" miner = setup_miner_client( - port=PORT, - api_key=API_KEY, - hotkey=settings.WALLET.hotkey if hasattr(settings, 'WALLET') else None - ) - response = await make_completion( - miner=miner, - prompt="Say 10 random numbers between 1 and 100", - stream=True + port=PORT, api_key=API_KEY, hotkey=settings.WALLET.hotkey if hasattr(settings, "WALLET") else None ) + response = await make_completion(miner=miner, prompt="Say 10 random numbers between 1 and 100", stream=True) print(["".join(res.accumulated_chunks) for res in response]) # Run the async main function import asyncio -asyncio.run(main()) \ No newline at end of file + +asyncio.run(main()) From 09e41039256f34e13f62cb5d9b363197107edebf Mon Sep 17 00:00:00 2001 From: richwardle Date: Mon, 2 Dec 2024 11:25:09 +0000 Subject: [PATCH 32/40] sort: fix import formatting --- prompting/utils/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/utils/logging.py b/prompting/utils/logging.py index 5028b0cb..a95d888f 100644 --- a/prompting/utils/logging.py +++ b/prompting/utils/logging.py @@ -5,12 +5,12 @@ from typing import Any, Literal import numpy as np +import wandb from loguru import logger from pydantic import BaseModel, ConfigDict from wandb.wandb_run import Run import prompting -import wandb from prompting.base.dendrite import DendriteResponseEvent from prompting.rewards.reward import WeightedRewardEvent from prompting.settings import settings From e2965fbd48a758c793b36a343c9e84481552eb90 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:26:51 +0000 Subject: [PATCH 33/40] Fix synapse system prompt --- neurons/validator.py | 2 - 
prompting/tasks/task_registry.py | 68 ++++++++++++++------------------ 2 files changed, 30 insertions(+), 40 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index a2e54413..6c455a3c 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -129,8 +129,6 @@ async def collect_responses(self, task: BaseTextTask) -> DendriteResponseEvent | return messages: list[dict[str, str]] = [] - if task.synapse_system_prompt: - messages.append({"role": "system", "content": task.synapse_system_prompt}) messages.append({"role": "user", "content": task.query}) body = { diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index 1650f31b..1bdaca0e 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -6,19 +6,10 @@ from pydantic import BaseModel, ConfigDict from prompting.datasets.base import BaseDataset -from prompting.datasets.huggingface_github import HuggingFaceGithubDataset -from prompting.datasets.random_website import DDGDataset from prompting.datasets.sn13 import SN13Dataset -from prompting.datasets.wiki import WikiDataset, WikiDateDataset from prompting.rewards.reward import BaseRewardConfig from prompting.tasks.base_task import BaseTextTask -from prompting.tasks.date_qa import DateQARewardConfig, DateQuestionAnsweringTask from prompting.tasks.inference import InferenceRewardConfig, InferenceTask -from prompting.tasks.multi_choice import MultiChoiceRewardConfig, MultiChoiceTask -from prompting.tasks.programming_task import ProgrammingRewardConfig, ProgrammingTask -from prompting.tasks.qa import QARewardConfig, QuestionAnsweringTask -from prompting.tasks.summarization import SummarizationRewardConfig, SummarizationTask -from prompting.tasks.web_retrieval import WebRetrievalRewardConfig, WebRetrievalTask # from prompting.tasks. 
@@ -37,40 +28,41 @@ def __hash__(self): class TaskRegistry(BaseModel): task_configs: ClassVar[list[TaskConfig]] = [ - TaskConfig(task=QuestionAnsweringTask, probability=0.2, datasets=[WikiDataset], reward_model=QARewardConfig), - TaskConfig( - task=SummarizationTask, probability=0.1, datasets=[WikiDataset], reward_model=SummarizationRewardConfig - ), - TaskConfig( - task=DateQuestionAnsweringTask, - probability=0.1, - datasets=[WikiDateDataset], - reward_model=DateQARewardConfig, - ), + # TaskConfig(task=QuestionAnsweringTask, probability=0.2, datasets=[WikiDataset], reward_model=QARewardConfig), + # TaskConfig( + # task=SummarizationTask, probability=0.1, datasets=[WikiDataset], reward_model=SummarizationRewardConfig + # ), + # TaskConfig( + # task=DateQuestionAnsweringTask, + # probability=0.1, + # datasets=[WikiDateDataset], + # reward_model=DateQARewardConfig, + # ), TaskConfig( task=InferenceTask, - probability=0.16, + # probability=0.16, + probability=1.00, datasets=[SN13Dataset], reward_model=InferenceRewardConfig, ), - TaskConfig( - task=MultiChoiceTask, - probability=0.31, - datasets=[WikiDataset], - reward_model=MultiChoiceRewardConfig, - ), - TaskConfig( - task=ProgrammingTask, - probability=0.1, - datasets=[HuggingFaceGithubDataset], - reward_model=ProgrammingRewardConfig, - ), - TaskConfig( - task=WebRetrievalTask, - probability=0.03, - datasets=[DDGDataset], - reward_model=WebRetrievalRewardConfig, - ), + # TaskConfig( + # task=MultiChoiceTask, + # probability=0.31, + # datasets=[WikiDataset], + # reward_model=MultiChoiceRewardConfig, + # ), + # TaskConfig( + # task=ProgrammingTask, + # probability=0.1, + # datasets=[HuggingFaceGithubDataset], + # reward_model=ProgrammingRewardConfig, + # ), + # TaskConfig( + # task=WebRetrievalTask, + # probability=0.03, + # datasets=[DDGDataset], + # reward_model=WebRetrievalRewardConfig, + # ), ] @classmethod From a7c53c84b4a1812eba6e70d111ebe2629e16cba7 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:17:43 +0000 Subject: [PATCH 34/40] WIP: Move MoA to new API --- prompting/api/gpt_endpoints/api.py | 17 ----- .../api/gpt_endpoints/process_completions.py | 67 ------------------- prompting/api/gpt_endpoints/serialisers.py | 0 .../mixture_of_miners.py | 0 4 files changed, 84 deletions(-) delete mode 100644 prompting/api/gpt_endpoints/api.py delete mode 100644 prompting/api/gpt_endpoints/process_completions.py delete mode 100644 prompting/api/gpt_endpoints/serialisers.py rename {prompting/api/gpt_endpoints => validator_api}/mixture_of_miners.py (100%) diff --git a/prompting/api/gpt_endpoints/api.py b/prompting/api/gpt_endpoints/api.py deleted file mode 100644 index e361b51c..00000000 --- a/prompting/api/gpt_endpoints/api.py +++ /dev/null @@ -1,17 +0,0 @@ -from fastapi import APIRouter, Depends, Request - -from prompting.api.api_managements.api import validate_api_key -from prompting.api.gpt_endpoints.mixture_of_miners import mixture_of_miners -from prompting.api.gpt_endpoints.process_completions import process_completions - -router = APIRouter() - - -@router.post("/v1/chat/completions") -async def proxy_chat_completions(request: Request, api_key_data: dict = Depends(validate_api_key)): - """OpenAI-style chat completions endpoint.""" - body = await request.json() - if body.get("mixture", False): - return await mixture_of_miners(body) - else: - return await process_completions(body) diff --git a/prompting/api/gpt_endpoints/process_completions.py 
b/prompting/api/gpt_endpoints/process_completions.py deleted file mode 100644 index 129bc46c..00000000 --- a/prompting/api/gpt_endpoints/process_completions.py +++ /dev/null @@ -1,67 +0,0 @@ -import json -import random - -from fastapi import HTTPException -from loguru import logger - -from prompting.base.dendrite import DendriteResponseEvent -from prompting.base.epistula import query_miners -from prompting.miner_availability.miner_availability import miner_availabilities -from prompting.rewards.scoring import task_scorer -from prompting.settings import settings -from prompting.tasks.inference import InferenceTask -from prompting.tasks.task_registry import TaskRegistry - - -async def process_completions(body: dict[str, any]): - task = TaskRegistry.get_task_by_name(body.get("task", InferenceTask.__name__)) - if body.get("task") and not task: - raise HTTPException(status_code=400, detail=f"Task {body.get('task')} not found") - logger.debug(f"Requested Task: {body.get('task')}, {task}") - - stream = body.get("stream") - body = {k: v for k, v in body.items() if k not in ["task", "stream"]} - body["task"] = task.__class__.__name__ - body["seed"] = body.get("seed") or str(random.randint(0, 1_000_000)) - logger.debug(f"Seed provided by miner: {bool(body.get('seed'))} -- Using seed: {body.get('seed')}") - - if settings.TEST_MINER_IDS: - available_miners = settings.TEST_MINER_IDS - elif not settings.mode == "mock" and not ( - available_miners := miner_availabilities.get_available_miners(task=task, model=body.get("model")) - ): - raise HTTPException( - status_code=503, - detail=f"No miners available for model: {body.get('model')} and task: {task.__name__}", - ) - - response = query_miners(available_miners, json.dumps(body).encode("utf-8"), stream=stream) - if stream: - return response - - response = await response - response_event = DendriteResponseEvent( - stream_results=response, - uids=available_miners, - timeout=settings.NEURON_TIMEOUT, - completions=["".join(res.accumulated_chunks) for res in response], - ) - - task = task( - query=body["messages"][-1]["content"], - messages=[message["content"] for message in body["messages"]], - model=body.get("model"), - seed=body.get("seed"), - response=response_event, - ) - - task_scorer.add_to_queue( - task=task, - response=response_event, - dataset_entry=task.dataset_entry, - block=-1, - step=-1, - task_id=task.task_id, - ) - - return [res.model_dump() for res in response] diff --git a/prompting/api/gpt_endpoints/serialisers.py b/prompting/api/gpt_endpoints/serialisers.py deleted file mode 100644 index e69de29b..00000000 diff --git a/prompting/api/gpt_endpoints/mixture_of_miners.py b/validator_api/mixture_of_miners.py similarity index 100% rename from prompting/api/gpt_endpoints/mixture_of_miners.py rename to validator_api/mixture_of_miners.py From 34949f16cf8db50511a149c7ff4ce804adc90c37 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:39:13 +0000 Subject: [PATCH 35/40] Add MoA code --- validator_api/chat_completion.py | 106 +++++++++++++++++++++++++++++ validator_api/gpt_endpoints.py | 99 +++------------------------ validator_api/mixture_of_miners.py | 77 ++++++++++++++------- 3 files changed, 168 insertions(+), 114 deletions(-) create mode 100644 validator_api/chat_completion.py diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py new file mode 100644 index 00000000..0d1470e2 --- /dev/null +++ b/validator_api/chat_completion.py @@ -0,0 +1,106 @@ + +import 
asyncio +import json +import random +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +import httpx +from loguru import logger +from shared.epistula import make_openai_query +from shared.settings import shared_settings +from shared.uids import get_uids + + +async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): + if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default + return + + # if body.get("task") != "InferenceTask": + # logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") + # return + url = f"http://{shared_settings.VALIDATOR_API}/scoring" + payload = {"body": body, "chunks": chunks, "uid": uid} + # headers = { + # "Authorization": f"Bearer {shared_settings.SCORING_KEY}", #Add API key in Authorization header + # "Content-Type": "application/json", + # } + try: + timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, json=payload) # , headers=headers) + if response.status_code == 200: + logger.info(f"Forwarding response completed with status {response.status_code}") + + else: + logger.exception( + f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" + ) + + except Exception as e: + logger.error(f"Tried to forward response to {url} with payload {payload}") + logger.exception(f"Error while forwarding response: {e}") + + +async def stream_response(response, collected_chunks: list[str], body: dict[str, any], uid: int) -> AsyncGenerator[str, None]: + chunks_received = False + try: + async for chunk in response: + chunks_received = True + collected_chunks.append(chunk.choices[0].delta.content) + yield f"data: {json.dumps(chunk.model_dump())}\n\n" + + if not chunks_received: + logger.error("Stream is empty: No chunks were received") + yield 'data: {"error": "502 - Response is empty"}\n\n' + yield "data: [DONE]\n\n" + + # Forward the collected chunks after streaming is complete + asyncio.create_task(forward_response(uid=uid, body=body, chunks=collected_chunks)) + except asyncio.CancelledError: + logger.info("Client disconnected, streaming cancelled") + raise + except Exception as e: + logger.exception(f"Error during streaming: {e}") + yield 'data: {"error": "Internal server Error"}\n\n' + + +async def regular_chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: + """Handle regular chat completion without mixture of miners.""" + if uid is None: + uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + + if uid is None: + logger.error("No available miner found") + raise HTTPException(status_code=503, detail="No available miner found") + + logger.debug(f"Querying uid {uid}") + STREAM = body.get("stream", False) + + collected_chunks: list[str] = [] + + logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") + response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) + + if STREAM: + return StreamingResponse( + stream_response(response, collected_chunks, body, uid), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + else: + asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) + return response[0] + + +async def get_response_from_miner(body: dict[str, any], uid: int) -> tuple: + """Get 
response from a single miner.""" + return await make_openai_query( + shared_settings.METAGRAPH, + shared_settings.WALLET, + body, + uid, + stream=False + ) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index bc3d2e2f..91d5563d 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -1,106 +1,29 @@ -import asyncio -import json import random -import httpx -from fastapi import APIRouter, HTTPException, Request +from fastapi import APIRouter, Request from loguru import logger from starlette.responses import StreamingResponse -from shared.epistula import make_openai_query -from shared.settings import shared_settings -from shared.uids import get_uids +from validator_api import mixture_of_miners +from validator_api.chat_completion import regular_chat_completion router = APIRouter() -async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): - if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default - return - - # if body.get("task") != "InferenceTask": - # logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") - # return - url = f"http://{shared_settings.VALIDATOR_API}/scoring" - payload = {"body": body, "chunks": chunks, "uid": uid} - # headers = { - # "Authorization": f"Bearer {shared_settings.SCORING_KEY}", #Add API key in Authorization header - # "Content-Type": "application/json", - # } - try: - timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) - async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.post(url, json=payload) # , headers=headers) - if response.status_code == 200: - logger.info(f"Forwarding response completed with status {response.status_code}") - - else: - logger.exception( - f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" - ) - - except Exception as e: - logger.error(f"Tried to forward response to {url} with payload {payload}") - logger.exception(f"Error while forwarding response: {e}") - @router.post("/v1/chat/completions") -async def chat_completion(request: Request): # , cbackground_tasks: BackgroundTasks): +async def chat_completion(request: Request): + """Main endpoint that handles both regular and mixture of miners chat completion.""" try: body = await request.json() body["seed"] = int(body.get("seed") or random.randint(0, 1000000)) - STREAM = body.get("stream") or False - logger.debug(f"Streaming: {STREAM}") - # Get random miner from top 100 incentive. 
- uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) - # uid = get_available_miner(task=body.get("task"), model=body.get("model")) - if uid is None: - logger.error("No available miner found") - raise HTTPException(status_code=503, detail="No available miner found") - logger.debug(f"Querying uid {uid}") - - collected_chunks: list[str] = [] - - # Create a wrapper for the streaming response - async def stream_with_error_handling(): - chunks_received = False - try: - async for chunk in response: - chunks_received = True - collected_chunks.append(chunk.choices[0].delta.content) - yield f"data: {json.dumps(chunk.model_dump())}\n\n" - - if not chunks_received: - logger.error("Stream is empty: No chunks were received") - yield 'data: {"error": "502 - Response is empty"}\n\n' - yield "data: [DONE]\n\n" - - # Once the stream is done, forward the collected chunks - asyncio.create_task(forward_response(uid=uid, body=body, chunks=collected_chunks)) - # background_tasks.add_task(forward_response, uid=uid, body=body, chunks=collected_chunks) - except asyncio.CancelledError: - logger.info("Client disconnected, streaming cancelled") - raise - except Exception as e: - logger.exception(f"Error during streaming: {e}") - yield 'data: {"error": "Internal server Error"}\n\n' - - logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") - response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) - - if STREAM: - return StreamingResponse( - stream_with_error_handling(), - media_type="text/event-stream", - headers={ - "Cache-Control": "no-cache", - "Connection": "keep-alive", - }, - ) + + # Choose between regular completion and mixture of miners. + if body.get("mixture", False): + return await mixture_of_miners(body) else: - asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) - return response[0] + return await regular_chat_completion(body) except Exception as e: - logger.exception(f"Error setting up streaming: {e}") + logger.exception(f"Error in chat completion: {e}") return StreamingResponse(content="Internal Server Error", status_code=500) diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 96fc22f2..6349dd41 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -1,45 +1,70 @@ + + import copy +import random + +from fastapi import HTTPException +from fastapi.responses import StreamingResponse +from loguru import logger + +from shared.uids import get_uids +from validator_api.chat_completion import get_response_from_miner, regular_chat_completion + -from prompting.api.gpt_endpoints.process_completions import process_completions +async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: + """Handle chat completion with mixture of miners approach.""" + DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. + Your task is to synthesize these responses into a single, high-quality and concise response. + It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. + Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. + Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. 
+ Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. + Responses from models:""" -DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. -Your task is to synthesize these responses into a single, high-quality and concise response. -It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. -Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. -Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. -Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. -Responses from models:""" + TASK_SYSTEM_PROMPT = { + None: DEFAULT_SYSTEM_PROMPT, + } -TASK_SYSTEM_PROMPT = { - None: DEFAULT_SYSTEM_PROMPT, -} + # Get responses from multiple miners + body_first_step = copy.deepcopy(body) + body_first_step["stream"] = False + # Get multiple miners + miner_uids = get_uids(sampling_mode="top_incentive", k=3) # Get responses from top 3 miners + if not miner_uids: + raise HTTPException(status_code=503, detail="No available miners found") -async def mixture_of_miners( - body: dict[str, any], -): - body_1st_step = copy.deepcopy(body) - body_1st_step["stream"] = False + # Collect responses from all miners + responses = [] + for uid in miner_uids: + try: + response = await get_response_from_miner(body_first_step, uid) + responses.append(response) + except Exception as e: + logger.error(f"Error getting response from miner {uid}: {e}") + continue - # First step: Get initial responses from miners. - responses = await process_completions(body_1st_step) + if not responses: + raise HTTPException(status_code=503, detail="Failed to get responses from miners") - # Extract completions from the responses. - completions = ["".join(res["accumulated_chunks"]) for res in responses] + # Extract completions from the responses + completions = [response[1][0] for response in responses if response and len(response) > 1] task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - # Aggregate responses into one system prompt. + # Aggregate responses into one system prompt agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - # Prepare new messages with the aggregated system prompt. + # Prepare new messages with the aggregated system prompt original_messages = body["messages"] original_user_messages = [msg for msg in original_messages if msg["role"] != "system"] new_messages = [{"role": "system", "content": agg_system_prompt}] + original_user_messages - # Update the body with the new messages. - body["messages"] = new_messages + # Update the body with the new messages + final_body = copy.deepcopy(body) + final_body["messages"] = new_messages - # Second step: Get the final response using the aggregated system prompt. 
- return await process_completions(body) + # Get final response using a random top miner + final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + return await regular_chat_completion(final_body, final_uid) From edf1bfeedacad19be19dd10cc12d1b51e329b217 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:25:39 +0000 Subject: [PATCH 36/40] WIP: Finish MoA --- validator_api/gpt_endpoints.py | 1 - validator_api/mixture_of_miners.py | 104 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 91d5563d..9638a89c 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -10,7 +10,6 @@ router = APIRouter() - @router.post("/v1/chat/completions") async def chat_completion(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 6349dd41..b15f6f7d 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -1,5 +1,4 @@ - - +import asyncio import copy import random @@ -11,60 +10,81 @@ from validator_api.chat_completion import get_response_from_miner, regular_chat_completion -async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: - """Handle chat completion with mixture of miners approach.""" - DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. - Your task is to synthesize these responses into a single, high-quality and concise response. - It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. - Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. - Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. - Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. - Responses from models:""" +DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. +Your task is to synthesize these responses into a single, high-quality and concise response. +It is crucial to follow the provided instuctions or examples in the given prompt if any, and ensure the answer is in correct and expected format. +Critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. +Your response should not simply replicate the given answers but should offer a refined and accurate reply to the instruction. +Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. +Responses from models:""" + +TASK_SYSTEM_PROMPT = { + None: DEFAULT_SYSTEM_PROMPT, + # Add more task-specific system prompts here. 
+} + +NUM_MIXTURE_MINERS = 5 +TOP_INCENTIVE_POOL = 100 + + + +async def get_miner_response(body: dict, uid: str) -> tuple | None: + """Get response from a single miner with error handling.""" + try: + return await get_response_from_miner(body, uid) + except Exception as e: + logger.error(f"Error getting response from miner {uid}: {e}") + return None - TASK_SYSTEM_PROMPT = { - None: DEFAULT_SYSTEM_PROMPT, - } - # Get responses from multiple miners +async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: + """Handle chat completion with mixture of miners approach. + + Based on Mixture-of-Agents Enhances Large Language Model Capabilities, 2024, Wang et al.: + https://arxiv.org/abs/2406.04692 + + Args: + body: Query parameters: + messages: User prompt. + stream: If True, stream the response. + model: Optional model used for inference, SharedSettings.LLM_MODEL is used by default. + task: Optional task, see prompting/tasks/task_registry.py, InferenceTask is used by default. + """ body_first_step = copy.deepcopy(body) body_first_step["stream"] = False # Get multiple miners - miner_uids = get_uids(sampling_mode="top_incentive", k=3) # Get responses from top 3 miners + miner_uids = get_uids(sampling_mode="top_incentive", k=NUM_MIXTURE_MINERS) if not miner_uids: raise HTTPException(status_code=503, detail="No available miners found") - # Collect responses from all miners - responses = [] - for uid in miner_uids: - try: - response = await get_response_from_miner(body_first_step, uid) - responses.append(response) - except Exception as e: - logger.error(f"Error getting response from miner {uid}: {e}") - continue - - if not responses: + # Concurrently collect responses from all miners. + miner_tasks = [get_miner_response(body_first_step, uid) for uid in miner_uids] + responses = await asyncio.gather(*miner_tasks) + + # Filter out None responses (failed requests). + valid_responses = [r for r in responses if r is not None] + + if not valid_responses: raise HTTPException(status_code=503, detail="Failed to get responses from miners") - # Extract completions from the responses - completions = [response[1][0] for response in responses if response and len(response) > 1] - + # Extract completions from the responses. + completions = [response[1][0] for response in valid_responses if response and len(response) > 1] + task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - - # Aggregate responses into one system prompt + + # Aggregate responses into one system prompt. agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - - # Prepare new messages with the aggregated system prompt - original_messages = body["messages"] - original_user_messages = [msg for msg in original_messages if msg["role"] != "system"] - new_messages = [{"role": "system", "content": agg_system_prompt}] + original_user_messages - - # Update the body with the new messages + + # Prepare new messages with the aggregated system prompt. + new_messages = [{"role": "system", "content": agg_system_prompt}] + new_messages.extend([msg for msg in body["messages"] if msg["role"] != "system"]) + + # Update the body with the new messages. final_body = copy.deepcopy(body) final_body["messages"] = new_messages - - # Get final response using a random top miner - final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) + + # Get final response using a random top miner. 
+ final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) return await regular_chat_completion(final_body, final_uid) From 27b035aceb82b7933f022ea2e33bb278866e035c Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:47:31 +0000 Subject: [PATCH 37/40] Finish implementation --- validator_api/chat_completion.py | 10 ++++++++-- validator_api/gpt_endpoints.py | 8 ++++---- validator_api/mixture_of_miners.py | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py index 0d1470e2..337ae45d 100644 --- a/validator_api/chat_completion.py +++ b/validator_api/chat_completion.py @@ -2,6 +2,7 @@ import asyncio import json import random +from typing import AsyncGenerator from fastapi import HTTPException from fastapi.responses import StreamingResponse import httpx @@ -41,7 +42,12 @@ async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): logger.exception(f"Error while forwarding response: {e}") -async def stream_response(response, collected_chunks: list[str], body: dict[str, any], uid: int) -> AsyncGenerator[str, None]: +async def stream_response( + response, + collected_chunks: list[str], + body: dict[str, any], + uid: int + ) -> AsyncGenerator[str, None]: chunks_received = False try: async for chunk in response: @@ -64,7 +70,7 @@ async def stream_response(response, collected_chunks: list[str], body: dict[str, yield 'data: {"error": "Internal server Error"}\n\n' -async def regular_chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: +async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple | StreamingResponse: """Handle regular chat completion without mixture of miners.""" if uid is None: uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 9638a89c..0058f738 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -4,14 +4,14 @@ from loguru import logger from starlette.responses import StreamingResponse -from validator_api import mixture_of_miners -from validator_api.chat_completion import regular_chat_completion +from validator_api.mixture_of_miners import mixture_of_miners +from validator_api.chat_completion import chat_completion router = APIRouter() @router.post("/v1/chat/completions") -async def chat_completion(request: Request): +async def completions(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" try: body = await request.json() @@ -21,7 +21,7 @@ async def chat_completion(request: Request): if body.get("mixture", False): return await mixture_of_miners(body) else: - return await regular_chat_completion(body) + return await chat_completion(body) except Exception as e: logger.exception(f"Error in chat completion: {e}") diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index b15f6f7d..189bfc56 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -7,7 +7,7 @@ from loguru import logger from shared.uids import get_uids -from validator_api.chat_completion import get_response_from_miner, regular_chat_completion +from validator_api.chat_completion import get_response_from_miner, chat_completion DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various 
open-source models to the latest user query. @@ -55,7 +55,7 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Get multiple miners miner_uids = get_uids(sampling_mode="top_incentive", k=NUM_MIXTURE_MINERS) - if not miner_uids: + if len(miner_uids) == 0: raise HTTPException(status_code=503, detail="No available miners found") # Concurrently collect responses from all miners. @@ -87,4 +87,4 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Get final response using a random top miner. final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) - return await regular_chat_completion(final_body, final_uid) + return await chat_completion(final_body, final_uid) From f75d28c03715350f6d65e446cbadc84657b57a84 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:49:14 +0000 Subject: [PATCH 38/40] Clean up code --- api_keys.json.example | 1 - prompting/api/api_managements/api.py | 82 ---------------------------- prompting/tasks/task_registry.py | 5 +- 3 files changed, 3 insertions(+), 85 deletions(-) delete mode 100644 api_keys.json.example delete mode 100644 prompting/api/api_managements/api.py diff --git a/api_keys.json.example b/api_keys.json.example deleted file mode 100644 index fd065361..00000000 --- a/api_keys.json.example +++ /dev/null @@ -1 +0,0 @@ -{"API_KEY_VALUE": {"rate_limit": 10, "usage": 0}} diff --git a/prompting/api/api_managements/api.py b/prompting/api/api_managements/api.py deleted file mode 100644 index 92ccc922..00000000 --- a/prompting/api/api_managements/api.py +++ /dev/null @@ -1,82 +0,0 @@ -import json -import secrets - -from fastapi import APIRouter, Depends, Header, HTTPException -from loguru import logger - -from prompting.settings import settings - -router = APIRouter() - - -# Load and save functions for API keys -def load_api_keys(): - try: - with open(settings.API_KEYS_FILE, "r") as f: - return json.load(f) - except FileNotFoundError: - return {} - - -def save_api_keys(api_keys): - with open(settings.API_KEYS_FILE, "w") as f: - json.dump(api_keys, f) - - -# Use lifespan to initialize API keys -_keys = load_api_keys() -logger.info(f"Loaded API keys: {_keys}") -save_api_keys(_keys) - - -# Dependency to validate the admin key -def validate_admin_key(admin_key: str = Header(...)): - if admin_key != settings.ADMIN_KEY: - raise HTTPException(status_code=403, detail="Invalid admin key") - - -# Dependency to validate API keys -def validate_api_key(api_key: str = Header(...)): - if api_key not in _keys: - raise HTTPException(status_code=403, detail="Invalid API key") - return _keys[api_key] - - -@router.post("/create-api-key/") -def create_api_key(rate_limit: int, admin_key: str = Depends(validate_admin_key)): - """Creates a new API key with a specified rate limit.""" - new_api_key = secrets.token_hex(16) - _keys[new_api_key] = {"rate_limit": rate_limit, "usage": 0} - save_api_keys(_keys) - return {"message": "API key created", "api_key": new_api_key} - - -@router.put("/modify-api-key/{api_key}") -def modify_api_key(api_key: str, rate_limit: int, admin_key: str = Depends(validate_admin_key)): - """Modifies the rate limit of an existing API key.""" - if api_key not in _keys: - raise HTTPException(status_code=404, detail="API key not found") - _keys[api_key]["rate_limit"] = rate_limit - save_api_keys(_keys) - return {"message": "API key updated", "api_key": api_key} - - -@router.delete("/delete-api-key/{api_key}") -def 
delete_api_key(api_key: str, admin_key: str = Depends(validate_admin_key)): - """Deletes an existing API key.""" - if api_key not in _keys: - raise HTTPException(status_code=404, detail="API key not found") - del _keys[api_key] - save_api_keys(_keys) - return {"message": "API key deleted"} - - -@router.get("/demo-endpoint/") -def demo_endpoint(api_key_data: dict = Depends(validate_api_key)): - """A demo endpoint that requires a valid API key.""" - return {"message": "Access granted", "your_rate_limit": api_key_data["rate_limit"]} - - -# # Create FastAPI app and include the router -# app = FastAPI() -# app.include_router(router) diff --git a/prompting/tasks/task_registry.py b/prompting/tasks/task_registry.py index 9fbf0d04..0bdf0324 100644 --- a/prompting/tasks/task_registry.py +++ b/prompting/tasks/task_registry.py @@ -8,8 +8,10 @@ from prompting.datasets.huggingface_github import HuggingFaceGithubDataset from prompting.datasets.random_website import DDGDataset from prompting.datasets.sn13 import SN13Dataset +from prompting.datasets.wiki import WikiDataset, WikiDateDataset from prompting.rewards.reward import BaseRewardConfig from prompting.tasks.base_task import BaseTextTask +from prompting.tasks.date_qa import DateQARewardConfig, DateQuestionAnsweringTask from prompting.tasks.inference import InferenceRewardConfig, InferenceTask from prompting.tasks.multi_choice import MultiChoiceRewardConfig, MultiChoiceTask from prompting.tasks.multi_step_reasoning import MultiStepReasoningRewardConfig, MultiStepReasoningTask @@ -48,8 +50,7 @@ class TaskRegistry(BaseModel): ), TaskConfig( task=InferenceTask, - # probability=0.16, - probability=1.00, + probability=0.16, datasets=[SN13Dataset], reward_model=InferenceRewardConfig, ), From bc48fb979ecdfac607c50995395ecc15fe8892f4 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:50:54 +0000 Subject: [PATCH 39/40] Run pre-commit hook --- validator_api/chat_completion.py | 30 +++++++++++------------------- validator_api/gpt_endpoints.py | 4 ++-- validator_api/mixture_of_miners.py | 22 ++++++++++------------ 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/validator_api/chat_completion.py b/validator_api/chat_completion.py index 337ae45d..668a5e8a 100644 --- a/validator_api/chat_completion.py +++ b/validator_api/chat_completion.py @@ -1,12 +1,13 @@ - import asyncio import json import random from typing import AsyncGenerator + +import httpx from fastapi import HTTPException from fastapi.responses import StreamingResponse -import httpx from loguru import logger + from shared.epistula import make_openai_query from shared.settings import shared_settings from shared.uids import get_uids @@ -43,11 +44,8 @@ async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): async def stream_response( - response, - collected_chunks: list[str], - body: dict[str, any], - uid: int - ) -> AsyncGenerator[str, None]: + response, collected_chunks: list[str], body: dict[str, any], uid: int +) -> AsyncGenerator[str, None]: chunks_received = False try: async for chunk in response: @@ -74,19 +72,19 @@ async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple """Handle regular chat completion without mixture of miners.""" if uid is None: uid = random.choice(get_uids(sampling_mode="top_incentive", k=100)) - + if uid is None: logger.error("No available miner found") raise HTTPException(status_code=503, detail="No available miner found") - + logger.debug(f"Querying 
uid {uid}") STREAM = body.get("stream", False) - + collected_chunks: list[str] = [] - + logger.info(f"Making {'streaming' if STREAM else 'non-streaming'} openai query with body: {body}") response = await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=STREAM) - + if STREAM: return StreamingResponse( stream_response(response, collected_chunks, body, uid), @@ -103,10 +101,4 @@ async def chat_completion(body: dict[str, any], uid: int | None = None) -> tuple async def get_response_from_miner(body: dict[str, any], uid: int) -> tuple: """Get response from a single miner.""" - return await make_openai_query( - shared_settings.METAGRAPH, - shared_settings.WALLET, - body, - uid, - stream=False - ) + return await make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, body, uid, stream=False) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 0058f738..34681f0e 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -4,8 +4,8 @@ from loguru import logger from starlette.responses import StreamingResponse -from validator_api.mixture_of_miners import mixture_of_miners from validator_api.chat_completion import chat_completion +from validator_api.mixture_of_miners import mixture_of_miners router = APIRouter() @@ -16,7 +16,7 @@ async def completions(request: Request): try: body = await request.json() body["seed"] = int(body.get("seed") or random.randint(0, 1000000)) - + # Choose between regular completion and mixture of miners. if body.get("mixture", False): return await mixture_of_miners(body) diff --git a/validator_api/mixture_of_miners.py b/validator_api/mixture_of_miners.py index 189bfc56..e2aaa05a 100644 --- a/validator_api/mixture_of_miners.py +++ b/validator_api/mixture_of_miners.py @@ -7,8 +7,7 @@ from loguru import logger from shared.uids import get_uids -from validator_api.chat_completion import get_response_from_miner, chat_completion - +from validator_api.chat_completion import chat_completion, get_response_from_miner DEFAULT_SYSTEM_PROMPT = """You have been provided with a set of responses from various open-source models to the latest user query. Your task is to synthesize these responses into a single, high-quality and concise response. @@ -27,7 +26,6 @@ TOP_INCENTIVE_POOL = 100 - async def get_miner_response(body: dict, uid: str) -> tuple | None: """Get response from a single miner with error handling.""" try: @@ -39,10 +37,10 @@ async def get_miner_response(body: dict, uid: str) -> tuple | None: async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: """Handle chat completion with mixture of miners approach. - + Based on Mixture-of-Agents Enhances Large Language Model Capabilities, 2024, Wang et al.: https://arxiv.org/abs/2406.04692 - + Args: body: Query parameters: messages: User prompt. @@ -61,30 +59,30 @@ async def mixture_of_miners(body: dict[str, any]) -> tuple | StreamingResponse: # Concurrently collect responses from all miners. miner_tasks = [get_miner_response(body_first_step, uid) for uid in miner_uids] responses = await asyncio.gather(*miner_tasks) - + # Filter out None responses (failed requests). valid_responses = [r for r in responses if r is not None] - + if not valid_responses: raise HTTPException(status_code=503, detail="Failed to get responses from miners") # Extract completions from the responses. 
completions = [response[1][0] for response in valid_responses if response and len(response) > 1] - + task_name = body.get("task") system_prompt = TASK_SYSTEM_PROMPT.get(task_name, DEFAULT_SYSTEM_PROMPT) - + # Aggregate responses into one system prompt. agg_system_prompt = system_prompt + "\n" + "\n".join([f"{i+1}. {comp}" for i, comp in enumerate(completions)]) - + # Prepare new messages with the aggregated system prompt. new_messages = [{"role": "system", "content": agg_system_prompt}] new_messages.extend([msg for msg in body["messages"] if msg["role"] != "system"]) - + # Update the body with the new messages. final_body = copy.deepcopy(body) final_body["messages"] = new_messages - + # Get final response using a random top miner. final_uid = random.choice(get_uids(sampling_mode="top_incentive", k=TOP_INCENTIVE_POOL)) return await chat_completion(final_body, final_uid) From 1445e1f2f527cfad0d8d4ef1d922337d0e994486 Mon Sep 17 00:00:00 2001 From: Dmytro Bobrenko <17252809+dbobrenko@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:14:19 +0000 Subject: [PATCH 40/40] Merge with staging --- validator_api/gpt_endpoints.py | 39 ---------------------------------- 1 file changed, 39 deletions(-) diff --git a/validator_api/gpt_endpoints.py b/validator_api/gpt_endpoints.py index 2ed297c1..34681f0e 100644 --- a/validator_api/gpt_endpoints.py +++ b/validator_api/gpt_endpoints.py @@ -10,39 +10,6 @@ router = APIRouter() -<<<<<<< HEAD -======= -async def forward_response(uid: int, body: dict[str, any], chunks: list[str]): - uid = int(uid) # sometimes uid is type np.uint64 - logger.info(f"Forwarding response to scoring with body: {body}") - if not shared_settings.SCORE_ORGANICS: # Allow disabling of scoring by default - return - - if body.get("task") != "InferenceTask": - logger.debug(f"Skipping forwarding for non-inference task: {body.get('task')}") - return - url = f"http://{shared_settings.VALIDATOR_API}/scoring" - payload = {"body": body, "chunks": chunks, "uid": uid} - try: - timeout = httpx.Timeout(timeout=120.0, connect=60.0, read=30.0, write=30.0, pool=5.0) - async with httpx.AsyncClient(timeout=timeout) as client: - response = await client.post( - url, json=payload, headers={"api-key": shared_settings.SCORING_KEY, "Content-Type": "application/json"} - ) - if response.status_code == 200: - logger.info(f"Forwarding response completed with status {response.status_code}") - - else: - logger.exception( - f"Forwarding response uid {uid} failed with status {response.status_code} and payload {payload}" - ) - - except Exception as e: - logger.error(f"Tried to forward response to {url} with payload {payload}") - logger.exception(f"Error while forwarding response: {e}") - - ->>>>>>> staging @router.post("/v1/chat/completions") async def completions(request: Request): """Main endpoint that handles both regular and mixture of miners chat completion.""" @@ -54,13 +21,7 @@ async def completions(request: Request): if body.get("mixture", False): return await mixture_of_miners(body) else: -<<<<<<< HEAD return await chat_completion(body) -======= - logger.info("Forwarding response to scoring...") - asyncio.create_task(forward_response(uid=uid, body=body, chunks=response[1])) - return response[0] ->>>>>>> staging except Exception as e: logger.exception(f"Error in chat completion: {e}")
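The last patches leave validator_api/gpt_endpoints.py with a single /v1/chat/completions route that dispatches to chat_completion for one miner sampled from the top-incentive pool, or to mixture_of_miners when the request body sets "mixture": true. A minimal client sketch follows; the host and port (localhost:8000) and the absence of an auth header are assumptions made for illustration, not values fixed by these patches.

# Sketch: call the new validator_api chat completions route, once with a single
# miner and once through the mixture-of-miners path. Host/port are assumed.
import asyncio

import httpx

API_URL = "http://localhost:8000/v1/chat/completions"  # assumed deployment address


async def query(mixture: bool) -> dict:
    body = {
        "messages": [{"role": "user", "content": "Name three uses of zinc."}],
        "task": "InferenceTask",  # optional; InferenceTask is the default task
        "stream": False,
        "seed": 42,               # optional; the endpoint picks a random seed if omitted
        "mixture": mixture,       # True routes the request through mixture_of_miners
    }
    async with httpx.AsyncClient(timeout=httpx.Timeout(120.0)) as client:
        response = await client.post(API_URL, json=body)
        response.raise_for_status()
        return response.json()


async def main() -> None:
    single = await query(mixture=False)
    mixed = await query(mixture=True)
    print(single)
    print(mixed)


if __name__ == "__main__":
    asyncio.run(main())

Streaming works the same way from the client side, except the body sets "stream": true and the reply arrives as server-sent events ("data: ..." lines), matching the StreamingResponse produced by stream_response in chat_completion.py.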