Skip to content

Commit

Permalink
[feat](audio): create endpoint
Browse files Browse the repository at this point in the history
fix(audio): config && create audio endpoint

fix l'API audio et ajout d'un fichier pour tester

clean code

fix(audio): make it work
  • Loading branch information
ArtixJP authored and Ledoux committed Oct 9, 2024
1 parent d1e6368 commit db62995
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 8 deletions.
55 changes: 55 additions & 0 deletions app/endpoints/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Optional, Union

from fastapi import APIRouter, Form, Security, HTTPException, UploadFile, File

from app.schemas.audio import AudioTranscription, AudioTranscriptionVerbose
from app.schemas.config import AUDIO_MODEL_TYPE
from app.utils.security import check_api_key
from app.utils.lifespan import clients


# Router for the audio endpoints; included by the application under the /v1 prefix.
router = APIRouter()


@router.post("/audio/transcriptions")
async def audio_transcriptions(
    file: UploadFile = File(...),
    model: str = Form(...),
    prompt: Optional[str] = Form(None),
    response_format: str = Form(...),
    temperature: float = Form(0.2),
    _: str = Security(check_api_key),
) -> Union[AudioTranscription, AudioTranscriptionVerbose]:
    """Transcribe an uploaded audio file (OpenAI-compatible transcription API).

    Args:
        file: audio file to transcribe, sent as multipart form data.
        model: identifier of the model to use; must be an audio model.
        prompt: optional text to guide the transcription.
        response_format: output format requested by the caller (e.g. "json", "verbose_json").
        temperature: sampling temperature forwarded to the backend (default 0.2).

    Returns:
        The transcription as returned by the backend client.

    Raises:
        HTTPException: 400 if the requested model is not an audio model.
    """
    client = clients.models[model]

    if client.type != AUDIO_MODEL_TYPE:
        raise HTTPException(status_code=400, detail="Le modèle n'est pas un modèle audio.")

    # Read the upload once, then forward it with the caller's options.
    # (The previous implementation silently dropped prompt, response_format
    # and temperature, and kept a large block of commented-out httpx code.)
    file_content = await file.read()

    options = {"response_format": response_format, "temperature": temperature}
    if prompt is not None:
        # Only forward prompt when provided, so the backend's own default applies otherwise.
        options["prompt"] = prompt

    response = client.audio.transcriptions.create(
        file=("audio.mp3", file_content, file.content_type),
        model=model,
        **options,
    )
    return response
19 changes: 16 additions & 3 deletions app/helpers/_modelclients.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from app.schemas.models import Model, Models
from app.utils.config import LOGGER
from app.utils.exceptions import ContextLengthExceededException, ModelNotAvailableException, ModelNotFoundException
from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE
from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE, AUDIO_MODEL_TYPE


def get_models_list(self, *args, **kwargs):
Expand Down Expand Up @@ -45,6 +45,15 @@ def get_models_list(self, *args, **kwargs):
self.created = round(time.time())
self.max_model_len = response.get("max_input_length", None)

elif self.type == AUDIO_MODEL_TYPE:
endpoint = f"{self.base_url}models/Systran/faster-whisper-large-v3"
response = requests.get(url=endpoint, headers=headers, timeout=self.DEFAULT_TIMEOUT).json()

self.id = response["id"]
self.owned_by = response.get("owned_by", "")
self.created = response.get("created", round(time.time()))
self.max_model_len = response.get("max_model_len", None)

self.status = "available"

except Exception:
Expand All @@ -61,6 +70,9 @@ def get_models_list(self, *args, **kwargs):
)

return Models(data=[data])


return Models(data=data)


def check_context_length(self, messages: List[Dict[str, str]], add_special_tokens: bool = True):
Expand Down Expand Up @@ -99,11 +111,12 @@ def create_embeddings(self, *args, **kwargs):
class ModelClient(OpenAI):
DEFAULT_TIMEOUT = 10

def __init__(self, type=Literal[EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE], *args, **kwargs):
def __init__(self, type=Literal[EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE, AUDIO_MODEL_TYPE], *args, **kwargs):
"""
ModelClient class extends OpenAI class to support custom methods.
"""
super().__init__(timeout=self.DEFAULT_TIMEOUT, *args, **kwargs)
timeout = 60 if type == AUDIO_MODEL_TYPE else self.DEFAULT_TIMEOUT
super().__init__(timeout=timeout, *args, **kwargs)
self.type = type

# set attributes for unavailable models
Expand Down
4 changes: 3 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from fastapi import FastAPI, Response, Security

from app.endpoints import chat, chunks, collections, completions, embeddings, files, models, search, documents
from app.endpoints import audio, chat, chunks, collections, completions, embeddings, files, models, search, documents
from app.helpers import ContentSizeLimitMiddleware
from app.schemas.security import User
from app.utils.config import APP_CONTACT_EMAIL, APP_CONTACT_URL, APP_DESCRIPTION, APP_VERSION
from app.utils.lifespan import lifespan
from app.utils.security import check_api_key


app = FastAPI(
title="Albert API",
version=APP_VERSION,
Expand All @@ -32,6 +33,7 @@ def health(user: User = Security(check_api_key)):
app.include_router(chat.router, tags=["Core"], prefix="/v1")
app.include_router(completions.router, tags=["Core"], prefix="/v1")
app.include_router(embeddings.router, tags=["Core"], prefix="/v1")
app.include_router(audio.router, tags=["Core"], prefix="/v1")

# RAG
app.include_router(search.router, tags=["Retrieval Augmented Generation"], prefix="/v1")
Expand Down
40 changes: 40 additions & 0 deletions app/schemas/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Dict, List, Optional, Union, Literal, Tuple

from pydantic import BaseModel


# cf. https://platform.openai.com/docs/api-reference/audio/createTranscription
class AudioTranscriptionRequest(BaseModel):
    """Request body for the transcription endpoint, mirroring OpenAI's createTranscription."""

    model: Optional[str] = "Systran/faster-distil-whisper-large-v3"  # default ASR model id
    language: Optional[str] = None  # language of the input audio, if known
    prompt: Optional[str] = None  # optional text to guide the transcription
    response_format: Optional[str] = "json"  # requested output format, e.g. "json" or "verbose_json"
    temperature: Optional[float] = 0  # sampling temperature
    timestamp: Optional[bool] = False  # NOTE(review): OpenAI names this field timestamp_granularities — confirm intent

class AudioTranscription(BaseModel):
    """Transcription result when response_format is "json": the text only."""

    text: str

class Word(BaseModel):
    """A single transcribed word with its start and end timestamps."""

    word: str
    start: float
    end: float

class Segment(BaseModel):
    """One segment of a verbose transcription, with its decoding statistics."""

    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: List[int]
    temperature: float
    avg_logprob: float  # average log-probability of the segment's tokens
    compression_ratio: float
    no_speech_prob: float  # probability that the segment contains no speech

class AudioTranscriptionVerbose(BaseModel):
    """Transcription result when response_format is "verbose_json": text plus word/segment detail."""

    language: str
    duration: float
    text: str
    words: List[Word]
    segments: List[Segment]
4 changes: 2 additions & 2 deletions app/schemas/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel, Field, model_validator

from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE
from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE, AUDIO_MODEL_TYPE


class Key(BaseModel):
Expand All @@ -16,7 +16,7 @@ class Auth(BaseModel):

class Model(BaseModel):
url: str
type: Literal[LANGUAGE_MODEL_TYPE, EMBEDDINGS_MODEL_TYPE]
type: Literal[LANGUAGE_MODEL_TYPE, EMBEDDINGS_MODEL_TYPE, AUDIO_MODEL_TYPE]
key: Optional[str] = "EMPTY"
search_internet: bool = False

Expand Down
4 changes: 2 additions & 2 deletions app/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from openai.types import Model
from pydantic import BaseModel

from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE
from app.utils.variables import EMBEDDINGS_MODEL_TYPE, LANGUAGE_MODEL_TYPE, AUDIO_MODEL_TYPE


class Model(Model):
type: Literal[LANGUAGE_MODEL_TYPE, EMBEDDINGS_MODEL_TYPE]
type: Literal[LANGUAGE_MODEL_TYPE, EMBEDDINGS_MODEL_TYPE, AUDIO_MODEL_TYPE]
status: Literal["available", "unavailable"]


Expand Down
21 changes: 21 additions & 0 deletions app/tests/test_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Manual smoke test for the /v1/audio/transcriptions endpoint.

Downloads a sample MP3 and sends it through the OpenAI client pointed at a
locally running API instance. Meant to be run by hand, not under pytest.
"""
from openai import OpenAI
import requests

SAMPLE_URL = (
    "https://huggingface.co/datasets/huseinzol05/temp-storage/resolve/main/"
    "Lex-Fridman-on-Grigori-Perelman-turning-away-1million-and-Fields-Medal.mp3?download=true"
)

# Fetch the sample audio; fail fast on HTTP errors and never hang on the network.
download = requests.get(SAMPLE_URL, timeout=60)
download.raise_for_status()
with open("audio.mp3", "wb") as audio_out:
    audio_out.write(download.content)

# SECURITY: hard-coded API key committed to the repository — rotate this key
# and load it from an environment variable instead.
client = OpenAI(
    api_key="jules-f0b946fa-0da4-4500-a536-e4fb7448d6bc",
    base_url="http://127.0.0.1:8080",
)

# Use a context manager so the file handle is closed deterministically.
with open("audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        file=audio_file,
        model="Systran/faster-distil-whisper-large-v3",
        response_format="json",
        # timestamp_granularities="segment"
    )

print(transcript)
1 change: 1 addition & 0 deletions app/utils/variables.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Model type identifier for speech-to-text backends.
AUDIO_MODEL_TYPE = "automatic-speech-recognition"
# Presumably the reserved collection id for internet search results — verify against callers.
INTERNET_COLLECTION_ID = "internet"
# Collection visibility types.
PUBLIC_COLLECTION_TYPE = "public"
PRIVATE_COLLECTION_TYPE = "private"
Expand Down

0 comments on commit db62995

Please sign in to comment.