Add assist route for HTTP requests (#3)

* Add `assist` route with STT/TTS endpoints
* Add `get_response` route
  Implement safety checks on STT/TTS request size
  Add `user_profile` schema
* Add new config params to configuration docs with rationale for defaults

---------

Co-authored-by: Daniel McKnight <[email protected]>
NeonGeckoCom · Jan 22, 2024 · b42290f · b42290f
1 parent 1dbcebf
commit b42290f
Show file tree

Hide file tree

Showing 8 changed files with 325 additions and 19 deletions.
diff --git a/README.md b/README.md
@@ -22,6 +22,8 @@ hana:
   fastapi_title: "My HANA API Host"
   fastapi_summary: "Personal HTTP API to access my DIANA backend."
   disable_auth: True
+  stt_max_length_encoded: 500000  # Arbitrary limit that is larger than any expected voice command
+  tts_max_words: 128  # Arbitrary limit that is longer than any default LLM token limit
 ```
 It is recommended to generate unique values for configured tokens; these are
 32 bytes in hexadecimal representation.

diff --git a/docker_overlay/etc/neon/diana.yaml b/docker_overlay/etc/neon/diana.yaml
@@ -25,4 +25,6 @@ hana:
   server_host: "0.0.0.0"
   server_port: 8080
   fastapi_title: "Hana"
-  fastapi_summary: "HANA (HTTP API for Neon Applications) is the HTTP component of the Device Independent API for Neon Applications (DIANA)"
+  fastapi_summary: "HANA (HTTP API for Neon Applications) is the HTTP component of the Device Independent API for Neon Applications (DIANA)"
+  stt_max_length_encoded: 500000
+  tts_max_words: 128
diff --git a/neon_hana/app/__init__.py b/neon_hana/app/__init__.py
@@ -28,6 +28,7 @@
 
 from neon_hana.app.dependencies import client_manager, jwt_bearer, mq_connector
 from neon_hana.app.routers.api_proxy import proxy_route
+from neon_hana.app.routers.assist import assist_route
 from neon_hana.app.routers.llm import llm_route
 from neon_hana.app.routers.mq_backend import mq_route
 from neon_hana.app.routers.auth import auth_route
@@ -40,6 +41,7 @@ def create_app(config: dict):
     version = __version__
     app = FastAPI(title=title, summary=summary, version=version)
     app.include_router(auth_route)
+    app.include_router(assist_route)
     app.include_router(proxy_route)
     app.include_router(mq_route)
     app.include_router(llm_route)

diff --git a/neon_hana/app/__main__.py b/neon_hana/app/__main__.py
@@ -25,6 +25,10 @@
 # SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import uvicorn
+from os import environ
+
+# NOTE(review): these defaults are applied ahead of the `ovos_config` import
+# below, presumably so that package resolves its config from `neon` /
+# `diana.yaml` — confirm the ordering requirement.
+environ.setdefault("OVOS_CONFIG_BASE_FOLDER", "neon")
+environ.setdefault("OVOS_CONFIG_FILENAME", "diana.yaml")
 
 from ovos_config.config import Configuration
 

diff --git a/neon_hana/app/routers/assist.py b/neon_hana/app/routers/assist.py
@@ -0,0 +1,48 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from fastapi import APIRouter, Depends
+from neon_hana.schema.assist_requests import *
+from neon_hana.app.dependencies import jwt_bearer, mq_connector
+
+
+# Router for the assist endpoints. All routes are mounted under `/neon` and
+# declare `jwt_bearer` as a dependency.
+assist_route = APIRouter(prefix="/neon", tags=["assist"],
+                         dependencies=[Depends(jwt_bearer)])
+
+
+@assist_route.post("/get_stt")
+async def get_stt(audio_in: STTRequest) -> STTResponse:
+    """
+    Handle `POST /neon/get_stt` by delegating to `mq_connector.get_stt`.
+    :param audio_in: Request body; `dict(audio_in)` is unpacked as keyword
+        arguments for the connector call
+    :returns: Whatever `mq_connector.get_stt` returns
+    """
+    # NOTE(review): the connector call is not awaited inside this coroutine.
+    # If it blocks while waiting on the MQ response it would stall the event
+    # loop — confirm, and consider declaring this route with plain `def`.
+    return mq_connector.get_stt(**dict(audio_in))
+
+
+@assist_route.post("/get_tts")
+async def get_tts(request: TTSRequest) -> TTSResponse:
+    """
+    Handle `POST /neon/get_tts` by delegating to `mq_connector.get_tts`.
+    :param request: Request body; `dict(request)` is unpacked as keyword
+        arguments for the connector call
+    :returns: Whatever `mq_connector.get_tts` returns
+    """
+    # NOTE(review): the connector call is not awaited inside this coroutine.
+    # If it blocks while waiting on the MQ response it would stall the event
+    # loop — confirm, and consider declaring this route with plain `def`.
+    return mq_connector.get_tts(**dict(request))
+
+
+@assist_route.post("/get_response")
+async def get_response(request: SkillRequest) -> SkillResponse:
+    """
+    Handle `POST /neon/get_response` by delegating to
+    `mq_connector.get_response`.
+    :param request: Request body; `dict(request)` is unpacked as keyword
+        arguments for the connector call
+    :returns: Whatever `mq_connector.get_response` returns
+    """
+    # NOTE(review): the connector call is not awaited inside this coroutine.
+    # If it blocks while waiting on the MQ response it would stall the event
+    # loop — confirm, and consider declaring this route with plain `def`.
+    return mq_connector.get_response(**dict(request))
diff --git a/neon_hana/mq_service_api.py b/neon_hana/mq_service_api.py
@@ -25,11 +25,14 @@
 # SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import json
+
+from tempfile import mkdtemp
+from time import time
 from typing import Optional, Dict, Any, List
 from uuid import uuid4
-
 from fastapi import HTTPException
 
+from neon_hana.schema.user_profile import UserProfile
 from neon_mq_connector.utils.client_utils import send_mq_request
 
 
@@ -43,6 +46,8 @@ class MQServiceManager:
     def __init__(self, config: dict):
+        """
+        Read MQ and request-size settings from `config`.
+        :param config: Settings dict; the keys read here are
+            `mq_default_timeout`, `mq_client_id`, `stt_max_length_encoded`
+            and `tts_max_words`
+        """
         self.mq_default_timeout = config.get('mq_default_timeout', 10)
+        # A random UUID string is used when no client id is configured.
+        # NOTE(review): the attribute is spelled `mq_cliend_id` (not
+        # `client`); other methods in this class read it under that spelling.
         self.mq_cliend_id = config.get('mq_client_id') or str(uuid4())
+        # NOTE(review): `or` replaces any falsy configured value, including
+        # 0, with the default. The `0 < limit` guards in `get_stt`/`get_tts`
+        # suggest 0 may have been meant to disable the check — confirm.
+        self.stt_max_length = config.get('stt_max_length_encoded') or 500000
+        self.tts_max_words = config.get('tts_max_words') or 128
 
     def _validate_api_proxy_response(self, response: dict):
         if response['status_code'] == 200:
@@ -120,25 +125,56 @@ def get_coupons(self):
         except TimeoutError as e:
             raise APIError(status_code=500, detail=repr(e))
 
-    def get_stt(self, b64_audio: str, lang: str, timeout: int = 20):
+    def get_stt(self, encoded_audio: str, lang_code: str):
+        """
+        Send a `neon.get_stt` request over MQ and return the response data.
+        :param encoded_audio: Encoded audio string, sent as `audio_data`
+        :param lang_code: Language code, sent as `lang`
+        :returns: The `data` entry of the MQ response
+        :raises APIError: With status 400 when `stt_max_length` is positive
+            and `encoded_audio` is longer than that limit
+        """
+        # NOTE(review): `get_coupons` above converts `TimeoutError` into an
+        # `APIError`; no equivalent handling exists here — confirm whether
+        # `send_mq_request` can raise it on this path.
+        # A non-positive limit skips the length check entirely.
+        if 0 < self.stt_max_length < len(encoded_audio):
+            raise APIError(status_code=400,
+                           detail=f"Audio exceeds maximum encoded length of "
+                                  f"{self.stt_max_length}")
         request_data = {"msg_type": "neon.get_stt",
-                                    "data": {"audio_data": b64_audio,
+                                    "data": {"audio_data": encoded_audio,
                                              "utterances": [""],  # TODO: Compat
-                                             "lang": lang},
-                                    "context": {"source": "hana"}}
+                                             "lang": lang_code},
+                                    "context": {"source": "hana",
+                                                "ident": f"{self.mq_cliend_id}"
+                                                         f"{time()}"}}
         response = send_mq_request("/neon_chat_api", request_data,
-                                   "neon_chat_api_request", timeout=timeout)
-        return response
-
-    def get_tts(self, string: str, lang: str, gender: str, timeout: int = 20):
+                                   "neon_chat_api_request",
+                                   timeout=self.mq_default_timeout)
+        return response['data']
+
+    def get_tts(self, to_speak: str, lang_code: str, gender: str):
+        """
+        Send a `neon.get_tts` request over MQ and return the encoded audio.
+        :param to_speak: Text to synthesize, sent as `text`
+        :param lang_code: Language code, sent as `lang` and as the speaker
+            `lang`; also used as a key into the response data
+        :param gender: Speaker gender, sent in `speaker` and used as a key
+            into the response audio
+        :returns: dict with the single key `encoded_audio`
+        :raises APIError: With status 400 when `tts_max_words` is positive
+            and `to_speak` has more whitespace-separated words than that
+        """
+        # A non-positive limit skips the word-count check entirely.
+        if 0 < self.tts_max_words < len(to_speak.split()):
+            raise APIError(status_code=400,
+                           detail=f"Text exceeds maximum word count of "
+                                  f"{self.tts_max_words}")
         request_data = {"msg_type": "neon.get_tts",
-                                    "data": {"text": string,
-                                             "utterance": "",  # TODO: Compat
-                                             "speaker": {"name": "Neon",
-                                                         "gender": gender,
-                                                         "lang": lang},
-                                             "lang": lang},
-                                    "context": {"source": "hana"}}
+                        "data": {"text": to_speak,
+                                 "utterance": "",  # TODO: Compat
+                                 "speaker": {"name": "Neon",
+                                             "gender": gender,
+                                             "lang": lang_code},
+                                 "lang": lang_code},
+                        "context": {"source": "hana",
+                                    "ident": f"{self.mq_cliend_id}{time()}"}}
+        response = send_mq_request("/neon_chat_api", request_data,
+                                   "neon_chat_api_request",
+                                   timeout=self.mq_default_timeout)
+        # NOTE(review): this nested lookup raises `KeyError` if the response
+        # has no entry for `lang_code` or `gender` — confirm whether the
+        # backend guarantees both keys.
+        audio = response['data'][lang_code]['audio'][gender]
+        return {"encoded_audio": audio}
+
+    def get_response(self, utterance: str, lang_code: str,
+                     user_profile: UserProfile):
+        """
+        Send a `recognizer_loop:utterance` request over MQ and return the
+        response sentence.
+        :param utterance: Text sent as the only entry of `utterances`
+        :param lang_code: Language code, sent as `lang` and used as a key
+            into the response's `responses`
+        :param user_profile: Profile sent in the message context as the only
+            entry of `user_profiles`
+        :returns: dict with keys `answer` and `lang_code`
+        """
+        # Fall back to this client's id when the profile has no username.
+        # NOTE(review): this assigns onto the caller's `user_profile` object.
+        user_profile.user.username = (user_profile.user.username or
+                                      self.mq_cliend_id)
+        request_data = {"msg_type": "recognizer_loop:utterance",
+                        "data": {"utterances": [utterance],
+                                 "lang": lang_code},
+                        "context": {"username": user_profile.user.username,
+                                    "user_profiles": [user_profile.model_dump(mode="json")],
+                                    "source": "hana",
+                                    "ident": f"{self.mq_cliend_id}{time()}"}}
         response = send_mq_request("/neon_chat_api", request_data,
-                                   "neon_chat_api_request", timeout=timeout)
-        return response
+                                   "neon_chat_api_request",
+                                   timeout=self.mq_default_timeout)
+        # NOTE(review): this nested lookup raises `KeyError` if the response
+        # has no `responses` entry for `lang_code` — confirm the backend
+        # always answers in the requested language.
+        sentence = response['data']['responses'][lang_code]['sentence']
+        return {"answer": sentence, "lang_code": lang_code}
diff --git a/neon_hana/schema/assist_requests.py b/neon_hana/schema/assist_requests.py
diff --git a/neon_hana/schema/user_profile.py b/neon_hana/schema/user_profile.py
@@ -0,0 +1,104 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Optional, List
+from pydantic import BaseModel
+
+
+class ProfileUser(BaseModel):
+    """
+    Personal and account fields of a user profile. Every field has a
+    default, so the model can be built with no arguments.
+    """
+    first_name: str = ""
+    middle_name: str = ""
+    last_name: str = ""
+    preferred_name: str = ""
+    full_name: str = ""
+    # NOTE(review): the default looks like a format placeholder rather than
+    # an empty value — confirm consumers treat it as "unset".
+    dob: str = "YYYY/MM/DD"
+    age: str = ""
+    email: str = ""
+    username: str = ""
+    password: str = ""
+    picture: str = ""
+    about: str = ""
+    phone: str = ""
+    phone_verified: bool = False
+    email_verified: bool = False
+
+
+class ProfileBrands(BaseModel):
+    """
+    Brand preference dicts, each defaulting to empty.
+
+    Not currently a field of `UserProfile`; its `brands` entry is commented
+    out there.
+    """
+    ignored_brands: dict = {}
+    favorite_brands: dict = {}
+    specially_requested: dict = {}
+
+
+class ProfileSpeech(BaseModel):
+    """
+    Speech-to-text and text-to-speech settings, with a primary and a
+    secondary TTS configuration.
+    """
+    stt_language: str = "en-us"
+    alt_languages: List[str] = ['en']
+    tts_language: str = "en-us"
+    tts_gender: str = "female"
+    neon_voice: Optional[str] = ''
+    secondary_tts_language: Optional[str] = ''
+    secondary_tts_gender: str = "male"
+    # NOTE(review): `neon_voice` accepts None but this secondary counterpart
+    # does not — confirm the difference is intended.
+    secondary_neon_voice: str = ''
+    speed_multiplier: float = 1.0
+
+
+class ProfileUnits(BaseModel):
+    """
+    Unit and format preferences. The comment under each field lists its
+    expected values; the annotations here do not restrict input to them.
+    """
+    time: int = 12
+    # 12, 24
+    date: str = "MDY"
+    # MDY, YMD, YDM
+    measure: str = "imperial"
+    # imperial, metric
+
+
+class ProfileLocation(BaseModel):
+    """
+    Location fields of a user profile. Every field is optional and defaults
+    to None.
+    """
+    lat: Optional[float] = None
+    lng: Optional[float] = None
+    city: Optional[str] = None
+    state: Optional[str] = None
+    country: Optional[str] = None
+    # presumably a timezone name — TODO confirm expected format
+    tz: Optional[str] = None
+    # presumably the UTC offset in hours — TODO confirm
+    utc: Optional[float] = None
+
+
+class ProfileResponseMode(BaseModel):
+    """
+    Response behavior preferences: a speed mode string plus two boolean
+    flags that default to False.
+    """
+    speed_mode: str = "quick"
+    hesitation: bool = False
+    limit_dialog: bool = False
+
+
+class ProfilePrivacy(BaseModel):
+    """
+    Privacy flags of a user profile; both default to False.
+    """
+    save_audio: bool = False
+    save_text: bool = False
+
+
+class UserProfile(BaseModel):
+    """
+    Top-level user profile composed of the section models defined in this
+    module. Each section defaults to an instance built with no arguments, so
+    `UserProfile()` is valid on its own.
+    """
+    user: ProfileUser = ProfileUser()
+    # brands: ProfileBrands
+    speech: ProfileSpeech = ProfileSpeech()
+    units: ProfileUnits = ProfileUnits()
+    location: ProfileLocation = ProfileLocation()
+    response_mode: ProfileResponseMode = ProfileResponseMode()
+    privacy: ProfilePrivacy = ProfilePrivacy()