From 9455901867ed8dac174ac9a15bc6444437d21250 Mon Sep 17 00:00:00 2001
From: Daniel McKnight <daniel@neon.ai>
Date: Thu, 25 Jan 2024 19:24:18 -0800
Subject: [PATCH] Add NodeData model for client connection properties Update
 MQServiceManager to manage Sessions

---
 examples/voice_client.py            | 187 ++++++++++++++++++++++++++++
 neon_hana/app/routers/assist.py     |   8 +-
 neon_hana/mq_service_api.py         |  31 ++++-
 neon_hana/schema/assist_requests.py |   6 +-
 neon_hana/schema/node_model.py      |  57 +++++++++
 requirements/voice_client.txt       |   8 ++
 6 files changed, 289 insertions(+), 8 deletions(-)
 create mode 100644 examples/voice_client.py
 create mode 100644 neon_hana/schema/node_model.py
 create mode 100644 requirements/voice_client.txt

diff --git a/examples/voice_client.py b/examples/voice_client.py
new file mode 100644
index 0000000..32f12c6
--- /dev/null
+++ b/examples/voice_client.py
@@ -0,0 +1,187 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from os import environ
+environ.setdefault("OVOS_CONFIG_BASE_FOLDER", "neon")
+environ.setdefault("OVOS_CONFIG_FILENAME", "diana.yaml")
+
+import io
+
+from threading import Thread
+from unittest.mock import Mock
+from base64 import b64decode, b64encode
+from ovos_plugin_manager.microphone import OVOSMicrophoneFactory
+from ovos_plugin_manager.vad import OVOSVADFactory
+from ovos_dinkum_listener.voice_loop.voice_loop import DinkumVoiceLoop
+from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer
+from ovos_config.config import Configuration
+from ovos_utils.messagebus import FakeBus
+from ovos_utils import wait_for_exit_signal
+from ovos_utils.log import LOG
+from ovos_bus_client.message import Message
+from neon_utils.hana_utils import request_backend, ServerException
+from speech_recognition import AudioData
+from pydub import AudioSegment
+from pydub.playback import play
+
+
+class MockTransformers(Mock):
+    def transform(self, chunk):
+        return chunk, dict()
+
+
+def on_ready():
+    LOG.info("ready")
+
+
+def on_stopping():
+    LOG.info("stopping")
+
+
+def on_error(e="unknown"):
+    LOG.error(e)
+
+
+def on_alive():
+    LOG.debug("alive")
+
+
+def on_started():
+    LOG.debug("started")
+
+
+class NeonVoiceClient:
+    def __init__(self, bus=None, ready_hook=on_ready, error_hook=on_error,
+                 stopping_hook=on_stopping, alive_hook=on_alive,
+                 started_hook=on_started):
+        self.error_hook = error_hook
+        self.stopping_hook = stopping_hook
+        alive_hook()
+        self.config = Configuration()
+        self.bus = bus or FakeBus()
+        self.lang = self.config.get('lang') or "en-us"
+        self._mic = OVOSMicrophoneFactory.create(self.config)
+        self._mic.start()
+        self._hotwords = HotwordContainer(self.bus)
+        self._hotwords.load_hotword_engines()
+        self._vad = OVOSVADFactory.create(self.config)
+
+        self._voice_loop = DinkumVoiceLoop(mic=self._mic,
+                                           hotwords=self._hotwords,
+                                           stt=Mock(),
+                                           fallback_stt=Mock(),
+                                           vad=self._vad,
+                                           transformers=MockTransformers(),
+                                           stt_audio_callback=self.on_stt_audio,
+                                           listenword_audio_callback=self.on_hotword_audio)
+        self._voice_loop.start()
+        self._voice_thread = None
+
+        self._listening_sound = None
+        self._error_sound = None
+        started_hook()
+        self.run()
+        ready_hook()
+
+    @property
+    def listening_sound(self) -> AudioSegment:
+        if not self._listening_sound:
+            res_file = Configuration().get('sounds').get('start_listening')
+
+            self._listening_sound = AudioSegment.from_file(res_file,
+                                                           format="wav")
+        return self._listening_sound
+
+    @property
+    def error_sound(self) -> AudioSegment:
+        if not self._error_sound:
+            res_file = Configuration().get('sounds').get('error')
+            self._error_sound = AudioSegment.from_file(res_file, format="wav")
+        return self._error_sound
+
+    @property
+    def user_profile(self):
+        # TODO
+        return dict()
+
+    def run(self):
+        try:
+            self._voice_thread = Thread(target=self._voice_loop.run, daemon=True)
+            self._voice_thread.start()
+        except Exception as e:
+            self.error_hook(repr(e))
+
+    def on_stt_audio(self, audio_bytes: bytes, context: dict):
+        LOG.debug(f"Got {len(audio_bytes)} bytes of audio")
+        wav_data = AudioData(audio_bytes, self._mic.sample_rate,
+                             self._mic.sample_width).get_wav_data()
+        try:
+            self.get_audio_response(wav_data)
+        except ServerException as e:
+            LOG.error(e)
+            play(self.error_sound)
+
+    def on_hotword_audio(self, audio: bytes, context: dict):
+        payload = context
+        msg_type = "recognizer_loop:wakeword"
+        play(self.listening_sound)
+        LOG.info(f"Emitting hotword event: {msg_type}")
+        # emit ww event
+        self.bus.emit(Message(msg_type, payload, context))
+
+    def get_audio_response(self, audio: bytes):
+        audio_data = b64encode(audio).decode("utf-8")
+        transcript = request_backend("neon/get_stt",
+                                     {"encoded_audio": audio_data,
+                                      "lang_code": self.lang})
+        transcribed = transcript['transcripts'][0]
+        LOG.info(transcribed)
+        response = request_backend("neon/get_response",
+                                   {"lang_code": self.lang,
+                                    "user_profile": self.user_profile,
+                                    "utterance": transcribed})
+        answer = response['answer']
+        LOG.info(answer)
+        audio = request_backend("neon/get_tts", {"lang_code": self.lang,
+                                                 "to_speak": answer})
+        audio_bytes = b64decode(audio['encoded_audio'])
+        play(AudioSegment.from_file(io.BytesIO(audio_bytes), format="wav"))
+        LOG.info(f"Playback completed")
+
+    def shutdown(self):
+        self.stopping_hook()
+        self._voice_loop.stop()
+        self._voice_thread.join(30)
+
+
+def main(*args, **kwargs):
+    client = NeonVoiceClient(*args, **kwargs)
+    wait_for_exit_signal()
+    client.shutdown()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/neon_hana/app/routers/assist.py b/neon_hana/app/routers/assist.py
index a274df7..1ca560d 100644
--- a/neon_hana/app/routers/assist.py
+++ b/neon_hana/app/routers/assist.py
@@ -24,7 +24,7 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Request
 from neon_hana.schema.assist_requests import *
 from neon_hana.app.dependencies import jwt_bearer, mq_connector
 
@@ -44,5 +44,7 @@ async def get_tts(request: TTSRequest) -> TTSResponse:
 
 
 @assist_route.post("/get_response")
-async def get_response(request: SkillRequest) -> SkillResponse:
-    return mq_connector.get_response(**dict(request))
+async def get_response(skill_request: SkillRequest,
+                       request: Request) -> SkillResponse:
+    skill_request.node_data.networking.public_ip = request.client.host
+    return mq_connector.get_response(**dict(skill_request))
diff --git a/neon_hana/mq_service_api.py b/neon_hana/mq_service_api.py
index e7c71ae..c74a1ad 100644
--- a/neon_hana/mq_service_api.py
+++ b/neon_hana/mq_service_api.py
@@ -31,6 +31,7 @@
 from uuid import uuid4
 from fastapi import HTTPException
 
+from neon_hana.schema.node_model import NodeData
 from neon_hana.schema.user_profile import UserProfile
 from neon_mq_connector.utils.client_utils import send_mq_request
 
@@ -48,8 +49,10 @@ def __init__(self, config: dict):
         self.stt_max_length = config.get('stt_max_length_encoded') or 500000
         self.tts_max_words = config.get('tts_max_words') or 128
         self.email_enabled = config.get('enable_email')
+        self.sessions_by_id = dict()
 
-    def _validate_api_proxy_response(self, response: dict):
+    @staticmethod
+    def _validate_api_proxy_response(response: dict):
         if response['status_code'] == 200:
             try:
                 resp = json.loads(response['content'])
@@ -67,6 +70,18 @@ def _validate_api_proxy_response(self, response: dict):
         code = response['status_code'] if response['status_code'] > 200 else 500
         raise APIError(status_code=code, detail=response['content'])
 
+    def get_session(self, node_data: NodeData) -> dict:
+        """
+        Get a serialized Session object for the specified Node.
+        @param node_data: NodeData received from client
+        @returns: Serialized session, possibly cached from previous a response
+        """
+        session_id = node_data.device_id
+        self.sessions_by_id.setdefault(session_id,
+                                       {"session_id": session_id,
+                                        "site_id": node_data.location.site_id})
+        return self.sessions_by_id[session_id]
+
     def query_api_proxy(self, service_name: str, query_params: dict,
                         timeout: int = 10):
         query_params['service'] = service_name
@@ -165,18 +180,28 @@ def get_tts(self, to_speak: str, lang_code: str, gender: str):
         return {"encoded_audio": audio}
 
     def get_response(self, utterance: str, lang_code: str,
-                     user_profile: UserProfile):
+                     user_profile: UserProfile, node_data: NodeData):
+        session = self.get_session(node_data)
         user_profile.user.username = (user_profile.user.username or
                                       self.mq_cliend_id)
+
         request_data = {"msg_type": "recognizer_loop:utterance",
                         "data": {"utterances": [utterance],
                                  "lang": lang_code},
                         "context": {"username": user_profile.user.username,
-                                    "user_profiles": [user_profile.model_dump(mode="json")],
+                                    "user_profiles": [
+                                        user_profile.model_dump(mode="json")],
                                     "source": "hana",
+                                    "session": session,
+                                    "node_data": node_data.model_dump(
+                                        mode="json"),
                                     "ident": f"{self.mq_cliend_id}{time()}"}}
         response = send_mq_request("/neon_chat_api", request_data,
                                    "neon_chat_api_request",
                                    timeout=self.mq_default_timeout)
+
+        # Update session data for future inputs
+        self.sessions_by_id[session['session_id']] = \
+            response['context']['session']
         sentence = response['data']['responses'][lang_code]['sentence']
         return {"answer": sentence, "lang_code": lang_code}
diff --git a/neon_hana/schema/assist_requests.py b/neon_hana/schema/assist_requests.py
index 8acd25c..9cd66c4 100644
--- a/neon_hana/schema/assist_requests.py
+++ b/neon_hana/schema/assist_requests.py
@@ -24,9 +24,10 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from typing import List
+from typing import List, Optional
 from pydantic import BaseModel
 
+from neon_hana.schema.node_model import NodeData
 from neon_hana.schema.user_profile import UserProfile
 
 
@@ -82,7 +83,8 @@ class TTSResponse(BaseModel):
 class SkillRequest(BaseModel):
     utterance: str
     lang_code: str
-    user_profile: UserProfile
+    user_profile: UserProfile = UserProfile()
+    node_data: Optional[NodeData] = NodeData()
 
     model_config = {
         "json_schema_extra": {
diff --git a/neon_hana/schema/node_model.py b/neon_hana/schema/node_model.py
new file mode 100644
index 0000000..3fbdc50
--- /dev/null
+++ b/neon_hana/schema/node_model.py
@@ -0,0 +1,57 @@
+# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System
+# All trademark and other rights reserved by their respective owners
+# Copyright 2008-2021 Neongecko.com Inc.
+# BSD-3
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from this
+#    software without specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+# OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+from typing import Optional, Dict
+
+
+class NodeSoftware(BaseModel):
+    operating_system: str = ""
+    os_version: str = ""
+    neon_packages: Optional[Dict[str, str]] = None
+
+
+class NodeNetworking(BaseModel):
+    local_ip: str = "127.0.0.1"
+    public_ip: str = ""
+    mac_address: str = ""
+
+
+class NodeLocation(BaseModel):
+    lat: Optional[float] = None
+    lon: Optional[float] = None
+    site_id: Optional[str] = None
+
+
+class NodeData(BaseModel):
+    device_id: str = Field(default_factory=lambda: str(uuid4()))
+    device_name: str = ""
+    device_description: str = ""
+    platform: str = ""
+    networking: NodeNetworking = NodeNetworking()
+    software: NodeSoftware = NodeSoftware()
+    location: NodeLocation = NodeLocation()
diff --git a/requirements/voice_client.txt b/requirements/voice_client.txt
new file mode 100644
index 0000000..02ad367
--- /dev/null
+++ b/requirements/voice_client.txt
@@ -0,0 +1,8 @@
+neon-utils~=1.8,>=1.8.3a3
+ovos-dinkum-listener~=0.0.2,>=0.0.3a27
+ovos-vad-plugin-webrtcvad~=0.0.1
+ovos-microphone-plugin-alsa~=0.0.0
+ovos-ww-plugin-precise-lite[tflite]~=0.1
+pydub~=0.25
+SpeechRecognition~=3.10
+sdnotify~=0.3
\ No newline at end of file