From 9455901867ed8dac174ac9a15bc6444437d21250 Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Thu, 25 Jan 2024 19:24:18 -0800 Subject: [PATCH] Add NodeData model for client connection properties Update MQServiceManager to manage Sessions --- examples/voice_client.py | 187 ++++++++++++++++++++++++++++ neon_hana/app/routers/assist.py | 8 +- neon_hana/mq_service_api.py | 31 ++++- neon_hana/schema/assist_requests.py | 6 +- neon_hana/schema/node_model.py | 57 +++++++++ requirements/voice_client.txt | 8 ++ 6 files changed, 289 insertions(+), 8 deletions(-) create mode 100644 examples/voice_client.py create mode 100644 neon_hana/schema/node_model.py create mode 100644 requirements/voice_client.txt diff --git a/examples/voice_client.py b/examples/voice_client.py new file mode 100644 index 0000000..32f12c6 --- /dev/null +++ b/examples/voice_client.py @@ -0,0 +1,187 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2021 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from os import environ +environ.setdefault("OVOS_CONFIG_BASE_FOLDER", "neon") +environ.setdefault("OVOS_CONFIG_FILENAME", "diana.yaml") + +import io + +from threading import Thread +from unittest.mock import Mock +from base64 import b64decode, b64encode +from ovos_plugin_manager.microphone import OVOSMicrophoneFactory +from ovos_plugin_manager.vad import OVOSVADFactory +from ovos_dinkum_listener.voice_loop.voice_loop import DinkumVoiceLoop +from ovos_dinkum_listener.voice_loop.hotwords import HotwordContainer +from ovos_config.config import Configuration +from ovos_utils.messagebus import FakeBus +from ovos_utils import wait_for_exit_signal +from ovos_utils.log import LOG +from ovos_bus_client.message import Message +from neon_utils.hana_utils import request_backend, ServerException +from speech_recognition import AudioData +from pydub import AudioSegment +from pydub.playback import play + + +class MockTransformers(Mock): + def transform(self, chunk): + return chunk, dict() + + +def on_ready(): + LOG.info("ready") + + +def on_stopping(): + LOG.info("stopping") + + +def on_error(e="unknown"): + LOG.error(e) + + +def on_alive(): + LOG.debug("alive") + + +def on_started(): + LOG.debug("started") + + +class NeonVoiceClient: + def __init__(self, bus=None, ready_hook=on_ready, error_hook=on_error, + stopping_hook=on_stopping, alive_hook=on_alive, + started_hook=on_started): + self.error_hook = error_hook + self.stopping_hook = stopping_hook + alive_hook() + self.config = Configuration() + self.bus = bus or FakeBus() + self.lang = self.config.get('lang') or "en-us" + self._mic = OVOSMicrophoneFactory.create(self.config) + self._mic.start() + self._hotwords = HotwordContainer(self.bus) + self._hotwords.load_hotword_engines() + self._vad = OVOSVADFactory.create(self.config) + + self._voice_loop = DinkumVoiceLoop(mic=self._mic, + hotwords=self._hotwords, + stt=Mock(), + fallback_stt=Mock(), + vad=self._vad, + transformers=MockTransformers(), + stt_audio_callback=self.on_stt_audio, + listenword_audio_callback=self.on_hotword_audio) + self._voice_loop.start() + self._voice_thread = None + + self._listening_sound = None + self._error_sound = None + started_hook() + self.run() + ready_hook() + + @property + def listening_sound(self) -> AudioSegment: + if not self._listening_sound: + res_file = Configuration().get('sounds').get('start_listening') + + self._listening_sound = AudioSegment.from_file(res_file, + format="wav") + return self._listening_sound + + @property + def error_sound(self) -> AudioSegment: + if not self._error_sound: + res_file = Configuration().get('sounds').get('error') + self._error_sound = AudioSegment.from_file(res_file, format="wav") + return self._error_sound + + @property + def user_profile(self): + # TODO + return dict() + + def run(self): + try: + self._voice_thread = Thread(target=self._voice_loop.run, daemon=True) + self._voice_thread.start() + except Exception as e: + self.error_hook(repr(e)) + + def on_stt_audio(self, audio_bytes: bytes, context: dict): + LOG.debug(f"Got {len(audio_bytes)} bytes of audio") + wav_data = AudioData(audio_bytes, self._mic.sample_rate, + self._mic.sample_width).get_wav_data() + try: + self.get_audio_response(wav_data) + except ServerException as e: + LOG.error(e) + play(self.error_sound) + + def on_hotword_audio(self, audio: bytes, context: dict): + payload = context + msg_type = "recognizer_loop:wakeword" + play(self.listening_sound) + LOG.info(f"Emitting hotword event: {msg_type}") + # emit ww event + self.bus.emit(Message(msg_type, payload, context)) + + def get_audio_response(self, audio: bytes): + audio_data = b64encode(audio).decode("utf-8") + transcript = request_backend("neon/get_stt", + {"encoded_audio": audio_data, + "lang_code": self.lang}) + transcribed = transcript['transcripts'][0] + LOG.info(transcribed) + response = request_backend("neon/get_response", + {"lang_code": self.lang, + "user_profile": self.user_profile, + "utterance": transcribed}) + answer = response['answer'] + LOG.info(answer) + audio = request_backend("neon/get_tts", {"lang_code": self.lang, + "to_speak": answer}) + audio_bytes = b64decode(audio['encoded_audio']) + play(AudioSegment.from_file(io.BytesIO(audio_bytes), format="wav")) + LOG.info(f"Playback completed") + + def shutdown(self): + self.stopping_hook() + self._voice_loop.stop() + self._voice_thread.join(30) + + +def main(*args, **kwargs): + client = NeonVoiceClient(*args, **kwargs) + wait_for_exit_signal() + client.shutdown() + + +if __name__ == "__main__": + main() diff --git a/neon_hana/app/routers/assist.py b/neon_hana/app/routers/assist.py index a274df7..1ca560d 100644 --- a/neon_hana/app/routers/assist.py +++ b/neon_hana/app/routers/assist.py @@ -24,7 +24,7 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Request from neon_hana.schema.assist_requests import * from neon_hana.app.dependencies import jwt_bearer, mq_connector @@ -44,5 +44,7 @@ async def get_tts(request: TTSRequest) -> TTSResponse: @assist_route.post("/get_response") -async def get_response(request: SkillRequest) -> SkillResponse: - return mq_connector.get_response(**dict(request)) +async def get_response(skill_request: SkillRequest, + request: Request) -> SkillResponse: + skill_request.node_data.networking.public_ip = request.client.host + return mq_connector.get_response(**dict(skill_request)) diff --git a/neon_hana/mq_service_api.py b/neon_hana/mq_service_api.py index e7c71ae..c74a1ad 100644 --- a/neon_hana/mq_service_api.py +++ b/neon_hana/mq_service_api.py @@ -31,6 +31,7 @@ from uuid import uuid4 from fastapi import HTTPException +from neon_hana.schema.node_model import NodeData from neon_hana.schema.user_profile import UserProfile from neon_mq_connector.utils.client_utils import send_mq_request @@ -48,8 +49,10 @@ def __init__(self, config: dict): self.stt_max_length = config.get('stt_max_length_encoded') or 500000 self.tts_max_words = config.get('tts_max_words') or 128 self.email_enabled = config.get('enable_email') + self.sessions_by_id = dict() - def _validate_api_proxy_response(self, response: dict): + @staticmethod + def _validate_api_proxy_response(response: dict): if response['status_code'] == 200: try: resp = json.loads(response['content']) @@ -67,6 +70,18 @@ def _validate_api_proxy_response(self, response: dict): code = response['status_code'] if response['status_code'] > 200 else 500 raise APIError(status_code=code, detail=response['content']) + def get_session(self, node_data: NodeData) -> dict: + """ + Get a serialized Session object for the specified Node. + @param node_data: NodeData received from client + @returns: Serialized session, possibly cached from previous a response + """ + session_id = node_data.device_id + self.sessions_by_id.setdefault(session_id, + {"session_id": session_id, + "site_id": node_data.location.site_id}) + return self.sessions_by_id[session_id] + def query_api_proxy(self, service_name: str, query_params: dict, timeout: int = 10): query_params['service'] = service_name @@ -165,18 +180,28 @@ def get_tts(self, to_speak: str, lang_code: str, gender: str): return {"encoded_audio": audio} def get_response(self, utterance: str, lang_code: str, - user_profile: UserProfile): + user_profile: UserProfile, node_data: NodeData): + session = self.get_session(node_data) user_profile.user.username = (user_profile.user.username or self.mq_cliend_id) + request_data = {"msg_type": "recognizer_loop:utterance", "data": {"utterances": [utterance], "lang": lang_code}, "context": {"username": user_profile.user.username, - "user_profiles": [user_profile.model_dump(mode="json")], + "user_profiles": [ + user_profile.model_dump(mode="json")], "source": "hana", + "session": session, + "node_data": node_data.model_dump( + mode="json"), "ident": f"{self.mq_cliend_id}{time()}"}} response = send_mq_request("/neon_chat_api", request_data, "neon_chat_api_request", timeout=self.mq_default_timeout) + + # Update session data for future inputs + self.sessions_by_id[session['session_id']] = \ + response['context']['session'] sentence = response['data']['responses'][lang_code]['sentence'] return {"answer": sentence, "lang_code": lang_code} diff --git a/neon_hana/schema/assist_requests.py b/neon_hana/schema/assist_requests.py index 8acd25c..9cd66c4 100644 --- a/neon_hana/schema/assist_requests.py +++ b/neon_hana/schema/assist_requests.py @@ -24,9 +24,10 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from typing import List +from typing import List, Optional from pydantic import BaseModel +from neon_hana.schema.node_model import NodeData from neon_hana.schema.user_profile import UserProfile @@ -82,7 +83,8 @@ class TTSResponse(BaseModel): class SkillRequest(BaseModel): utterance: str lang_code: str - user_profile: UserProfile + user_profile: UserProfile = UserProfile() + node_data: Optional[NodeData] = NodeData() model_config = { "json_schema_extra": { diff --git a/neon_hana/schema/node_model.py b/neon_hana/schema/node_model.py new file mode 100644 index 0000000..3fbdc50 --- /dev/null +++ b/neon_hana/schema/node_model.py @@ -0,0 +1,57 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Development System +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2021 Neongecko.com Inc. +# BSD-3 +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from uuid import uuid4 + +from pydantic import BaseModel, Field +from typing import Optional, Dict + + +class NodeSoftware(BaseModel): + operating_system: str = "" + os_version: str = "" + neon_packages: Optional[Dict[str, str]] = None + + +class NodeNetworking(BaseModel): + local_ip: str = "127.0.0.1" + public_ip: str = "" + mac_address: str = "" + + +class NodeLocation(BaseModel): + lat: Optional[float] = None + lon: Optional[float] = None + site_id: Optional[str] = None + + +class NodeData(BaseModel): + device_id: str = Field(default_factory=lambda: str(uuid4())) + device_name: str = "" + device_description: str = "" + platform: str = "" + networking: NodeNetworking = NodeNetworking() + software: NodeSoftware = NodeSoftware() + location: NodeLocation = NodeLocation() diff --git a/requirements/voice_client.txt b/requirements/voice_client.txt new file mode 100644 index 0000000..02ad367 --- /dev/null +++ b/requirements/voice_client.txt @@ -0,0 +1,8 @@ +neon-utils~=1.8,>=1.8.3a3 +ovos-dinkum-listener~=0.0.2,>=0.0.3a27 +ovos-vad-plugin-webrtcvad~=0.0.1 +ovos-microphone-plugin-alsa~=0.0.0 +ovos-ww-plugin-precise-lite[tflite]~=0.1 +pydub~=0.25 +SpeechRecognition~=3.10 +sdnotify~=0.3 \ No newline at end of file