Skip to content

Commit

Permalink
added timeout on speech recognition
Browse files: browse the repository at this point in the history
  • Loading branch information
zacharykratochvil committed Aug 3, 2022
1 parent 415fb74 commit e73482b
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
17 changes: 15 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 2.8.3)
project(respeaker_ros)

find_package(catkin REQUIRED COMPONENTS
rospy
message_generation
catkin_virtualenv
dynamic_reconfigure)
if ($ENV{ROS_PYTHON_VERSION} EQUAL 2)
Expand All @@ -12,10 +14,21 @@ else()
"roscd respeaker_ros && pip install -r requirements.txt")
endif()

add_message_files(
DIRECTORY
msg
FILES
StampedAudio.msg
)

generate_messages()

generate_dynamic_reconfigure_options(
cfg/Respeaker.cfg)

catkin_package()
catkin_package(
CATKIN_DEPENDS message_runtime
)

if (catkin_virtualenv_FOUND)
catkin_generate_virtualenv()
Expand All @@ -37,4 +50,4 @@ install(FILES requirements.txt
if (CATKIN_ENABLE_TESTING)
catkin_add_nosetests(test/test_installed.py
DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv)
endif()
endif()
8 changes: 7 additions & 1 deletion package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
<name>respeaker_ros</name>
<version>0.0.0</version>
<description>The respeaker_ros package</description>
<maintainer email="[email protected]">Yuki Furuta</maintainer>
<maintainer email="[email protected]">Zachary Kratochvil</maintainer>
<license>Apache</license>
<author email="[email protected]">Yuki Furuta</author>

<buildtool_depend>catkin</buildtool_depend>
<build_depend>message_generation</build_depend>

<depend condition="$ROS_PYTHON_VERSION == 3">catkin_virtualenv</depend>
<depend>angles</depend>
<depend>audio_common_msgs</depend>
Expand All @@ -20,7 +22,11 @@
<depend>speech_recognition_msgs</depend>
<depend>std_msgs</depend>
<depend>tf</depend>

<export>
<pip_requirements>requirements.txt</pip_requirements>
</export>

<exec_depend>message_runtime</exec_depend>

</package>
14 changes: 7 additions & 7 deletions scripts/respeaker_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import sys
import time
import speech_recognition as SR
from audio_common_msgs.msg import AudioData
from respeaker_ros.msg import StampedAudio
from geometry_msgs.msg import PoseStamped
from std_msgs.msg import Bool, Int32, ColorRGBA
from dynamic_reconfigure.server import Server
Expand Down Expand Up @@ -340,9 +340,9 @@ def __init__(self):
self.pub_vad = rospy.Publisher("is_speeching", Bool, queue_size=1, latch=True)
self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True)
self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True)
self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10)
self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10)
self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, AudioData, queue_size=10) for c in self.respeaker_audio.channels}
self.pub_audio = rospy.Publisher("audio", StampedAudio, queue_size=10)
self.pub_speech_audio = rospy.Publisher("speech_audio", StampedAudio, queue_size=2)
self.pub_audios = {c:rospy.Publisher('audio/channel%d' % c, StampedAudio, queue_size=10) for c in self.respeaker_audio.channels}
# init config
self.config = None
self.dyn_srv = Server(RespeakerConfig, self.on_config)
Expand Down Expand Up @@ -401,9 +401,9 @@ def on_audio(self, data, channel):
if channel == 0:
self.out.writeframes(data)

self.pub_audios[channel].publish(AudioData(data=data))
self.pub_audios[channel].publish(StampedAudio(data=data, stamp=rospy.get_rostime()))
if channel == self.main_channel:
self.pub_audio.publish(AudioData(data=data))
self.pub_audio.publish(StampedAudio(data=data, stamp=rospy.get_rostime()))
if self.is_speeching:
if len(self.speech_audio_buffer) == 0:
self.speech_audio_buffer = self.speech_prefetch_buffer
Expand Down Expand Up @@ -456,7 +456,7 @@ def on_timer(self, event):
rospy.loginfo("Speech detected for %.3f seconds" % duration)
if self.speech_min_duration <= duration < self.speech_max_duration:

self.pub_speech_audio.publish(AudioData(data=list(buf)))
self.pub_speech_audio.publish(StampedAudio(data=list(buf), stamp=rospy.get_rostime()))


if __name__ == '__main__':
Expand Down
11 changes: 8 additions & 3 deletions scripts/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import speech_recognition as SR

from actionlib_msgs.msg import GoalStatus, GoalStatusArray
from audio_common_msgs.msg import AudioData
from respeaker_ros.msg import StampedAudio
from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal
from speech_recognition_msgs.msg import SpeechRecognitionCandidates

Expand Down Expand Up @@ -40,8 +40,8 @@ def __init__(self):
self.tts_action = None

self.pub_speech = rospy.Publisher(
"speech_to_text", SpeechRecognitionCandidates, queue_size=1)
self.sub_audio = rospy.Subscriber("speech_audio", AudioData, self.audio_cb)
"speech_to_text", SpeechRecognitionCandidates, queue_size=2)
self.sub_audio = rospy.Subscriber("speech_audio", StampedAudio, self.audio_cb, queue_size=2)

def tts_timer_cb(self, event):
stamp = event.current_real
Expand All @@ -63,9 +63,14 @@ def tts_timer_cb(self, event):
self.is_canceling = Falser

def audio_cb(self, msg):

if self.is_canceling:
rospy.loginfo("Speech is cancelled")
return
if rospy.get_rostime().secs - msg.stamp.secs > 2:
rospy.loginfo("Old speech discarded")
return

data = SR.AudioData(bytes(msg.data), self.sample_rate, self.sample_width)
with open(str(len(msg.data)) + ".wav","wb") as f:
f.write(data.get_wav_data())
Expand Down

0 comments on commit e73482b

Please sign in to comment.