-
Notifications
You must be signed in to change notification settings - Fork 60
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ROS Noetic, Python 3 Updates #31
base: master
Are you sure you want to change the base?
Changes from all commits
e771068
5237497
415fb74
5427c94
dd8af5c
ef9aabe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
PyAudio==0.2.8 | ||
SpeechRecognition==3.8.1 | ||
click==6.7 | ||
numpy==1.16.2 | ||
pixel-ring==0.1.0 | ||
pyusb==1.0.2 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import usb.core | ||
import usb.util | ||
import pyaudio | ||
import wave | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be removed again as well, right? that was for the wav file you used for debugging? |
||
import math | ||
import numpy as np | ||
import tf.transformations as T | ||
|
@@ -15,20 +16,21 @@ | |
import struct | ||
import sys | ||
import time | ||
import speech_recognition as SR | ||
from audio_common_msgs.msg import AudioData | ||
from geometry_msgs.msg import PoseStamped | ||
from std_msgs.msg import Bool, Int32, ColorRGBA | ||
from dynamic_reconfigure.server import Server | ||
try: | ||
from pixel_ring import usb_pixel_ring_v2 | ||
except IOError as e: | ||
print e | ||
print(e) | ||
raise RuntimeError("Check the device is connected and recognized") | ||
|
||
try: | ||
from respeaker_ros.cfg import RespeakerConfig | ||
except Exception as e: | ||
print e | ||
print(e) | ||
raise RuntimeError("Need to run respeaker_gencfg.py first") | ||
|
||
|
||
|
@@ -169,12 +171,14 @@ def read(self, name): | |
usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE, | ||
0, cmd, id, length, self.TIMEOUT) | ||
|
||
response = struct.unpack(b'ii', response.tostring()) | ||
#response = struct.unpack(b'ii', struct.pack(b'ii',*response)) | ||
response = struct.unpack(b'ii', bytes([x for x in response])) | ||
|
||
if data[2] == 'int': | ||
result = response[0] | ||
else: | ||
result = response[0] * (2.**response[1]) | ||
#result = response[0] | ||
|
||
return result | ||
|
||
|
@@ -221,16 +225,17 @@ def __init__(self, on_audio, channels=None, suppress_error=True): | |
self.available_channels = None | ||
self.channels = channels | ||
self.device_index = None | ||
self.rate = 16000 | ||
self.bitwidth = 2 | ||
self.rate = rospy.get_param("~sample_rate", 16000) | ||
self.bitwidth = rospy.get_param("~sample_width", 2) | ||
self.bitdepth = 16 | ||
self.i = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is also a debugging left-over? |
||
|
||
# find device | ||
count = self.pyaudio.get_device_count() | ||
rospy.logdebug("%d audio devices found" % count) | ||
for i in range(count): | ||
info = self.pyaudio.get_device_info_by_index(i) | ||
name = info["name"].encode("utf-8") | ||
name = str(info["name"].encode("utf-8")) | ||
chan = info["maxInputChannels"] | ||
rospy.logdebug(" - %d: %s" % (i, name)) | ||
if name.lower().find("respeaker") >= 0: | ||
|
@@ -266,6 +271,7 @@ def __init__(self, on_audio, channels=None, suppress_error=True): | |
input_device_index=self.device_index, | ||
) | ||
|
||
|
||
def __del__(self): | ||
self.stop() | ||
try: | ||
|
@@ -279,15 +285,19 @@ def __del__(self): | |
except: | ||
pass | ||
|
||
|
||
def stream_callback(self, in_data, frame_count, time_info, status): | ||
|
||
# split channel | ||
data = np.fromstring(in_data, dtype=np.int16) | ||
chunk_per_channel = len(data) / self.available_channels | ||
data = np.frombuffer(in_data, dtype=np.int16) | ||
chunk_per_channel = np.math.ceil(len(data) / self.available_channels) | ||
data = np.reshape(data, (chunk_per_channel, self.available_channels)) | ||
for chan in self.channels: | ||
chan_data = data[:, chan] | ||
chan_data = bytearray(data[:, chan].tobytes()) | ||
|
||
# invoke callback | ||
self.on_audio(chan_data.tostring(), chan) | ||
self.on_audio(chan_data, chan) | ||
|
||
return None, pyaudio.paContinue | ||
|
||
def start(self): | ||
|
@@ -315,7 +325,7 @@ def __init__(self): | |
# | ||
self.respeaker = RespeakerInterface() | ||
self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error) | ||
self.speech_audio_buffer = str() | ||
self.speech_audio_buffer = bytearray() | ||
self.is_speeching = False | ||
self.speech_stopped = rospy.Time(0) | ||
self.prev_is_voice = None | ||
|
@@ -333,12 +343,13 @@ def __init__(self): | |
# start | ||
self.speech_prefetch_bytes = int( | ||
self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0) | ||
self.speech_prefetch_buffer = str() | ||
self.speech_prefetch_buffer = bytearray() | ||
self.respeaker_audio.start() | ||
self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate), | ||
self.on_timer) | ||
self.timer_led = None | ||
self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led) | ||
self.big_data0 = [] | ||
|
||
def on_shutdown(self): | ||
try: | ||
|
@@ -377,15 +388,16 @@ def on_status_led(self, msg): | |
oneshot=True) | ||
|
||
def on_audio(self, data, channel): | ||
|
||
self.pub_audios[channel].publish(AudioData(data=data)) | ||
if channel == self.main_channel: | ||
self.pub_audio.publish(AudioData(data=data)) | ||
if self.is_speeching: | ||
if len(self.speech_audio_buffer) == 0: | ||
self.speech_audio_buffer = self.speech_prefetch_buffer | ||
self.speech_audio_buffer += data | ||
for x in data: self.speech_audio_buffer += bytearray([x]) | ||
else: | ||
self.speech_prefetch_buffer += data | ||
for x in data: self.speech_prefetch_buffer += bytearray([x]) | ||
self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:] | ||
|
||
def on_timer(self, event): | ||
|
@@ -403,7 +415,7 @@ def on_timer(self, event): | |
|
||
# doa | ||
if doa != self.prev_doa: | ||
self.pub_doa_raw.publish(Int32(data=doa)) | ||
self.pub_doa_raw.publish(data=int(doa)) | ||
self.prev_doa = doa | ||
|
||
msg = PoseStamped() | ||
|
@@ -425,14 +437,14 @@ def on_timer(self, event): | |
self.is_speeching = True | ||
elif self.is_speeching: | ||
buf = self.speech_audio_buffer | ||
self.speech_audio_buffer = str() | ||
self.speech_audio_buffer = bytearray() | ||
self.is_speeching = False | ||
duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth | ||
duration = len(buf) * self.respeaker_audio.bitwidth * 8.0 | ||
duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth | ||
rospy.loginfo("Speech detected for %.3f seconds" % duration) | ||
if self.speech_min_duration <= duration < self.speech_max_duration: | ||
|
||
self.pub_speech_audio.publish(AudioData(data=buf)) | ||
self.pub_speech_audio.publish(AudioData(data=list(buf))) | ||
|
||
|
||
if __name__ == '__main__': | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,9 +16,9 @@ class SpeechToText(object): | |
def __init__(self): | ||
# format of input audio data | ||
self.sample_rate = rospy.get_param("~sample_rate", 16000) | ||
self.sample_width = rospy.get_param("~sample_width", 2L) | ||
self.sample_width = rospy.get_param("~sample_width", 2) | ||
# language of STT service | ||
self.language = rospy.get_param("~language", "ja-JP") | ||
self.language = rospy.get_param("~language", "en-US") | ||
# ignore voice input while the robot is speaking | ||
self.self_cancellation = rospy.get_param("~self_cancellation", True) | ||
# time to assume as SPEAKING after tts service is finished | ||
|
@@ -41,7 +41,7 @@ def __init__(self): | |
|
||
self.pub_speech = rospy.Publisher( | ||
"speech_to_text", SpeechRecognitionCandidates, queue_size=1) | ||
self.sub_audio = rospy.Subscriber("audio", AudioData, self.audio_cb) | ||
self.sub_audio = rospy.Subscriber("speech_audio", AudioData, self.audio_cb) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. public topic name change, meaning API changed. intended? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, this is important, the package appears to be set up to publish raw audio to "audio" and processed, clean audio to "speech_audio", so the speech to text feature ought to use the cleaned up audio for better recognition |
||
|
||
def tts_timer_cb(self, event): | ||
stamp = event.current_real | ||
|
@@ -60,23 +60,29 @@ def tts_timer_cb(self, event): | |
self.last_tts = stamp | ||
if stamp - self.last_tts > self.tts_tolerance: | ||
rospy.logdebug("END CANCELLATION") | ||
self.is_canceling = False | ||
self.is_canceling = Falser | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
|
||
def audio_cb(self, msg): | ||
if self.is_canceling: | ||
rospy.loginfo("Speech is cancelled") | ||
return | ||
data = SR.AudioData(msg.data, self.sample_rate, self.sample_width) | ||
data = SR.AudioData(bytes(msg.data), self.sample_rate, self.sample_width) | ||
|
||
try: | ||
rospy.loginfo("Waiting for result %d" % len(data.get_raw_data())) | ||
result = self.recognizer.recognize_google( | ||
data, language=self.language) | ||
msg = SpeechRecognitionCandidates(transcript=[result]) | ||
result = self.recognizer.recognize_sphinx( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not an expert on this, but google->sphinx may change user experience somehow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be removed now. specific to my use case |
||
data, language=self.language, show_all=False) #, key=None | ||
rospy.loginfo(result) | ||
|
||
msg = SpeechRecognitionCandidates(transcript=result) | ||
self.pub_speech.publish(msg) | ||
|
||
except SR.UnknownValueError as e: | ||
rospy.logerr("Failed to recognize: %s" % str(e)) | ||
rospy.loginfo("value error") | ||
except SR.RequestError as e: | ||
rospy.logerr("Failed to recognize: %s" % str(e)) | ||
rospy.loginfo("request error") | ||
|
||
|
||
if __name__ == '__main__': | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you sure? I believe numpy is still required. See scripts/respeaker_node.py line 11