Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Queue Strategy on Android and iOS #139

Merged
merged 6 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const speak = async () => {
pitch: 1.0,
volume: 1.0,
category: 'ambient',
queueStrategy: 1
});
};

Expand Down Expand Up @@ -201,6 +202,7 @@ addListener(eventName: 'onRangeStart', listenerFunc: (info: { start: number; end
| **`volume`** | <code>number</code> | The volume that the utterance will be spoken at. | <code>1.0</code> |
| **`voice`** | <code>number</code> | The index of the selected voice that will be used to speak the utterance. Possible voices can be queried using `getSupportedVoices`. | |
| **`category`** | <code>string</code> | Select the iOS Audio session category. Possible values: `ambient` and `playback`. Use `playback` to play audio even when the app is in the background. Only available for iOS. | <code>"ambient"</code> |
| **`queueStrategy`** | <code>number</code> | Select the strategy to adopt when several requests to speak overlap. Possible values: `0` and `1`. Use `0` to stop the current request when a new request is sent. Use `1` to queue the new request; it will be spoken once all previous requests have been completed. | <code>0</code> |


#### SpeechSynthesisVoice
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
/**
 * Receives the outcome and progress notifications of a single speak request.
 */
public interface SpeakResultCallback {
/** Reports that the request completed. */
void onDone();
/** Reports that the request failed. */
void onError();
/**
 * Reports a range of the request's text together with the text it covers.
 *
 * @param start index of the first character of the range
 * @param end index just past the last character of the range
 * @param spokenWord the text between {@code start} and {@code end}
 */
void onRangeStart(int start, int end, String spokenWord);
/**
 * Reports a range of the request's text by position only; a receiver that
 * needs the covered text has to take the substring of the original text itself.
 *
 * @param start index of the first character of the range
 * @param end index just past the last character of the range
 */
void onRangeStart(int start, int end);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListener {
Expand All @@ -25,11 +26,44 @@ public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListe
private android.speech.tts.TextToSpeech tts = null;
private int initializationStatus;
private JSObject[] supportedVoices = null;
// Callbacks of the speak requests that are still pending, keyed by the
// utterance id handed to the engine (the plugin call's callback id).
// NOTE(review): the utterance progress listener may be invoked from threads
// other than the one calling speak()/stop() — confirm whether a
// ConcurrentHashMap is needed here.
private Map<String, SpeakResultCallback> requests = new HashMap<>();

TextToSpeech(Context context) {
this.context = context;
try {
tts = new android.speech.tts.TextToSpeech(context, this);
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {}

@Override
public void onDone(String utteranceId) {
SpeakResultCallback callback = requests.get(utteranceId);
if(callback != null) {
callback.onDone();
requests.remove(utteranceId);
}
}

@Override
public void onError(String utteranceId) {
SpeakResultCallback callback = requests.get(utteranceId);
if(callback != null) {
callback.onError();
requests.remove(utteranceId);
}
}

@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
SpeakResultCallback callback = requests.get(utteranceId);
if(callback != null) {
callback.onRangeStart(start, end);
}
}
}
);
} catch (Exception ex) {
Log.d(LOG_TAG, ex.getLocalizedMessage());
}
Expand All @@ -40,6 +74,19 @@ public void onInit(int status) {
this.initializationStatus = status;
}

/**
 * Speaks {@code text} using the default queue strategy.
 *
 * Convenience overload for callers that do not choose a strategy: it forwards
 * every argument unchanged to the nine-argument {@code speak} and supplies
 * {@code QUEUE_FLUSH} as the queue strategy.
 *
 * @param text the text to speak
 * @param lang language tag of the utterance
 * @param rate speech rate
 * @param pitch speech pitch
 * @param volume speech volume
 * @param voice index of the voice to use
 * @param callbackId identifier under which the request is tracked
 * @param resultCallback receiver of the request's done/error/range notifications
 */
public void speak(
    String text,
    String lang,
    float rate,
    float pitch,
    float volume,
    int voice,
    String callbackId,
    SpeakResultCallback resultCallback
) {
    // No strategy was supplied by the caller, so fall back to flushing.
    final int defaultQueueStrategy = android.speech.tts.TextToSpeech.QUEUE_FLUSH;
    this.speak(text, lang, rate, pitch, volume, voice, callbackId, resultCallback, defaultQueueStrategy);
}

public void speak(
String text,
String lang,
Expand All @@ -48,31 +95,13 @@ public void speak(
float volume,
int voice,
String callbackId,
SpeakResultCallback resultCallback
SpeakResultCallback resultCallback,
int queueStrategy
) {
tts.stop();
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {}

@Override
public void onDone(String utteranceId) {
resultCallback.onDone();
}

@Override
public void onError(String utteranceId) {
resultCallback.onError();
}

@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
String spokenWord = text.substring(start, end);
resultCallback.onRangeStart(start, end, spokenWord);
}
}
);
if(queueStrategy != android.speech.tts.TextToSpeech.QUEUE_ADD) {
stop();
}
requests.put(callbackId, resultCallback);

Locale locale = Locale.forLanguageTag(lang);

Expand All @@ -92,8 +121,7 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
int resultCode = tts.setVoice(newVoice);
}
}

tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams, callbackId);
tts.speak(text, queueStrategy, ttsParams, callbackId);
} else {
HashMap<String, String> ttsParams = new HashMap<>();
ttsParams.put(android.speech.tts.TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, callbackId);
Expand All @@ -102,12 +130,13 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
tts.setLanguage(locale);
tts.setSpeechRate(rate);
tts.setPitch(pitch);
tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams);
tts.speak(text, queueStrategy, ttsParams);
}
}

/**
 * Stops the engine and forgets every pending speak request.
 *
 * NOTE(review): the pending callbacks are discarded here without onDone() or
 * onError() being invoked on them — confirm that no caller keeps waiting on a
 * request dropped this way.
 */
public void stop() {
// Stop the engine first, then drop the callbacks of the tracked requests.
tts.stop();
requests.clear();
}

public JSArray getSupportedLanguages() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package com.getcapacitor.community.tts;

import android.util.Base64;
import android.util.Log;

import com.getcapacitor.JSArray;
import com.getcapacitor.JSObject;
import com.getcapacitor.Plugin;
Expand Down Expand Up @@ -37,6 +39,7 @@ public void speak(PluginCall call) {
float pitch = call.getFloat("pitch", 1.0f);
float volume = call.getFloat("volume", 1.0f);
int voice = call.getInt("voice", -1);
int queueStrategy = call.getInt("queueStrategy", 0);

boolean isLanguageSupported = implementation.isLanguageSupported(lang);
if (!isLanguageSupported) {
Expand All @@ -56,17 +59,18 @@ public void onError() {
}

@Override
public void onRangeStart(int start, int end, String spokenWord) {
public void onRangeStart(int start, int end) {
JSObject ret = new JSObject();
ret.put("start", start);
ret.put("end", end);
String spokenWord = text.substring(start, end);
ret.put("spokenWord", spokenWord);
notifyListeners("onRangeStart", ret);
}
};

try {
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback);
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback, queueStrategy);
} catch (Exception ex) {
call.reject(ex.getLocalizedMessage());
}
Expand Down
10 changes: 8 additions & 2 deletions ios/Plugin/TextToSpeech.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import AVFoundation
import Capacitor

/// Strategies for handling a speak request that arrives while another one is
/// still in progress. The raw values are the integers accepted by the
/// `queueStrategy` option.
enum QUEUE_STRATEGY: Int {
    /// Stop the speech in progress before the new request is spoken.
    case QUEUE_FLUSH = 0
    /// Keep the speech in progress and append the new request after it.
    case QUEUE_ADD = 1
}

@objc public class TextToSpeech: NSObject, AVSpeechSynthesizerDelegate {
let synthesizer = AVSpeechSynthesizer()
var calls: [CAPPluginCall] = []
Expand Down Expand Up @@ -29,8 +33,10 @@ import Capacitor
self.resolveCurrentCall()
}

@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ call: CAPPluginCall) throws {
self.synthesizer.stopSpeaking(at: .immediate)
@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ queueStrategy: Int, _ call: CAPPluginCall) throws {
if(queueStrategy == QUEUE_STRATEGY.QUEUE_FLUSH.rawValue) {
self.synthesizer.stopSpeaking(at: .immediate)
}
self.calls.append(call)

let utterance = AVSpeechUtterance(string: text)
Expand Down
3 changes: 2 additions & 1 deletion ios/Plugin/TextToSpeechPlugin.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class TextToSpeechPlugin: CAPPlugin {
let volume = call.getFloat("volume") ?? 1.0
let voice = call.getInt("voice") ?? -1
let category = call.getString("category") ?? "ambient"
let queueStrategy = call.getInt("queueStrategy") ?? 0

let isLanguageSupported = implementation.isLanguageSupported(lang)
guard isLanguageSupported else {
Expand All @@ -28,7 +29,7 @@ public class TextToSpeechPlugin: CAPPlugin {
}

do {
try implementation.speak(text, lang, rate, pitch, category, volume, voice, call)
try implementation.speak(text, lang, rate, pitch, category, volume, voice, queueStrategy, call)
} catch {
call.reject(error.localizedDescription)
}
Expand Down
9 changes: 9 additions & 0 deletions src/definitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ export interface TTSOptions {
* @default "ambient"
*/
category?: string;
/**
* Select the strategy to adopt when several requests to speak overlap.
* Possible values: `0` and `1`.
* Use `0` to stop the current request when a new request is sent.
* Use `1` to buffer the speech request. The request will be executed when all previous requests have been completed.
EnzoMenegaldo marked this conversation as resolved.
Show resolved Hide resolved
*
* @default 0
EnzoMenegaldo marked this conversation as resolved.
Show resolved Hide resolved
*/
queueStrategy?: number;
EnzoMenegaldo marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down