Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Queue Strategy on Android and iOS #139

Merged
merged 6 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const speak = async () => {
pitch: 1.0,
volume: 1.0,
category: 'ambient',
queueStrategy: 1
});
};

Expand Down Expand Up @@ -83,6 +84,7 @@ const isLanguageSupported = async (lang: string) => {
* [`openInstall()`](#openinstall)
* [`addListener('onRangeStart', ...)`](#addlisteneronrangestart)
* [Interfaces](#interfaces)
* [Enums](#enums)

</docgen-index>

Expand Down Expand Up @@ -201,6 +203,7 @@ addListener(eventName: 'onRangeStart', listenerFunc: (info: { start: number; end
| **`volume`** | <code>number</code> | The volume that the utterance will be spoken at. | <code>1.0</code> |
| **`voice`** | <code>number</code> | The index of the selected voice that will be used to speak the utterance. Possible voices can be queried using `getSupportedVoices`. | |
| **`category`** | <code>string</code> | Select the iOS Audio session category. Possible values: `ambient` and `playback`. Use `playback` to play audio even when the app is in the background. Only available for iOS. | <code>"ambient"</code> |
| **`queueStrategy`** | <code><a href="#queuestrategy">QueueStrategy</a></code> | Select the strategy to adopt when several requests to speak overlap. Available since 5.1.0. | <code>QueueStrategy.Flush</code> |


#### SpeechSynthesisVoice
Expand All @@ -222,6 +225,17 @@ The <a href="#speechsynthesisvoice">SpeechSynthesisVoice</a> interface represent
| ------------ | ----------------------------------------- |
| **`remove`** | <code>() =&gt; Promise&lt;void&gt;</code> |


### Enums


#### QueueStrategy

| Members | Value | Description |
| ----------- | -------------- | ---------------------------------------------------------------------------------------------------------------------- |
| **`Flush`** | <code>0</code> | Use `Flush` to stop the current request when a new request is sent. |
| **`Add`** | <code>1</code> | Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed. |

</docgen-api>

## Changelog
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
public interface SpeakResultCallback {
void onDone();
void onError();
void onRangeStart(int start, int end, String spokenWord);
void onRangeStart(int start, int end);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListener {
Expand All @@ -25,11 +26,44 @@ public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListe
private android.speech.tts.TextToSpeech tts = null;
private int initializationStatus;
private JSObject[] supportedVoices = null;
private Map<String, SpeakResultCallback> requests = new HashMap();

TextToSpeech(Context context) {
this.context = context;
try {
tts = new android.speech.tts.TextToSpeech(context, this);
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {}

@Override
public void onDone(String utteranceId) {
SpeakResultCallback callback = requests.get(utteranceId);
if (callback != null) {
callback.onDone();
requests.remove(utteranceId);
}
}

@Override
public void onError(String utteranceId) {
SpeakResultCallback callback = requests.get(utteranceId);
if (callback != null) {
callback.onError();
requests.remove(utteranceId);
}
}

@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
SpeakResultCallback callback = requests.get(utteranceId);
if (callback != null) {
callback.onRangeStart(start, end);
}
}
}
);
} catch (Exception ex) {
Log.d(LOG_TAG, ex.getLocalizedMessage());
}
Expand All @@ -50,29 +84,24 @@ public void speak(
String callbackId,
SpeakResultCallback resultCallback
) {
tts.stop();
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {}

@Override
public void onDone(String utteranceId) {
resultCallback.onDone();
}

@Override
public void onError(String utteranceId) {
resultCallback.onError();
}
speak(text, lang, rate, pitch, volume, voice, callbackId, resultCallback, android.speech.tts.TextToSpeech.QUEUE_FLUSH);
}

@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
String spokenWord = text.substring(start, end);
resultCallback.onRangeStart(start, end, spokenWord);
}
}
);
public void speak(
String text,
String lang,
float rate,
float pitch,
float volume,
int voice,
String callbackId,
SpeakResultCallback resultCallback,
int queueStrategy
) {
if (queueStrategy != android.speech.tts.TextToSpeech.QUEUE_ADD) {
stop();
}
requests.put(callbackId, resultCallback);

Locale locale = Locale.forLanguageTag(lang);

Expand All @@ -92,8 +121,7 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
int resultCode = tts.setVoice(newVoice);
}
}

tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams, callbackId);
tts.speak(text, queueStrategy, ttsParams, callbackId);
} else {
HashMap<String, String> ttsParams = new HashMap<>();
ttsParams.put(android.speech.tts.TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, callbackId);
Expand All @@ -102,12 +130,13 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
tts.setLanguage(locale);
tts.setSpeechRate(rate);
tts.setPitch(pitch);
tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams);
tts.speak(text, queueStrategy, ttsParams);
}
}

/**
 * Stops the underlying engine via {@code tts.stop()} and forgets every callback
 * registered in {@code requests}.
 *
 * NOTE(review): this method does not invoke the callbacks it discards, and the
 * utterance progress listener skips utterance ids that are no longer in
 * {@code requests}; confirm that callers of interrupted requests do not need to be
 * notified.
 */
public void stop() {
tts.stop();
// Drop all pending utterance callbacks so later listener events for them are ignored.
requests.clear();
}

public JSArray getSupportedLanguages() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.getcapacitor.community.tts;

import android.util.Base64;
import android.util.Log;
import com.getcapacitor.JSArray;
import com.getcapacitor.JSObject;
import com.getcapacitor.Plugin;
Expand Down Expand Up @@ -37,6 +38,7 @@ public void speak(PluginCall call) {
float pitch = call.getFloat("pitch", 1.0f);
float volume = call.getFloat("volume", 1.0f);
int voice = call.getInt("voice", -1);
int queueStrategy = call.getInt("queueStrategy", 0);

boolean isLanguageSupported = implementation.isLanguageSupported(lang);
if (!isLanguageSupported) {
Expand All @@ -56,17 +58,18 @@ public void onError() {
}

@Override
public void onRangeStart(int start, int end, String spokenWord) {
public void onRangeStart(int start, int end) {
JSObject ret = new JSObject();
ret.put("start", start);
ret.put("end", end);
String spokenWord = text.substring(start, end);
ret.put("spokenWord", spokenWord);
notifyListeners("onRangeStart", ret);
}
};

try {
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback);
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback, queueStrategy);
} catch (Exception ex) {
call.reject(ex.getLocalizedMessage());
}
Expand Down
18 changes: 12 additions & 6 deletions ios/Plugin/TextToSpeech.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import AVFoundation
import Capacitor

/// Strategy applied by `speak` when a new request arrives.
///
/// The raw values are compared against the integer `queueStrategy` argument
/// received by `speak`, so they must not be renumbered.
enum QUEUE_STRATEGY: Int {
    /// The synthesizer is stopped immediately before the new utterance is queued.
    case QUEUE_FLUSH = 0
    /// The synthesizer is not stopped first; the new utterance is added after the existing ones.
    case QUEUE_ADD = 1
}

@objc public class TextToSpeech: NSObject, AVSpeechSynthesizerDelegate {
let synthesizer = AVSpeechSynthesizer()
var calls: [CAPPluginCall] = []
Expand Down Expand Up @@ -29,8 +33,10 @@ import Capacitor
self.resolveCurrentCall()
}

@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ call: CAPPluginCall) throws {
self.synthesizer.stopSpeaking(at: .immediate)
@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ queueStrategy: Int, _ call: CAPPluginCall) throws {
if queueStrategy == QUEUE_STRATEGY.QUEUE_FLUSH.rawValue {
self.synthesizer.stopSpeaking(at: .immediate)
}
self.calls.append(call)

let utterance = AVSpeechUtterance(string: text)
Expand Down Expand Up @@ -68,10 +74,10 @@ import Capacitor

// Adjust rate for a closer match to other platform.
@objc private func adjustRate(_ rate: Float) -> Float {
let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
if (rate >= 1.0 ) {
return (0.1 * rate) + (baseRate - 0.1)
}
let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
if rate >= 1.0 {
return (0.1 * rate) + (baseRate - 0.1)
}
return rate * baseRate
}

Expand Down
3 changes: 2 additions & 1 deletion ios/Plugin/TextToSpeechPlugin.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class TextToSpeechPlugin: CAPPlugin {
let volume = call.getFloat("volume") ?? 1.0
let voice = call.getInt("voice") ?? -1
let category = call.getString("category") ?? "ambient"
let queueStrategy = call.getInt("queueStrategy") ?? 0

let isLanguageSupported = implementation.isLanguageSupported(lang)
guard isLanguageSupported else {
Expand All @@ -28,7 +29,7 @@ public class TextToSpeechPlugin: CAPPlugin {
}

do {
try implementation.speak(text, lang, rate, pitch, category, volume, voice, call)
try implementation.speak(text, lang, rate, pitch, category, volume, voice, queueStrategy, call)
} catch {
call.reject(error.localizedDescription)
}
Expand Down
18 changes: 18 additions & 0 deletions src/definitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ export interface TextToSpeechPlugin {
): Promise<PluginListenerHandle>;
}

/**
 * Strategy applied when a new `speak()` request arrives while earlier requests are still pending.
 *
 * The numeric values are read as plain integers by the native implementations
 * (`getInt("queueStrategy")` on Android and iOS), so they must stay in sync with the native side.
 */
export enum QueueStrategy {
/**
* Use `Flush` to stop the current request when a new request is sent.
*/
Flush = 0,
/**
* Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed.
*/
Add = 1,
}

export interface TTSOptions {
/**
* The text that will be synthesised when the utterance is spoken.
Expand Down Expand Up @@ -87,6 +98,13 @@ export interface TTSOptions {
* @default "ambient"
*/
category?: string;
/**
* Select the strategy to adopt when several requests to speak overlap.
*
* @since 5.1.0
* @default QueueStrategy.Flush
*/
queueStrategy?: QueueStrategy;
}

/**
Expand Down