diff --git a/README.md b/README.md
index 4dfc68d..0cf2f88 100644
--- a/README.md
+++ b/README.md
@@ -1 +1 @@
-# speech to input
+# speech to element
diff --git a/src/consts/readme.ts b/src/consts/readme.ts
new file mode 100644
index 0000000..30851a5
--- /dev/null
+++ b/src/consts/readme.ts
@@ -0,0 +1 @@
+export const README_URL = 'https://github.com/OvidijusParsiunas/speech-to-element';
diff --git a/src/services/azure/azure.ts b/src/services/azure/azure.ts
index dc32f50..68ac1e7 100644
--- a/src/services/azure/azure.ts
+++ b/src/services/azure/azure.ts
@@ -1,54 +1,42 @@
-import {Recognizer, SpeechConfig, SpeechRecognitionEventArgs} from 'microsoft-cognitiveservices-speech-sdk';
-import {OnError, Options, Translations, WebSpeechAPIOptions} from '../../types/options';
+import {Recognizer, SpeechRecognitionEventArgs} from 'microsoft-cognitiveservices-speech-sdk';
+import {AzureOptions, OnError, Options, Translations} from '../../types/options';
 import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
+import {AzureSpeechConfig} from './azureSpeechConfig';
+import {AzureTranscript} from './azureTranscript';
 import {Speech} from '../../speech';
 
 export class Azure extends Speech {
   private _service?: sdk.SpeechRecognizer;
   private _onError?: OnError;
-  private readonly _translations?: Translations;
+  private _translations?: Translations;
 
-  constructor() {
-    super();
-  }
-
-  start(options?: Options & WebSpeechAPIOptions) {
-    console.log(sdk);
+  start(options: Options & AzureOptions) {
     this.prepareBeforeStart(options);
     this.instantiateService(options);
     this._onError = options?.onError;
     this._service?.startContinuousRecognitionAsync(() => {}, this.error);
-    // this._translations = options?.translations;
+    this._translations = options?.translations;
   }
 
-  private instantiateService(options?: Options & WebSpeechAPIOptions) {
+  private instantiateService(options: Options & AzureOptions) {
     const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
-    const speechConfig = Azure.getSpeechConfig(sdk.SpeechConfig);
+    const speechConfig = AzureSpeechConfig.get(sdk.SpeechConfig, options);
     if (!speechConfig) return;
-    const reco = new sdk.SpeechRecognizer(speechConfig, audioConfig);
-    this.setEvents(reco);
-    this._service = reco;
+    const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
+    this.setEvents(recognizer);
+    this._service = recognizer;
 
     // const speechRecognition = window.webkitSpeechRecognition || window.SpeechRecognition;
     // if (!speechRecognition) {
     //   console.error('Speech Recognition is unsupported');
     // } else {
     //   this._service = new speechRecognition();
     //   this._service.continuous = true;
-    //   this._service.interimResults = options?.displayInterimResults ?? true;
     //   this._service.lang = options?.lang || 'en-US';
     //   this.setEvents();
     // }
   }
 
-  private static getSpeechConfig(sdkConfigType: typeof SpeechConfig) {
-    // const speechConfig = sdkConfigType.fromAuthorizationToken(authorizationToken, regionOptions.value);
-    const speechConfig = sdkConfigType.fromSubscription('', 'eastus');
-    // speechConfig.outputFormat = sdk.OutputFormat.Detailed;
-    // speechConfig.speechRecognitionLanguage = languageOptions.value;
-    return speechConfig;
-  }
-
   private setEvents(recognizer: sdk.SpeechRecognizer) {
     recognizer.recognizing = this.onRecognizing.bind(this);
     recognizer.recognized = this.onRecognized.bind(this);
@@ -67,8 +55,11 @@ export class Azure extends Speech {
     // }
   }
 
+  // prettier-ignore
   private onRecognizing(_: Recognizer, event: SpeechRecognitionEventArgs) {
-    this.updateElements(event.result.text, this.finalTranscript);
+    const {interimTranscript, finalTranscript} = AzureTranscript.extract(
+      event.result.text, this.finalTranscript, false, this._translations);
+    this.updateElements(interimTranscript, finalTranscript);
   }
 
   // WORK - huge opportunity to fix this in the repo!!!!!
@@ -77,6 +68,7 @@ export class Azure extends Speech {
   //   onRecognizedResult(recognitionEventArgs.result);
   // }
 
+  // prettier-ignore
   private onRecognized(_: Recognizer, event: SpeechRecognitionEventArgs) {
     const result = event.result;
     switch (result.reason) {
@@ -84,7 +76,9 @@ export class Azure extends Speech {
         break;
       case sdk.ResultReason.RecognizedSpeech:
         if (result.text) {
-          this.updateElements('', `${this.finalTranscript + result.text} `);
+          const {interimTranscript, finalTranscript} = AzureTranscript.extract(
+            result.text, this.finalTranscript, true, this._translations);
+          this.updateElements(interimTranscript, finalTranscript);
         }
         break;
     }
diff --git a/src/services/azure/azureSpeechConfig.ts b/src/services/azure/azureSpeechConfig.ts
new file mode 100644
index 0000000..cb8559f
--- /dev/null
+++ b/src/services/azure/azureSpeechConfig.ts
@@ -0,0 +1,47 @@
+import {SpeechConfig} from 'microsoft-cognitiveservices-speech-sdk';
+import {AzureOptions} from '../../types/options';
+import {README_URL} from '../../consts/readme';
+
+// Creates the SDK speech config from the user supplied AzureOptions
+export class AzureSpeechConfig {
+  // Validates the credentials and region - logs an error and returns null when a config cannot be created
+  private static getNewSpeechConfig(sdkSpeechConfig: typeof SpeechConfig, options: AzureOptions) {
+    if (!options) {
+      console.error(`Please provide subscription details - more info: ${README_URL}`);
+      return null;
+    }
+    if (options.retrieveToken) {
+      // WORK - retrieveToken is not handled yet, no config is created when it is defined
+      return null;
+    }
+    // only one of the two credentials is required, hence fail only when both are missing
+    if (!options.subscriptionKey && !options.token) {
+      console.error(`Please define a 'subscriptionKey' or 'token' property - more info: ${README_URL}`);
+      return null;
+    }
+    if (!options.region) {
+      console.error(`Please define a 'region' property - more info: ${README_URL}`);
+      return null;
+    }
+    // WORK - error handling for incorrect key
+    if (options.subscriptionKey) {
+      return sdkSpeechConfig.fromSubscription(options.subscriptionKey, options.region);
+    }
+    if (options.token) {
+      return sdkSpeechConfig.fromAuthorizationToken(options.token, options.region);
+    }
+    return null;
+  }
+
+  // Copies the optional language setting onto an already created config
+  private static process(sdkSpeechConfig: SpeechConfig, options: AzureOptions) {
+    if (options.language) sdkSpeechConfig.speechRecognitionLanguage = options.language;
+  }
+
+  // Returns a processed config, or null when the options did not allow one to be created
+  public static get(sdkConfigType: typeof SpeechConfig, options: AzureOptions) {
+    const speechConfig = AzureSpeechConfig.getNewSpeechConfig(sdkConfigType, options);
+    if (speechConfig) AzureSpeechConfig.process(speechConfig, options);
+    return speechConfig;
+  }
+}
diff --git a/src/services/azure/azureTranscript.ts b/src/services/azure/azureTranscript.ts
new file mode 100644
index 0000000..32ceeb5
--- /dev/null
+++ b/src/services/azure/azureTranscript.ts
@@ -0,0 +1,13 @@
+import {Translations} from '../../types/options';
+import {Translate} from '../../utils/translate';
+
+export class AzureTranscript {
+  // Final text is appended to finalTranscript with a trailing space, interim text is returned on its own
+  public static extract(text: string, finalTranscript: string, isFinal: boolean, translations?: Translations) {
+    if (translations) text = Translate.translate(text, translations);
+    if (isFinal) {
+      return {interimTranscript: '', finalTranscript: `${finalTranscript + text} `};
+    }
+    return {interimTranscript: text, finalTranscript};
+  }
+}
diff --git a/src/services/webSpeech/webSpeech.ts b/src/services/webSpeech/webSpeech.ts
index 9d26182..e9a67d0 100644
--- a/src/services/webSpeech/webSpeech.ts
+++ b/src/services/webSpeech/webSpeech.ts
@@ -30,7 +30,7 @@ export class WebSpeech extends Speech {
       this._service = new speechRecognition();
       this._service.continuous = true;
       this._service.interimResults = options?.displayInterimResults ?? true;
-      this._service.lang = options?.lang || 'en-US';
+      this._service.lang = options?.language || 'en-US';
       this.setEvents();
     }
   }
diff --git a/src/types/options.ts b/src/types/options.ts
index c0b4614..4860c39 100644
--- a/src/types/options.ts
+++ b/src/types/options.ts
@@ -13,8 +13,20 @@ export interface TextColor {
   final?: string;
 }
 
+export interface AzureOptions {
+  retrieveToken?: () => string;
+  subscriptionKey?: string;
+  token?: string;
+  region?: string;
+  // https://docs.microsoft.com/azure/cognitive-services/speech-service/supported-languages
+  language?: string;
+}
+
 export interface WebSpeechAPIOptions {
-  lang?: string;
+  // BCP 47 language tag
+  // If not specified, this defaults to the HTML lang (https://developer.mozilla.org/en-US/docs/Web/HTML/Element/html#lang)
+  // attribute value, or the user agent's language setting if that isn't set either.
+  language?: string;
 }
 
 export interface Options {
diff --git a/src/utils/translate.ts b/src/utils/translate.ts
index 79b1294..38bd32b 100644
--- a/src/utils/translate.ts
+++ b/src/utils/translate.ts
@@ -2,12 +2,12 @@ import {Translations} from '../types/options';
 
 export class Translate {
   public static translate(text: string, tranlsations: Translations) {
-    const words = text.split(' ');
+    const words = text.split(/(\W+)/);
     for (let i = 0; i < words.length; i += 1) {
       if (tranlsations[words[i]]) {
         words[i] = tranlsations[words[i]];
       }
     }
-    return words.join(' ');
+    return words.join('');
   }
 }