Skip to content

Commit

Permalink
azure refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
OvidijusParsiunas committed Jul 22, 2023
1 parent 3ffb6f7 commit b7149bc
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# speech to input
# speech to element
1 change: 1 addition & 0 deletions src/consts/readme.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/** URL of the project README; interpolated into console error messages as the "more info" link. */
export const README_URL = 'https://github.com/OvidijusParsiunas/speech-to-element';
46 changes: 20 additions & 26 deletions src/services/azure/azure.ts
Original file line number Diff line number Diff line change
@@ -1,54 +1,42 @@
import {Recognizer, SpeechConfig, SpeechRecognitionEventArgs} from 'microsoft-cognitiveservices-speech-sdk';
import {OnError, Options, Translations, WebSpeechAPIOptions} from '../../types/options';
import {Recognizer, SpeechRecognitionEventArgs} from 'microsoft-cognitiveservices-speech-sdk';
import {AzureOptions, OnError, Options, Translations} from '../../types/options';
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import {AzureSpeechConfig} from './azureSpeechConfig';
import {AzureTranscript} from './azureTranscript';
import {Speech} from '../../speech';

export class Azure extends Speech {
private _service?: sdk.SpeechRecognizer;
private _onError?: OnError;
private readonly _translations?: Translations;
private _translations?: Translations;

constructor() {
super();
}

start(options?: Options & WebSpeechAPIOptions) {
console.log(sdk);
start(options: Options & AzureOptions) {
this.prepareBeforeStart(options);
this.instantiateService(options);
this._onError = options?.onError;
this._service?.startContinuousRecognitionAsync(() => {}, this.error);
// this._translations = options?.translations;
this._translations = options?.translations;
}

private instantiateService(options?: Options & WebSpeechAPIOptions) {
private instantiateService(options: Options & AzureOptions) {
const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
const speechConfig = Azure.getSpeechConfig(sdk.SpeechConfig);
const speechConfig = AzureSpeechConfig.get(sdk.SpeechConfig, options);
if (!speechConfig) return;

const reco = new sdk.SpeechRecognizer(speechConfig, audioConfig);
this.setEvents(reco);
this._service = reco;
const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
this.setEvents(recognizer);
this._service = recognizer;
// const speechRecognition = window.webkitSpeechRecognition || window.SpeechRecognition;
// if (!speechRecognition) {
// console.error('Speech Recognition is unsupported');
// } else {
// this._service = new speechRecognition();
// this._service.continuous = true;
// this._service.interimResults = options?.displayInterimResults ?? true;
// this._service.lang = options?.lang || 'en-US';
// this.setEvents();
// }
}

private static getSpeechConfig(sdkConfigType: typeof SpeechConfig) {
// const speechConfig = sdkConfigType.fromAuthorizationToken(authorizationToken, regionOptions.value);
const speechConfig = sdkConfigType.fromSubscription('', 'eastus');
// speechConfig.outputFormat = sdk.OutputFormat.Detailed;
// speechConfig.speechRecognitionLanguage = languageOptions.value;
return speechConfig;
}

private setEvents(recognizer: sdk.SpeechRecognizer) {
recognizer.recognizing = this.onRecognizing.bind(this);
recognizer.recognized = this.onRecognized.bind(this);
Expand All @@ -67,8 +55,11 @@ export class Azure extends Speech {
// }
}

// prettier-ignore
private onRecognizing(_: Recognizer, event: SpeechRecognitionEventArgs) {
this.updateElements(event.result.text, this.finalTranscript);
const {interimTranscript, finalTranscript} = AzureTranscript.extract(
event.result.text, this.finalTranscript, false, this._translations);
this.updateElements(interimTranscript, finalTranscript);
}

// WORK - huge opportunity to fix this in the repo!!!!!
Expand All @@ -77,14 +68,17 @@ export class Azure extends Speech {
// onRecognizedResult(recognitionEventArgs.result);
// }

// prettier-ignore
private onRecognized(_: Recognizer, event: SpeechRecognitionEventArgs) {
const result = event.result;
switch (result.reason) {
case sdk.ResultReason.Canceled:
break;
case sdk.ResultReason.RecognizedSpeech:
if (result.text) {
this.updateElements('', `${this.finalTranscript + result.text} `);
const {interimTranscript, finalTranscript} = AzureTranscript.extract(
result.text, this.finalTranscript, true, this._translations);
this.updateElements(interimTranscript, finalTranscript);
}
break;
}
Expand Down
42 changes: 42 additions & 0 deletions src/services/azure/azureSpeechConfig.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import {SpeechConfig} from 'microsoft-cognitiveservices-speech-sdk';
import {AzureOptions} from '../../types/options';
import {README_URL} from '../../consts/readme';

/**
 * Builds an Azure Speech SDK `SpeechConfig` from user supplied `AzureOptions`.
 *
 * Every validation failure is reported through `console.error` (with a link to the README)
 * and results in `null` instead of a thrown error.
 */
export class AzureSpeechConfig {
  /**
   * Validates the credentials in `options` and creates a fresh speech config from them.
   *
   * @param sdkSpeechConfig - the SDK `SpeechConfig` class whose static factory methods are called
   * @param options - credentials and region supplied by the user
   * @returns the created config, or `null` when the options are missing or incomplete
   */
  private static getNewSpeechConfig(sdkSpeechConfig: typeof SpeechConfig, options: AzureOptions) {
    if (!options) {
      console.error(`Please provide subscription details - more info: ${README_URL}`);
      return null;
    }
    if (options.retrieveToken) {
      // WORK - token retrieval is not implemented yet, hence no config is created on this path
      return null;
    }
    // Only ONE of the two credentials is required, so fail only when BOTH are absent.
    // (Using `||` here rejected every valid single-credential configuration.)
    if (!options.subscriptionKey && !options.token) {
      console.error(`Please define a 'subscriptionKey' or 'token' property - more info: ${README_URL}`);
      return null;
    }
    if (!options.region) {
      console.error(`Please define a 'region' property - more info: ${README_URL}`);
      return null;
    }
    // WORK - error handling for incorrect key
    // The subscription key takes precedence when both credentials are supplied.
    if (options.subscriptionKey) {
      return sdkSpeechConfig.fromSubscription(options.subscriptionKey, options.region);
    }
    if (options.token) {
      return sdkSpeechConfig.fromAuthorizationToken(options.token, options.region);
    }
    // Not reachable after the guard above; kept so that every code path returns a value.
    return null;
  }

  /**
   * Applies the optional settings from `options` onto an already created config.
   *
   * @param sdkSpeechConfig - config instance to mutate
   * @param options - user options; only `language` is read here
   */
  private static process(sdkSpeechConfig: SpeechConfig, options: AzureOptions) {
    if (options.language) sdkSpeechConfig.speechRecognitionLanguage = options.language;
  }

  /**
   * Creates and configures a speech config.
   *
   * @param sdkConfigType - the SDK `SpeechConfig` class
   * @param options - user options (credentials, region, language)
   * @returns the configured speech config, or `null` when it could not be created
   */
  public static get(sdkConfigType: typeof SpeechConfig, options: AzureOptions) {
    const speechConfig = AzureSpeechConfig.getNewSpeechConfig(sdkConfigType, options);
    if (speechConfig) AzureSpeechConfig.process(speechConfig, options);
    return speechConfig;
  }
}
12 changes: 12 additions & 0 deletions src/services/azure/azureTranscript.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import {Translations} from '../../types/options';
import {Translate} from '../../utils/translate';

/** Turns raw Azure recognition text into the interim/final transcript pair used to update elements. */
export class AzureTranscript {
  /**
   * @param text - text reported by the recognizer for the current event
   * @param finalTranscript - transcript accumulated from previous final results
   * @param isFinal - whether `text` is a final result (appended) or an interim one (shown separately)
   * @param translations - optional word replacements applied to `text` first
   * @returns the interim text and the (possibly extended) final transcript
   */
  public static extract(text: string, finalTranscript: string, isFinal: boolean, translations?: Translations) {
    const spoken = translations ? Translate.translate(text, translations) : text;
    // Interim results leave the accumulated transcript untouched.
    if (!isFinal) return {interimTranscript: spoken, finalTranscript};
    // Final results are appended followed by a separating space.
    return {interimTranscript: '', finalTranscript: `${finalTranscript}${spoken} `};
  }
}
2 changes: 1 addition & 1 deletion src/services/webSpeech/webSpeech.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export class WebSpeech extends Speech {
this._service = new speechRecognition();
this._service.continuous = true;
this._service.interimResults = options?.displayInterimResults ?? true;
this._service.lang = options?.lang || 'en-US';
this._service.lang = options?.language || 'en-US';
this.setEvents();
}
}
Expand Down
14 changes: 13 additions & 1 deletion src/types/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,20 @@ export interface TextColor {
final?: string;
}

/** Options read by AzureSpeechConfig when creating the Azure speech config. */
export interface AzureOptions {
// When this is set AzureSpeechConfig currently creates no config - token retrieval is not implemented yet.
retrieveToken?: () => string;
// Passed together with `region` to the SDK's SpeechConfig.fromSubscription.
subscriptionKey?: string;
// Passed together with `region` to the SDK's SpeechConfig.fromAuthorizationToken.
token?: string;
// Required in practice: AzureSpeechConfig logs an error and creates no config when it is missing.
region?: string;
// Assigned to the config's `speechRecognitionLanguage` when provided.
// https://docs.microsoft.com/azure/cognitive-services/speech-service/supported-languages
language?: string;
}

/** Options for the Web Speech API based service. */
export interface WebSpeechAPIOptions {
// NOTE(review): `lang` looks superseded by `language` below (WebSpeech reads `options?.language`) - confirm.
lang?: string;
// BCP 47 language tag.
// The Web Speech API itself falls back to the HTML lang attribute
// (https://developer.mozilla.org/en-US/docs/Web/HTML/Element/html#lang) or the user agent's language setting,
// however WebSpeech assigns `options?.language || 'en-US'`, so an unspecified value results in 'en-US'.
language?: string;
}

export interface Options {
Expand Down
4 changes: 2 additions & 2 deletions src/utils/translate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import {Translations} from '../types/options';

/** Word-level text replacement driven by a user supplied translations map. */
export class Translate {
  /**
   * Replaces every word of `text` that has an entry in `tranlsations` with its mapped value.
   *
   * The text is split on runs of non-word characters using a capturing group, so the
   * separators (spaces, punctuation) are kept as tokens and the original spacing and
   * punctuation are reproduced exactly when the tokens are joined back together.
   *
   * @param text - text to translate
   * @param tranlsations - map of word to replacement; empty replacement values are ignored
   * @returns the text with all matching words replaced
   */
  public static translate(text: string, tranlsations: Translations) {
    // NOTE: `\W` is ASCII based, so non-ASCII letters are treated as separators.
    const words = text.split(/(\W+)/);
    for (let i = 0; i < words.length; i += 1) {
      const word = words[i];
      // Own-property check: words such as "constructor" or "toString" must not resolve to
      // members inherited from Object.prototype and end up replaced by a function.
      if (Object.prototype.hasOwnProperty.call(tranlsations, word) && tranlsations[word]) {
        words[i] = tranlsations[word];
      }
    }
    return words.join('');
  }
}

0 comments on commit b7149bc

Please sign in to comment.