modified to transcribe speech to text continuously #4

Open · wants to merge 1 commit into main
6 changes: 4 additions & 2 deletions .env
@@ -1,2 +1,4 @@
-SPEECH_KEY=paste-your-speech-key-here
-SPEECH_REGION=paste-your-speech-region-here
+SPEECH_KEY=YOUR_SPEECH_KEY_HERE
+SPEECH_REGION=YOUR_REGION_HERE
+TEXT_KEY=YOUR_TEXT_ANALYSIS_KEY_HERE
+TEXT_ENDPOINT=YOUR_CORRESPONDING_ENDPOINT_HERE
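For reference, a minimal sketch (not part of this PR) of reading these settings through dotenv instead of hard-coding them; the variable names are illustrative:

// illustrative only: load the .env values above at runtime
require('dotenv').config();

const speechKey = process.env.SPEECH_KEY;
const speechRegion = process.env.SPEECH_REGION;
const textKey = process.env.TEXT_KEY;           // new in this PR
const textEndpoint = process.env.TEXT_ENDPOINT; // new in this PR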
1 change: 1 addition & 0 deletions .eslintcache

Large diffs are not rendered by default.

588 changes: 413 additions & 175 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
@@ -3,6 +3,7 @@
"version": "0.1.0",
"private": true,
"dependencies": {
"@azure/ai-text-analytics": "^5.1.0",
"@testing-library/jest-dom": "^5.11.8",
"@testing-library/react": "^11.2.3",
"@testing-library/user-event": "^12.6.0",
42 changes: 42 additions & 0 deletions server/index.js
@@ -5,6 +5,8 @@ const bodyParser = require('body-parser');
const pino = require('express-pino-logger')();

const app = express();
const japp = express(); // second Express app: serves the processing endpoints on port 3002

app.use(bodyParser.urlencoded({ extended: false }));
app.use(pino);

@@ -32,6 +34,46 @@ app.get('/api/get-speech-token', async (req, res, next) => {
}
});

// custom logging/CORS middleware for the japp server
const logger2 = (req, res, next) => {
console.log(`${req.protocol}://${req.get('host')}${req.originalUrl}`);
res.setHeader('Access-Control-Allow-Headers', 'X-Requested-With, content-type');
res.setHeader('Access-Control-Allow-Origin', 'http://localhost:3000');
res.setHeader('Access-Control-Allow-Methods', 'POST,PUT,GET');
// ------4GOauth---> const TrCode = `${req.query.code}`;
console.log(req.body); // log the parsed body object directly; a template literal would print "[object Object]"
console.log('Logger operations done');
next();
};

// register middleware on the japp server
japp.use(express.urlencoded({ extended: false}));
japp.use(express.json());
japp.use(logger2);

// holds the latest keywords object posted from the front end
let thething;

// endpoint where the front end posts its recognition results
japp.post('/processing/poster', async (req, res) => {
const whatsend = req.body;
res.json({whatsend});
console.log('backend received:', whatsend); // log the object directly rather than coercing it to a string
thething = whatsend;
});

// endpoint for the processing service to poll continuously
japp.get('/processing/entities', async (req, res) => {
res.json(thething);
});


app.listen(3001, () =>
console.log('Express server is running on localhost:3001')
);

japp.listen(3002, () =>
console.log('japp server for the processing endpoints is running on localhost:3002')
);
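For context, a rough sketch of exercising the two japp endpoints from a Node script, assuming the server above is running on port 3002; the payload shape mirrors what App.js posts:

const axios = require('axios');

async function demo() {
  // post a result object, as the front end does after each recognition
  await axios.post('http://localhost:3002/processing/poster', {
    sentence: 'My cat might need to see a veterinarian.',
    keyphrases: 'cat, veterinarian',
    entities: [],
  }, { headers: { 'Content-Type': 'application/json' } });

  // poll for the most recently posted object
  const { data } = await axios.get('http://localhost:3002/processing/entities');
  console.log(data);
}

demo().catch(console.error);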
239 changes: 195 additions & 44 deletions src/App.js
@@ -3,16 +3,29 @@ import { Container } from 'reactstrap';
import { getTokenOrRefresh } from './token_util';
import './custom.css'
import { ResultReason } from 'microsoft-cognitiveservices-speech-sdk';
import axios from 'axios';

require('dotenv').config();

// Cognitive Services Speech SDK
const speechsdk = require('microsoft-cognitiveservices-speech-sdk')

const { TextAnalyticsClient, AzureKeyCredential } = require("@azure/ai-text-analytics");

const key = 'YOUR_TEXT_ANALYSIS_KEY_HERE';
const endpoint = 'YOUR_CORRESPONDING_ENDPOINT_HERE'; // e.g. https://YOURNAMEOFSUB.cognitiveservices.azure.com/
const textAnalyticsClient = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));

export default class App extends Component {
constructor(props) {
super(props);

this.state = {
displayText: 'INITIALIZED: ready to test speech...'
displayText: null,
entitiescomp: null,
keyphrases: null,
recognizerboy: null,
ctr: 0,
}
}

@@ -25,61 +38,198 @@ export default class App extends Component {
});
}
}

// extract key phrases from the recognized text
async keyPhraseExtraction(recognizedtext){
let displayText;

async sttFromMic() {
const tokenObj = await getTokenOrRefresh();
const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
speechConfig.speechRecognitionLanguage = 'en-US';
const client = textAnalyticsClient;
const text = recognizedtext;
const keyPhrasesInput = [
text,
];

const keyPhraseResult = await client.extractKeyPhrases(keyPhrasesInput);

const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();
const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);
keyPhraseResult.forEach(document => {
// console.log(`ID: ${document.id}`);
displayText = `${document.keyPhrases}`;
});

this.setState({
displayText: 'speak into your microphone...'
keyphrases: displayText,
});
console.log("keyphraseresult: " + keyPhraseResult);

}

recognizer.recognizeOnceAsync(result => {
let displayText;
if (result.reason === ResultReason.RecognizedSpeech) {
displayText = `RECOGNIZED: Text=${result.text}`
} else {
displayText = 'ERROR: Speech was cancelled or could not be recognized. Ensure your microphone is working properly.';
async entityRecognition(recognizedtext){
//let displayText;
const client = textAnalyticsClient;
const text = recognizedtext;

const entityInputs = [
text,
// "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975, to develop and sell BASIC interpreters for the Altair 8800",
// "La sede principal de Microsoft se encuentra en la ciudad de Redmond, a 21 kilómetros de Seattle."
];

const entityResults = await client.recognizeEntities(entityInputs);

entityResults.forEach(document => {
console.log(`Document ID: ${document.id}`);
if (document.entities != null){
document.entities.forEach(entity => {
console.log(`\tName: ${entity.text} \tCategory: ${entity.category} \tSubcategory: ${entity.subCategory ? entity.subCategory : "N/A"}`);
console.log(`\tScore: ${entity.confidenceScore}`);
});
}
});

this.setState({
displayText: displayText
});
this.setState({
entitiescomp: entityResults,
});
console.log("entityresults" + entityResults[0].entities);
}
// entityRecognition(textAnalyticsClient);

// when state has updated (displayText first, then keyphrases & entities), send the results to the japp API
async componentDidUpdate(prevProps, prevState) {
// React passes prevProps first and prevState second; state must be compared against prevState
if (this.state.displayText !== prevState.displayText) {
if(this.state.entitiescomp !== null){
if (this.state.entitiescomp !== prevState.entitiescomp && this.state.keyphrases !== prevState.keyphrases){
//copy state & remove client info
console.log(this.state);
const sentence = this.state.displayText;
const keyphrases = this.state.keyphrases;
const entities = this.state.entitiescomp[0].entities;
try {
await axios.post('http://localhost:3002/processing/poster', {sentence, keyphrases, entities}, {
headers: {
'Content-Type': 'application/json'
}
});
console.log('state updated, so the results were posted to the japp server');
} catch(err) {
if (err.response && err.response.status === 500) {
console.log('the server hit an internal error');
} else {
console.log('error:', err);
}
}
}
}
}
}

async sttFromMic(){
let displayText;
let Ctr = this.state.ctr;
let utterances = [
// add test utterances here to replay a series of inputs without speaking each time
]


displayText = utterances[Ctr];
Ctr = Ctr + 1;
if(Ctr === utterances.length){
Ctr = 0;
}

async fileChange(event) {
const audioFile = event.target.files[0];
console.log(audioFile);
const fileInfo = audioFile.name + ` size=${audioFile.size} bytes `;
this.keyPhraseExtraction(displayText);
this.entityRecognition(displayText);

this.setState({
displayText: fileInfo
displayText: displayText,
ctr: Ctr
});
}

//React example single utterance (15s max.)
// async sttFromMic() {
// const tokenObj = await getTokenOrRefresh();
// const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
// speechConfig.speechRecognitionLanguage = 'en-US';

// const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();
// const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);

// this.setState({
// displayText: 'speak into your microphone...'
// });

// recognizer.recognizeOnceAsync(result => {
// let displayText;


// if (result.reason === ResultReason.RecognizedSpeech) {
// displayText = `${result.text}`;

// } else {
// displayText = 'ERROR: Speech was cancelled or could not be recognized. Ensure your microphone is working properly.';
// }
// // this.keyPhraseExtraction(displayText);
// });
// }

// continuous speech recognition
async sttFromMicCont(){
const tokenObj = await getTokenOrRefresh();
const speechConfig = speechsdk.SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);
speechConfig.speechRecognitionLanguage = 'en-US';

const audioConfig = speechsdk.AudioConfig.fromWavFileInput(audioFile);
const audioConfig = speechsdk.AudioConfig.fromDefaultMicrophoneInput();
const recognizer = new speechsdk.SpeechRecognizer(speechConfig, audioConfig);

recognizer.recognizeOnceAsync(result => {
let displayText;
if (result.reason === ResultReason.RecognizedSpeech) {
displayText = `RECOGNIZED: Text=${result.text}`
} else {
displayText = 'ERROR: Speech was cancelled or could not be recognized. Ensure your microphone is working properly.';
this.setState({recognizerboy: recognizer})

let displayText;

recognizer.recognizing = (s, e) => {
//console.log(`RECOGNIZING: Text=${e.result.text}`);
//displayText = `${e.result.text}`;
};


recognizer.recognized = (s, e) => {
if (e.result.reason === speechsdk.ResultReason.RecognizedSpeech) {
console.log(`RECOGNIZED: Text=${e.result.text}`);
displayText = `${e.result.text}`;
this.keyPhraseExtraction(displayText);
this.entityRecognition(displayText);
this.setState({displayText: displayText});
}
else if (e.result.reason === speechsdk.ResultReason.NoMatch) {
console.log("NOMATCH: Speech could not be recognized.");
}
};

recognizer.canceled = (s, e) => {
console.log(`CANCELED: Reason=${e.reason}`);

if (e.reason === speechsdk.CancellationReason.Error) {
console.log(`CANCELED: ErrorCode=${e.errorCode}`);
console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
console.log("CANCELED: Did you update the key and location/region info?");
}

recognizer.stopContinuousRecognitionAsync();
};

recognizer.sessionStopped = (s, e) => {
console.log("\n Session stopped event.");
recognizer.stopContinuousRecognitionAsync();
};

this.setState({
displayText: fileInfo + displayText
});
});
recognizer.startContinuousRecognitionAsync();
}

// handler for the button that stops the recognition process
async sttFromMicContStop() {
const recognizer = this.state.recognizerboy;
recognizer.stopContinuousRecognitionAsync();
}

render() {
@@ -90,18 +240,19 @@
<div className="row main-container">
<div className="col-6">
<i className="fas fa-microphone fa-lg mr-2" onClick={() => this.sttFromMic()}></i>
Convert speech to text from your mic.
Convert speech to text from your mic once.


<div className="mt-2">
<label htmlFor="audio-file"><i className="fas fa-file-audio fa-lg mr-2"></i></label>
<input
type="file"
id="audio-file"
onChange={(e) => this.fileChange(e)}
style={{display: "none"}}
/>
Convert speech to text from an audio file.
<i className="fas fa-microphone fa-lg mr-2" onClick={() => this.sttFromMicCont() } ></i>
Convert speech to text continuously.
</div>

<div className="mt-2">
<i className="fas fa-microphone fa-lg mr-2" onClick={() => this.sttFromMicContStop() } ></i>
Stop converting speech to text continuously.
</div>

</div>
<div className="col-6 output-display rounded">
<code>{this.state.displayText}</code>
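As a side note on the componentDidUpdate change above: React passes prevProps first and prevState second, so a state comparison must read the second argument. A minimal standalone illustration (not part of the PR):

import React, { Component } from 'react';

class Example extends Component {
  state = { displayText: null };

  componentDidUpdate(prevProps, prevState) {
    // comparing against the first argument would compare props, not state
    if (this.state.displayText !== prevState.displayText) {
      console.log('displayText changed:', this.state.displayText);
    }
  }

  render() {
    return null;
  }
}

export default Example;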
24 changes: 24 additions & 0 deletions src/textclient.js
@@ -0,0 +1,24 @@
"use strict";
require('dotenv').config();
const { TextAnalyticsClient, AzureKeyCredential } = require("@azure/ai-text-analytics");

const key = 'REDACTED';
const endpoint = 'REDACTED';
const textAnalyticsClient = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));

async function keyPhraseExtraction(client){

const keyPhrasesInput = [
"My cat might need to see a veterinarian.",
];
const keyPhraseResult = await client.extractKeyPhrases(keyPhrasesInput);

keyPhraseResult.forEach(document => {
console.log(`ID: ${document.id}`);
console.log(` ${document.keyPhrases}`);
});
}

keyPhraseExtraction(textAnalyticsClient); // smoke test: runs once when this module is first required

module.exports = textAnalyticsClient;
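Hypothetical usage of the exported client from another module; the input text and output handling are illustrative, and an errored document would carry an error property instead of entities:

const textAnalyticsClient = require('./textclient');

async function run() {
  const [result] = await textAnalyticsClient.recognizeEntities([
    'Microsoft was founded by Bill Gates and Paul Allen.',
  ]);
  if (!result.error) {
    result.entities.forEach(entity => {
      console.log(`${entity.text} (${entity.category}): ${entity.confidenceScore}`);
    });
  }
}

run().catch(console.error);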