Skip to content

Commit

Permalink
feat: Support SPEECH_RECOGNITION_ONLY mode (#242)
Browse files Browse the repository at this point in the history
  • Loading branch information
tshashkova authored Aug 15, 2024
1 parent 2c08d5f commit 4a65bcd
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### Added

- Add getProto method to Inworld packets
- Support SPEECH_RECOGNITION_ONLY mode

## [1.14.2] - 2024-06-27

Expand Down
39 changes: 39 additions & 0 deletions __tests__/factories/event.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import { v4 } from 'uuid';
import {
InworldConversationEventType,
MicrophoneMode,
UnderstandingMode,
} from '../../src/common/data_structures';
import { protoTimestamp } from '../../src/common/helpers';
import { InworldPacket } from '../../src/entities/packets/inworld_packet.entity';
Expand Down Expand Up @@ -155,6 +156,44 @@ describe('event types', () => {
},
);

// Every SDK-level understanding mode must be translated to the matching proto
// enum value on the generated AUDIO_SESSION_START control packet.
test.each([
  {
    input: UnderstandingMode.FULL,
    expected: AudioSessionStartPayload.UnderstandingMode.FULL,
  },
  {
    input: UnderstandingMode.SPEECH_RECOGNITION_ONLY,
    expected:
      AudioSessionStartPayload.UnderstandingMode.SPEECH_RECOGNITION_ONLY,
  },
])(
  'should generate audio session start with understandingMode $input',
  ({ input: understandingMode, expected: protoUnderstandingMode }) => {
    const packet = factory.audioSessionStart({
      conversationId,
      understandingMode,
    });
    const ids = packet.getPacketId();

    // Control payload: correct action and mapped understanding mode.
    expect(packet.hasControl()).toEqual(true);
    const control = packet.getControl();
    expect(control.getAction()).toEqual(
      ControlEvent.Action.AUDIO_SESSION_START,
    );
    expect(control.getAudioSessionStart().getUnderstandingMode()).toEqual(
      protoUnderstandingMode,
    );

    // Packet id: only the packet id itself and the conversation id are set.
    expect(ids.getPacketId()).toBeDefined();
    expect(ids.getInteractionId()).toBeFalsy();
    expect(ids.getUtteranceId()).toBeFalsy();
    expect(ids.getCorrelationId()).toBeFalsy();
    expect(ids.getConversationId()).toEqual(conversationId);

    // Routing is present but carries no explicit target(s).
    expect(packet.hasRouting()).toEqual(true);
    const routing = packet.getRouting();
    expect(routing.getTarget()).toBeFalsy();
    expect(routing.getTargetsList()).toEqual([]);

    expect(packet.hasTimestamp()).toEqual(true);
  },
);

test('should generate audio session end', () => {
const event = factory.audioSessionEnd({
conversationId,
Expand Down
34 changes: 33 additions & 1 deletion __tests__/services/inworld_connection.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { v4 } from 'uuid';
import {
ConversationState,
MicrophoneMode,
UnderstandingMode,
} from '../../src/common/data_structures';
import { InworldPacket } from '../../src/entities/packets/inworld_packet.entity';
import { EventFactory } from '../../src/factories/event';
Expand Down Expand Up @@ -453,7 +454,38 @@ describe('send', () => {
mode,
});
expect(packet).toBeInstanceOf(InworldPacket);
expect(packet.isControl()).toEqual(true);
expect(packet?.isControl()).toEqual(true);
});

// Checks that an `understandingMode` passed to `sendAudioSessionStart` is
// forwarded to the event factory together with the conversation id, and that
// the call resolves with a control packet without opening a new connection.
test('should send audio session start for speech recognition only', async () => {
  const understandingMode = UnderstandingMode.SPEECH_RECOGNITION_ONLY;
  const sendAudioSessionStart = jest.spyOn(eventFactory, 'audioSessionStart');

  const [packet] = await Promise.all([
    service.sendAudioSessionStart({ understandingMode }),
    // Feed the prepared incoming message to the connection on a later
    // macrotask. NOTE(review): presumably this is what lets the pending send
    // above settle — confirm against the surrounding test setup.
    new Promise<boolean>((resolve) => {
      setTimeout(() => {
        connection.onMessage!(incoming);
        resolve(true);
      }, 0);
    }),
  ]);

  expect(open).toHaveBeenCalledTimes(0);
  expect(service.isActive()).toEqual(true);
  expect(service.getConversations()).toEqual([
    {
      conversationId,
      characters: [characters[0]],
    },
  ]);
  expect(sendAudioSessionStart).toHaveBeenCalledTimes(1);
  expect(sendAudioSessionStart).toHaveBeenLastCalledWith({
    conversationId,
    understandingMode,
  });
  expect(packet).toBeInstanceOf(InworldPacket);
  expect(packet?.isControl()).toEqual(true);
});

test('should send audio session end', async () => {
Expand Down
7 changes: 7 additions & 0 deletions src/common/data_structures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ export enum MicrophoneMode {
EXPECT_AUDIO_END = 'EXPECT_AUDIO_END',
}

/**
 * Understanding mode that can be requested when starting an audio session
 * (the optional `understandingMode` field of the audio session start params).
 *
 * The event factory maps `SPEECH_RECOGNITION_ONLY` to the proto value of the
 * same name and treats every other value, including `undefined`, as `FULL`.
 *
 * NOTE(review): the server-side behaviour of each mode is not visible in this
 * file — confirm against the protocol documentation.
 */
export enum UnderstandingMode {
// Default mode used by the event factory when nothing else is requested.
FULL = 'FULL',
// Presumably limits processing to speech recognition, as the name suggests — confirm.
SPEECH_RECOGNITION_ONLY = 'SPEECH_RECOGNITION_ONLY',
}

/**
 * Base parameters shared by the packet-sending parameter types that extend
 * this interface.
 */
export interface SendPacketParams {
// Identifier of the conversation the packet belongs to.
conversationId: string;
}
Expand All @@ -168,10 +173,12 @@ export interface SendTriggerPacketParams extends SendPacketParams {

/**
 * Parameters for building an audio session start packet: the conversation id
 * inherited from `SendPacketParams` plus the optional session settings.
 */
export interface SendAudioSessionStartPacketParams extends SendPacketParams {
// Optional microphone mode for the session.
mode?: MicrophoneMode;
// Optional understanding mode; the event factory falls back to FULL when omitted.
understandingMode?: UnderstandingMode;
}

/**
 * Caller-facing options for starting an audio session. Unlike
 * `SendAudioSessionStartPacketParams` it carries no `conversationId`; the
 * conversation service supplies that itself when building the packet.
 */
export interface AudioSessionStartPacketParams {
// Optional microphone mode for the session.
mode?: MicrophoneMode;
// Optional understanding mode for the session.
understandingMode?: UnderstandingMode;
}

export interface ConversationMapItem<
Expand Down
18 changes: 17 additions & 1 deletion src/factories/event.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
SendPacketParams,
SendTriggerPacketParams,
SessionControlProps,
UnderstandingMode,
} from '../common/data_structures';
import { protoTimestamp } from '../common/helpers';
import { Character } from '../entities/character.entity';
Expand Down Expand Up @@ -310,8 +311,23 @@ export class EventFactory {
break;
}

let protoUnderstandingMode;

switch (params.understandingMode) {
case UnderstandingMode.SPEECH_RECOGNITION_ONLY:
protoUnderstandingMode =
AudioSessionStartPayload.UnderstandingMode.SPEECH_RECOGNITION_ONLY;
break;
default:
protoUnderstandingMode =
AudioSessionStartPayload.UnderstandingMode.FULL;
break;
}

event.setAudioSessionStart(
new AudioSessionStartPayload().setMode(protoMode),
new AudioSessionStartPayload()
.setMode(protoMode)
.setUnderstandingMode(protoUnderstandingMode),
);
}

Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
MicrophoneMode,
SessionTokenProps,
TriggerParameter,
UnderstandingMode,
User,
UserProfile,
UserProfileField,
Expand Down Expand Up @@ -125,6 +126,7 @@ export {
TextEvent,
TriggerEvent,
TriggerParameter,
UnderstandingMode,
User,
UserProfile,
UserProfileField,
Expand Down
2 changes: 1 addition & 1 deletion src/services/conversation.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ export class ConversationService<
async sendAudioSessionStart(params?: AudioSessionStartPacketParams) {
return this.ensureConversation(() =>
this.connection.getEventFactory().audioSessionStart({
mode: params?.mode,
...params,
conversationId: this.getConversationId(),
}),
);
Expand Down

0 comments on commit 4a65bcd

Please sign in to comment.