Skip to content

Commit

Permalink
feat: implement core structure (#8)
Browse files Browse the repository at this point in the history
* feat: implement core structure

* chore(bun.lockb): update lockfile

* feat: download and split a video file

* feat: transcribe an interview using whisper

* fix: add logging

* fix(src/transcribe.ts): skip splitting if unnecessary

* feat: proofread the transcribed text

* style(src/ai.ts): run biome

* feat: upload result files to Google Drive

* feat: reply file urls to command

* fix(src/main.ts): remove activity since the bot will never be online

* style: run biome

* fix(src/ai.ts): define the max file size for Whisper API as const

* fix(src/ai.ts): fix error message

* fix(src/commands.ts): fix type assumption

* feat(src/transcribe.ts): remove temp directory after completion

* fix(src/transcribe.ts): log file uploads

* fix(src/commands.ts): improve bot response

* style(src/commands.ts): run biome

* style(src/gdrive.ts): use {} instead of undefined for spread

* fix(src/gdrive.ts): upload text instead of stream if the file is converted

* fix(src/transcribe.ts): remove duplicated logs

* fix(src/gdrive.ts): upload files with filename

* fix(src/gdrive.ts): specify mimeType if converting

* fix(src/gdrive.ts): remove extension if converting

* fix(src/commands.ts): inline fields of command response

* fix: use UUID as filename of temp files to avoid a Whisper error

* fix(src/transcribe.ts): use unique-string instead of UUID for filename

* style: run biome

* fix(src/gdrive.ts): fix filename to upload
  • Loading branch information
risu729 authored Jan 16, 2024
1 parent e7ff0f4 commit e702a5c
Show file tree
Hide file tree
Showing 13 changed files with 947 additions and 1 deletion.
5 changes: 4 additions & 1 deletion biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
},
"linter": {
"rules": {
"all": true
"all": true,
"nursery": {
"noNodejsModules": "off"
}
}
},
"json": {
Expand Down
Binary file modified bun.lockb
Binary file not shown.
4 changes: 4 additions & 0 deletions cspell.config.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,9 @@ module.exports = {
"knip",
"commitlint",
"automerge",
"openai",
"consola",
"gdrive",
"ffprobe",
],
};
2 changes: 2 additions & 0 deletions knip.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ import type { KnipConfig } from "knip";

// Knip configuration object for this project.
const config: KnipConfig = {
// Dependencies that Knip is told to ignore.
ignoreDependencies: [
"bun",
// @commitlint/cli cannot be detected because its binary is named "commitlint"
// ref: https://knip.dev/guides/handling-issues/#example
"@commitlint/cli",
],
// NOTE(review): "screen" is presumably listed because a package.json script invokes it — confirm.
ignoreBinaries: ["screen"],
};

// biome-ignore lint/style/noDefaultExport:
Expand Down
15 changes: 15 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"name": "interview-transcriber",
"private": true,
"scripts": {
"start": "bun src/main.ts",
"start:screen": "screen -DRS transcriber bun start",
"commit": "git-cz",
"check": "npm-run-all check:*",
"check:biome": "biome check --apply-unsafe .",
Expand All @@ -12,6 +14,17 @@
"ignore-sync": "ignore-sync .",
"prepare": "husky install"
},
"dependencies": {
"@google/generative-ai": "0.1.3",
"@googleapis/drive": "8.5.0",
"consola": "3.2.3",
"csv-parse": "5.5.3",
"discord.js": "14.14.1",
"fluent-ffmpeg": "2.1.2",
"mime": "4.0.1",
"openai": "4.24.1",
"unique-string": "3.0.0"
},
"devDependencies": {
"@biomejs/biome": "1.5.2",
"@commitlint/cli": "18.4.4",
Expand All @@ -21,6 +34,8 @@
"@cspell/cspell-types": "8.3.2",
"@tsconfig/bun": "1.0.1",
"@tsconfig/strictest": "2.0.2",
"@types/fluent-ffmpeg": "2.1.24",
"@types/node": "20.10.8",
"bun-types": "1.0.22",
"commitizen": "4.3.0",
"cspell": "8.3.2",
Expand Down
Empty file removed src/.gitkeep
Empty file.
115 changes: 115 additions & 0 deletions src/ai.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import { createReadStream } from "node:fs";
import { GoogleGenerativeAI } from "@google/generative-ai";
import { env } from "bun";
import openAi from "openai";
import { SupportedLanguages } from "./transcribe";

/**
 * Shared OpenAI API client, configured with the `OPENAI_API_KEY` value
 * read from Bun's `env`.
 */
export const openaiClient = new openAi({ apiKey: env.OPENAI_API_KEY });

/**
 * Largest file size, in bytes, accepted for a Whisper API request
 * (25 MB counted in decimal units, i.e. 25,000,000 bytes).
 * @see https://platform.openai.com/docs/api-reference/speech-to-text
 */
export const whisperMaxFileSize = 25 * 1_000_000;

/**
 * Gemini API client.
 * Constructed once at module load with the `GEMINI_API_KEY` value read from Bun's `env`.
 * NOTE(review): assumes `GEMINI_API_KEY` is set in the environment — confirm where this is validated.
 */
export const geminiClient = new GoogleGenerativeAI(env.GEMINI_API_KEY);

/**
 * Transcribes one audio file with the `whisper-1` model.
 *
 * The file is streamed from disk and sent with a short prompt written in the
 * requested language. The response is requested as `verbose_json` so that
 * the individual segments are available.
 * @param audioFilePath Path to the audio file to send
 * @param language Language of the audio; also selects the prompt text
 * @returns The text of every segment in the response, in response order
 */
export const transcribeAudioFile = async (
  audioFilePath: string,
  language: SupportedLanguages,
): Promise<string[]> => {
  // Shape of a verbose_json response; the library's own type lacks `segments`.
  type VerboseTranscription = openAi.Audio.Transcriptions.Transcription & {
    segments: {
      text: string;
    }[];
  };

  const prompt =
    language === "en"
      ? "Hello. This is an interview, and you transcribe it."
      : "こんにちは。これはインタビューの録音で、文字起こしをします。";

  const transcription = (await openaiClient.audio.transcriptions.create({
    file: createReadStream(audioFilePath),
    model: "whisper-1",
    language,
    prompt,
    // biome-ignore lint/style/useNamingConvention: library's naming convention
    response_format: "verbose_json",
  })) as VerboseTranscription; // cast since the library doesn't support verbose_json

  return Array.from(transcription.segments, ({ text }) => text);
};

/**
 * Proofreads an interview transcription with the selected AI model.
 *
 * The same system prompt is used for both models. For `gpt-4` it is sent as
 * the system message with the transcription as the user message; otherwise
 * prompt and transcription are joined into a single Gemini request.
 * @param transcription Transcription to proofread
 * @param language Language of the transcription; selects the output-language rule in the prompt
 * @param model AI model to use
 * @returns The model used, the system prompt that was sent, and the proofread text
 * @throws Error if the model returns an empty response
 */
export const proofreadTranscription = async <M extends "gpt-4" | "gemini-pro">(
  transcription: string,
  language: SupportedLanguages,
  model: M,
): Promise<{ model: M; prompt: string; response: string }> => {
  // Where the prompt says the transcription is located, per model.
  const textLocation = model === "gpt-4" ? "entered by the user" : "below";
  const languageRule =
    language === "en"
      ? "The response must not include markdown syntax."
      : "The response must be in Japanese without markdown syntax.";

  // Continuation lines are intentionally unindented so that no extra
  // whitespace ends up in the prompt text.
  const systemPrompt = `You are a web media proofreader.
The text ${textLocation} is a transcription of the interview.
Follow the guide below and improve it.
- Remove redundant or repeating expressions.
- Remove fillers.
- Correct grammar errors.
- Replace unnatural or difficult wordings.
- Shorten sentences.
The output style should be the style of an interview, like \`interviewer: \` or \`interviewee\`.
${languageRule}`;

  // Sends the request to the selected provider and returns the raw text.
  const requestProofread = async (): Promise<string> => {
    if (model === "gpt-4") {
      const completion = await openaiClient.chat.completions.create({
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: transcription },
        ],
        model,
      });
      // A missing choice or null content is treated as an empty response.
      return completion.choices[0]?.message.content ?? "";
    }

    const generativeModel = geminiClient.getGenerativeModel({ model });
    const generation = await generativeModel.generateContent(
      `${systemPrompt}\n\n---\n\n${transcription}`,
    );
    return generation.response.text();
  };

  const proofread = await requestProofread();
  if (!proofread) {
    throw new Error("The response is empty.");
  }

  return { model, prompt: systemPrompt, response: proofread };
};
Loading

0 comments on commit e702a5c

Please sign in to comment.