Commit
* feat: implement core structure
* chore(bun.lockb): update lockfile
* feat: download and split a video file
* feat: transcribe an interview using whisper
* fix: add logging
* fix(src/transcribe.ts): skip splitting if unnecessary
* feat: proofread the transcribed text
* style(src/ai.ts): run biome
* feat: upload result files to Google Drive
* feat: reply file URLs to the command
* fix(src/main.ts): remove activity since the bot is never online
* style: run biome
* fix(src/ai.ts): define the max file size for Whisper API as const
* fix(src/ai.ts): fix error message
* fix(src/commands.ts): fix type assumption
* feat(src/transcribe.ts): remove temp directory after completion
* fix(src/transcribe.ts): log file uploads
* fix(src/commands.ts): improve bot response
* style(src/commands.ts): run biome
* style(src/gdrive.ts): use {} instead of undefined for spread
* fix(src/gdrive.ts): upload text instead of stream if the file is converted
* fix(src/transcribe.ts): remove duplicated logs
* fix(src/gdrive.ts): upload files with filename
* fix(src/gdrive.ts): specify mimeType if converting
* fix(src/gdrive.ts): remove extension if converting
* fix(src/commands.ts): inline fields of command response
* fix: use UUID as the filename of temp files to avoid a Whisper error
* fix(src/transcribe.ts): use unique-string instead of UUID for filename
* style: run biome
* fix(src/gdrive.ts): fix filename to upload
Showing 13 changed files with 947 additions and 1 deletion.
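The commit messages describe an end-to-end flow: a Discord command receives a recording, the file is downloaded and split, transcribed with Whisper, proofread by an AI model, uploaded to Google Drive, and the resulting file URLs are sent back as the command's reply. Only one source file appears in the diff excerpt below, so the following is a minimal sketch of that flow rather than code from this commit; it assumes discord.js, an option named "file", and a hypothetical runTranscriptionPipeline helper standing in for src/transcribe.ts and src/gdrive.ts.

// Hypothetical sketch of the command flow the commit messages describe.
// Assumes discord.js; every name below except the discord.js API is invented.
import type { ChatInputCommandInteraction } from "discord.js";

// Stand-in for the pipeline in src/transcribe.ts / src/gdrive.ts (not shown here):
// download, split if needed, transcribe, proofread, upload to Google Drive.
declare function runTranscriptionPipeline(
  fileUrl: string,
): Promise<{ transcriptionUrl: string; proofreadUrl: string }>;

export const handleTranscribeCommand = async (
  interaction: ChatInputCommandInteraction,
) => {
  // Transcription takes far longer than Discord's 3-second reply window,
  // so acknowledge the command first and edit the reply later.
  await interaction.deferReply();

  // The recording attached to the slash command (assumed option name).
  const attachment = interaction.options.getAttachment("file", true);

  const { transcriptionUrl, proofreadUrl } = await runTranscriptionPipeline(
    attachment.url,
  );

  // Reply with the Google Drive URLs of the result files.
  await interaction.editReply(
    `Transcription: ${transcriptionUrl}\nProofread: ${proofreadUrl}`,
  );
};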
@@ -21,5 +21,9 @@ module.exports = {
     "knip",
     "commitlint",
     "automerge",
+    "openai",
+    "consola",
+    "gdrive",
+    "ffprobe",
   ],
 };
Empty file.
@@ -0,0 +1,115 @@
import { createReadStream } from "node:fs";
import { GoogleGenerativeAI } from "@google/generative-ai";
import { env } from "bun";
import openAi from "openai";
import { SupportedLanguages } from "./transcribe";

/**
 * OpenAI API client.
 */
export const openaiClient = new openAi({
  apiKey: env.OPENAI_API_KEY,
});

/**
 * Maximum file size for Whisper API.
 * @see https://platform.openai.com/docs/api-reference/speech-to-text
 */
export const whisperMaxFileSize = 25 * 1000 * 1000;

/**
 * Gemini API client.
 */
export const geminiClient = new GoogleGenerativeAI(env.GEMINI_API_KEY);

/**
 * Transcribe an audio file.
 * @param audioFilePath Path to the audio file
 * @param language Language of the audio file
 * @returns Transcribed text segments
 */
export const transcribeAudioFile = async (
  audioFilePath: string,
  language: SupportedLanguages,
): Promise<string[]> => {
  const response = (await openaiClient.audio.transcriptions.create({
    file: createReadStream(audioFilePath),
    model: "whisper-1",
    language,
    prompt:
      language === "en"
        ? "Hello. This is an interview, and you transcribe it."
        : "こんにちは。これはインタビューの録音で、文字起こしをします。",
    // biome-ignore lint/style/useNamingConvention: library's naming convention
    response_format: "verbose_json",
  })) as openAi.Audio.Transcriptions.Transcription & {
    segments: {
      text: string;
    }[];
  }; // cast since the library doesn't support verbose_json

  return response.segments.map((segment) => segment.text);
};

/**
 * Proofread a transcription.
 * @param transcription Transcription to proofread
 * @param language Language of the transcription
 * @param model AI model to use
 * @returns The model used, the system prompt, and the proofread text
 */
export const proofreadTranscription = async <M extends "gpt-4" | "gemini-pro">(
  transcription: string,
  language: SupportedLanguages,
  model: M,
): Promise<{ model: M; prompt: string; response: string }> => {
  const systemPrompt = `You are a web media proofreader.
The text ${model === "gpt-4" ? "entered by the user" : "below"} is a transcription of the interview.
Follow the guide below and improve it.
- Remove redundant or repeating expressions.
- Remove fillers.
- Correct grammar errors.
- Replace unnatural or difficult wordings.
- Shorten sentences.
The output style should be the style of an interview, like \`interviewer: \` or \`interviewee\`.
${
  language === "en"
    ? "The response must not include markdown syntax."
    : "The response must be in Japanese without markdown syntax."
}`;

  let result = "";
  if (model === "gpt-4") {
    const response = await openaiClient.chat.completions.create({
      messages: [
        {
          role: "system",
          content: systemPrompt,
        },
        {
          role: "user",
          content: transcription,
        },
      ],
      model,
    });
    result = response.choices[0]?.message.content ?? "";
  } else {
    const response = await geminiClient
      .getGenerativeModel({
        model,
      })
      .generateContent(`${systemPrompt}\n\n---\n\n${transcription}`);
    result = response.response.text();
  }
  if (!result) {
    throw new Error("The response is empty.");
  }

  return {
    model,
    prompt: systemPrompt,
    response: result,
  };
};
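To show how the exports above fit together, here is a minimal sketch of a caller along the lines of src/transcribe.ts, which is not included in this excerpt. Only whisperMaxFileSize, transcribeAudioFile, and proofreadTranscription come from the file above; the splitAudio helper, the "./ai" import path, and the choice of "en" and "gpt-4" are assumptions.

// Editor's sketch of a caller for the exports above; splitAudio and the
// file layout are assumptions, not code from this commit.
import { statSync } from "node:fs";
import {
  proofreadTranscription,
  transcribeAudioFile,
  whisperMaxFileSize,
} from "./ai";

// Hypothetical splitter: returns paths of chunks that each stay under `maxBytes`.
// The actual splitting implementation in the commit is not shown here.
declare function splitAudio(path: string, maxBytes: number): Promise<string[]>;

export const transcribeAndProofread = async (audioFilePath: string) => {
  // Split only when the file exceeds the Whisper API limit, matching the
  // "skip splitting if unnecessary" commit.
  const chunks =
    statSync(audioFilePath).size > whisperMaxFileSize
      ? await splitAudio(audioFilePath, whisperMaxFileSize)
      : [audioFilePath];

  // Transcribe each chunk and concatenate the returned segments.
  const segments: string[] = [];
  for (const chunk of chunks) {
    segments.push(...(await transcribeAudioFile(chunk, "en")));
  }
  const transcription = segments.join("\n");

  // Proofread with one of the two supported models.
  const proofread = await proofreadTranscription(transcription, "en", "gpt-4");
  return { transcription, proofread: proofread.response };
};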