-
Notifications
You must be signed in to change notification settings - Fork 0
/
cli.js
82 lines (66 loc) · 2.17 KB
/
cli.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import convert from './src/index.js'
import fs from 'fs'
// The first CLI parameter is either a YouTube link or the name of an
// already-downloaded transcript file; a second parameter of exactly `--time`
// switches on timestamps in the printed output
const [, , argument, timeFlag] = process.argv
const includeTime = timeFlag === '--time'
/**
 * Tells whether a value looks like a YouTube link.
 *
 * Deliberately loose: an optional `http://` or `https://` scheme, an optional
 * `www.` prefix, then `youtube.com` or `youtu.be` (the dot in `youtu.be` is
 * optional, so a bare `youtube` host matches too), followed by `/` and at
 * least one more character.
 *
 * @param {string} value - Candidate string; non-strings are coerced by `RegExp.prototype.test`.
 * @returns {boolean} `true` when the whole value matches the pattern.
 */
const isYouTubeLink = (value) =>
  /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$/.test(value)
// Main CLI flow:
//   1. resolve the transcript file (download it with yt-dlp when given a link),
//   2. read it,
//   3. convert it and print the result,
//   4. remove the transcript again if this run downloaded it.
let fileName
let fileContent
// Evaluated once; drives the download, the second argument to convert(), and cleanup
const fromYouTube = isYouTubeLink(argument)

if (fromYouTube) {
  // This file is an ES module (static `import`, top-level `await`), where
  // `require` is not defined, so child_process is loaded with a dynamic import
  const { execFileSync } = await import('child_process')

  // Extract the 11-character video ID from the YouTube link
  const idMatch = argument.match(/(?:v=|\/)([0-9A-Za-z_-]{11}).*/)
  if (!idMatch) {
    // e.g. a channel or playlist URL that passes isYouTubeLink but has no video ID
    console.error('Could not extract a video ID from the YouTube link.')
    process.exit(1)
  }
  const videoId = idMatch[1]

  // Download the transcript. Arguments are passed as an array (no shell), so
  // the `?` in the URL is never subject to shell interpretation
  try {
    execFileSync(
      'yt-dlp',
      [
        '--write-auto-subs',
        '--skip-download',
        `https://www.youtube.com/watch?v=${videoId}`
      ],
      { stdio: 'ignore' }
    )
  } catch (err) {
    // yt-dlp missing from PATH or exiting non-zero
    console.error(`Error downloading transcript: ${err.message}`)
    process.exit(1)
  }

  // Find the downloaded transcript file
  const transcriptFiles = fs
    .readdirSync('./')
    .filter((file) => file.endsWith('.en.vtt'))

  // Check if any transcript files were found
  if (transcriptFiles.length === 0) {
    console.error('No transcript files found.')
    process.exit(1)
  }

  // Prefer the transcript whose name contains this video's ID, so an unrelated
  // .en.vtt already in the directory is not converted and then deleted.
  // NOTE(review): relies on yt-dlp's default output template embedding the ID
  // in the file name; with a custom template we fall back to the first match.
  fileName =
    transcriptFiles.find((file) => file.includes(videoId)) ?? transcriptFiles[0]
} else {
  fileName = argument
}

// check if the file name is provided
if (!fileName) {
  console.error(
    'Please provide a file name or YouTube link as the first parameter.'
  )
  process.exit(1)
}

try {
  // read the file content
  fileContent = fs.readFileSync(fileName, 'utf8')
} catch (err) {
  console.error(`Error reading file: ${err.message}`)
  process.exit(1)
}

try {
  // convert the file to ragtitles; the original link is passed along only for YouTube input
  const ragtitles = await convert(fileContent, fromYouTube ? argument : '')

  // print the ragtitles to the console with or without timestamps based on the includeTime flag
  const lines = includeTime
    ? ragtitles.map((title) => `${title.time} ${title.text}`)
    : ragtitles.map((title) => title.text)
  console.log(lines.join('\n'))
} finally {
  // delete the downloaded transcript file, even when conversion or printing fails
  if (fromYouTube) {
    fs.unlinkSync(fileName)
  }
}