-
Notifications
You must be signed in to change notification settings - Fork 0
/
audio_scraper.js
91 lines (80 loc) · 2.11 KB
/
audio_scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
const SoundCloud = require("soundcloud-scraper");
var parseString = require("xml2js").parseString;
var http = require("https");
const fs = require("fs");
/**
 * Fetches an XML document over HTTPS and parses it with xml2js.
 *
 * @param {string|URL} url - Address of the XML document to download.
 * @returns {Promise<object>} Resolves with the object produced by
 *   `parseString`.
 * @throws {Error} Rejects when the request fails or times out, when the server
 *   answers with a non-2xx status, or when the body cannot be parsed.
 */
const xmlToJson = (url) => {
  return new Promise((resolve, reject) => {
    const req = http.get(url, (res) => {
      const { statusCode } = res;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, then fail loudly instead
        // of trying to parse an error page as the feed.
        res.resume();
        reject(new Error(`Request to ${url} failed with status ${statusCode}`));
        return;
      }

      // Collect raw buffers and decode once at the end: decoding chunk by
      // chunk corrupts multi-byte characters that straddle a chunk boundary.
      const chunks = [];
      res.on("data", (chunk) => {
        chunks.push(chunk);
      });
      res.on("error", reject);
      res.on("end", () => {
        try {
          const xml = Buffer.concat(chunks).toString("utf8");
          parseString(xml, (err, result) => {
            if (err) {
              reject(err);
              return;
            }
            resolve(result);
          });
        } catch (err) {
          reject(err);
        }
      });
    });

    // Connection-level failures (DNS, refused, reset) are reported on the
    // request object, not on the response.
    req.on("error", reject);
    req.on("timeout", () => {
      // Destroying the request surfaces the timeout through the "error" handler.
      req.destroy(new Error(`Request to ${url} timed out`));
    });
  });
};
/**
 * Downloads the radio-spot feed, extracts each spot's SoundCloud URL with its
 * query string removed, and fetches audio info for a subset of them.
 *
 * @returns {Promise<object[]>} The records returned by `getAudioInfo`, in
 *   feed order.
 */
const pullRecords = async () => {
  const feed = await xmlToJson(
    "https://rights.culturalsurvival.org/blueprint-xml"
  );
  const spots = feed.radio_spots.radio_spot;

  // Two passes: read every raw URL first, then strip the query strings.
  const rawUrls = spots.map((spot) => spot.SoundCloud[0]);
  const cleanedUrls = rawUrls.map((rawUrl) => cleanUrl(rawUrl));

  // FIXME: only a small window of the URLs is processed, for testing purposes
  const testURLs = cleanedUrls.slice(2, 6);
  console.log(testURLs);

  // Tracks are handled one after another, not in parallel.
  const records = [];
  for (const trackUrl of testURLs) {
    records.push(await getAudioInfo(trackUrl));
  }
  return records;
};
/**
 * Removes the query string from a URL.
 *
 * @param {string} url - URL that may contain a "?" followed by parameters.
 * @returns {string} The part of `url` before the first "?" (the whole string
 *   when there is none).
 */
const cleanUrl = (url) => {
  const [withoutQuery] = url.split("?");
  return withoutQuery;
};
/**
 * Looks up a SoundCloud track, saves its audio under `./audio`, and returns a
 * plain record of the track's metadata.
 *
 * The promise settles only after the file has been fully written, so callers
 * can rely on the file existing once they hold the record.
 *
 * @param {string} url - SoundCloud track URL.
 * @returns {Promise<object>} Record with `title`, `description`, `thumbnail`,
 *   `url`, `duration`, `genre`, `author`, `publishedAt` and `trackURL` copied
 *   from the song info, plus `mp3filename`: the base name (without the `.mp3`
 *   extension) of the file written to `./audio`.
 * @throws {Error} Rejects when the lookup, the download stream, or the file
 *   write fails.
 */
const getAudioInfo = async (url) => {
  const client = new SoundCloud.Client();
  const song = await client.getSongInfo(url);

  // The title comes from a remote service: neutralise path separators so it
  // can neither escape ./audio nor point into a sub-directory that is missing.
  const filename = String(song.title).replace(/[\\/]/g, "_");

  await fs.promises.mkdir("./audio", { recursive: true });

  // Assumes downloadProgressive() resolves to a readable stream (it is piped).
  const stream = await song.downloadProgressive();
  await new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(`./audio/${filename}.mp3`);
    writer.on("error", reject);
    writer.on("finish", () => {
      console.log("Finished writing song!");
      resolve();
    });
    stream.on("error", (err) => {
      // pipe() does not close the destination when the source fails.
      writer.destroy();
      reject(err);
    });
    stream.pipe(writer);
  });

  return {
    title: song.title,
    description: song.description,
    thumbnail: song.thumbnail,
    url: song.url,
    duration: song.duration,
    genre: song.genre,
    author: song.author,
    publishedAt: song.publishedAt,
    trackURL: song.trackURL,
    mp3filename: filename,
  };
};
// Public API of this module: each helper is exported under its own name.
module.exports = {
xmlToJson,
pullRecords,
cleanUrl,
getAudioInfo,
};