-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.js
151 lines (135 loc) · 4.25 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// 3rd Party dependencies
var express = require('express'); // http://expressjs.com/
var request = require('request'); // https://github.com/request/request
var cheerio = require('cheerio'); // https://github.com/cheeriojs/cheerio
var async = require('async'); // https://github.com/caolan/async
// Core Node dependencies
var fs = require('fs');
var exec = require('child_process').exec;
// Shared scrape state: assigned while a track is being scraped
// (getMainPageData / getStreamData) and read back by later steps.
var artist;
var title;
var remix;
var source_url;
var stream_url;

// Path prefix that downloaded files are written under
var downloadDir = './downloads/';

// User-Agent header sent with every request to hypem
var fakeUserAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36';

// Single hard-coded track page, used for testing the scraper
var url = 'http://hypem.com/track/2f0fh';

var app = express();

// The getStreamData request must carry the same cookie that the
// getMainPageData request received, otherwise it will be blocked.
// Enabling the jar makes every call through `req` share cookies.
var requestOptions = { jar: true };
var req = request.defaults(requestOptions);
// Scrape the page url for hypem tracks
// https://scotch.io/tutorials/scraping-the-web-with-node-js
//
// GET /scrape runs three steps in order: read the track page, resolve the
// stream url, then download the file. The response is only sent once the
// whole chain has finished (or failed).
//
// NOTE(review): the steps communicate through module-level variables, so two
// overlapping requests would overwrite each other's state.
app.get('/scrape', function (httpReq, res) {
  // The first parameter is deliberately not named `req`, which would shadow
  // the module-level cookie-jar request object of the same name.
  console.log('Starting scrape');

  // We need to use control flow for async calls to the hypem server
  // https://github.com/caolan/async#seriestasks-callback
  // http://www.sebastianseilund.com/nodejs-async-in-practice
  //
  // Each task is wrapped in a function so that source_url / stream_url are
  // read when the task runs, i.e. after the previous task has assigned them.
  async.series([
    function (callback) { // First we get the Track details and stream source url
      getMainPageData(url, callback);
    },
    function (callback) { // Now we get the track stream data from the source url
      getStreamData(source_url, callback);
    },
    function (callback) { // Finally download the file using wget for the stream
      downloadFileWget(stream_url, callback);
    }
  ], function (err) { // Called after all tasks, or as soon as one passes an error
    if (err) {
      var reason = (err && err.message) ? err.message : String(err);
      console.log('Scrape failed: ' + reason);
      res.status(500).send('Download Failed: ' + reason);
      return;
    }
    res.send('Download Complete: ' + getFileName());
  });
});
/**
 * Request a hypem track page and pull the track details out of its HTML.
 *
 * Side effects: assigns the module-level `artist`, `title`, `remix` and
 * `source_url` variables from the page content.
 *
 * @param {string} url - Track page url to request.
 * @param {function} callback - Node-style callback. Called exactly once:
 *   with no arguments on success, or with an Error when the request fails,
 *   the embedded track JSON cannot be read, or no track data is present.
 */
function getMainPageData(url, callback) {
  console.log('Get Main Page Data - url: ' + url);

  req.get({
    url: url,
    headers: {
      'User-Agent': fakeUserAgent
    }
  }, function (error, response, html) {
    if (error) {
      console.log('Failed to request url: ' + url);
      // Always report back, otherwise the caller waits forever
      return callback(error);
    }

    var foundSource = false;

    // Parsing runs inside an asynchronous callback, so an uncaught exception
    // here would take the whole process down instead of reaching the caller.
    try {
      var $ = cheerio.load(html);

      // If several elements match, the last one wins
      $('.artist').each(function () {
        artist = $(this).text();
      });

      $('.base-title').each(function () {
        title = $(this).text();
      });

      remix = ''; // If there is no remix
      $('.remix-link').each(function () {
        remix = $(this).text();
      });

      // The element's text is JSON describing the tracks on the page;
      // the id and key of the first track form the source url.
      $('#displayList-data').each(function () {
        var scriptData = JSON.parse($(this).text());
        var track = scriptData.tracks[0];
        source_url = 'http://hypem.com/serve/source/' + track.id + '/' + track.key;
        console.log('Source URL: ' + source_url);
        foundSource = true;
      });
    } catch (parseError) {
      console.log('Failed to parse track data from url: ' + url);
      return callback(parseError);
    }

    if (!foundSource) {
      // Without this check a stale or undefined source_url would be used next
      return callback(new Error('No track data found at url: ' + url));
    }

    console.log('Track to process: ' + getFileName());
    callback();
  });
}
/**
 * Request the track's source url and read the stream url from the JSON reply.
 *
 * Side effects: assigns the module-level `stream_url` variable.
 *
 * @param {string} source_url - Source url built from the track id and key.
 * @param {function} callback - Node-style callback. Called exactly once:
 *   with no arguments on success, or with an Error when the request fails,
 *   the server answers with a 403 page, or the reply holds no usable url.
 */
function getStreamData(source_url, callback) {
  console.log('Get Stream Data - url:' + source_url);

  req.get({
    url: source_url,
    headers: {
      'User-Agent': fakeUserAgent,
      'Content-Type': 'application/json'
    }
  }, function (error, response, jsonData) {
    if (error) {
      // Log the url that was actually requested (the source url)
      console.log('Failed to request track json data from source url: ' + source_url);
      return callback(error);
    }

    // A blocked request comes back as an error page rather than JSON
    if (typeof jsonData === 'string' && jsonData.indexOf('Error 403') > -1) {
      console.log('We be blocked.');
      return callback(new Error('Blocked (Error 403) requesting source url: ' + source_url));
    }

    var trackStreamData;
    try {
      trackStreamData = JSON.parse(jsonData);
    } catch (parseError) {
      // A throw here would escape the async callback and crash the process
      console.log('Failed to parse track json data from source url: ' + source_url);
      return callback(parseError);
    }

    if (!trackStreamData || !trackStreamData.url) {
      return callback(new Error('No stream url in response from source url: ' + source_url));
    }

    stream_url = trackStreamData.url;
    console.log('Stream URL: ' + stream_url);
    callback();
  });
}
// Use wget to do the download as it handles streams
// http://www.hacksparrow.com/using-node-js-to-download-files.html
/**
 * Download the stream to `downloadDir`, naming the file after the current
 * track (see getFileName) with an `.mp3` extension.
 *
 * @param {string} stream_url - http(s) url of the audio stream.
 * @param {function} callback - Node-style callback. Called exactly once:
 *   with no arguments when wget exits successfully, or with an Error when
 *   the url is not an http(s) url or wget fails.
 */
function downloadFileWget (stream_url, callback) {
  // execFile hands the arguments to wget directly, without a shell. The url
  // comes from a remote server and the file name from scraped page text, so
  // neither may be interpreted as shell syntax.
  var execFile = require('child_process').execFile;

  // Reject anything that is not a plain http(s) url; this also stops a value
  // starting with '-' from being read by wget as an option.
  if (!/^https?:\/\//i.test(String(stream_url))) {
    return callback(new Error('Invalid stream url: ' + stream_url));
  }

  var fileName = getFileName();
  var outputPath = downloadDir + fileName + '.mp3';
  var args = ['--output-document=' + outputPath, stream_url];
  console.log('wget ' + args.join(' '));

  execFile('wget', args, function (err, stdout, stderr) {
    if (err) {
      // Report through the callback: a throw from this asynchronous callback
      // cannot be caught by the caller and would crash the server.
      console.log('wget failed for ' + fileName + ': ' + err.message);
      return callback(err);
    }
    console.log(fileName + ' downloaded to ' + downloadDir);
    callback();
  });
}
/**
 * Build the display/file name for the current track from the module-level
 * `artist`, `title` and `remix` variables.
 *
 * @returns {string} "<artist> - <title>", followed by " (<remix>)" only when
 *   a remix name is set. An empty, undefined or null remix adds no suffix.
 */
function getFileName() {
  var fileName = artist + ' - ' + title;
  // Strict checks: an unset remix must not produce a " (undefined)" suffix
  var hasRemix = remix !== undefined && remix !== null && remix !== '';
  if (hasRemix) {
    fileName += ' (' + remix + ')';
  }
  return fileName;
}
// Start listening as soon as this module is loaded
var listenPort = '8081';
app.listen(listenPort);
console.log('Visit http://localhost:8081/scrape in a browser to start');

// Expose the Express app through both module.exports and the local exports alias
module.exports = app;
exports = module.exports;