-
Notifications
You must be signed in to change notification settings - Fork 1
/
retrieve.js
111 lines (105 loc) · 2.48 KB
/
retrieve.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
var querystring = require('querystring'),
http = require('http'),
fs = require('fs'),
cheerio = require('cheerio'),
tidy = require('./htmltidy').tidy;
exports.retrieveData = function(term, classif, destination) {
var data = querystring.stringify({
p_term: term,
p_classif: classif,
p_print_flag: "Y"
});
var options = {
host: 'osoc.berkeley.edu',
port: 80,
path: '/OSOC/osoc',
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
'Content-Length': data.length
}
};
var trim = function(s) {
return s.replace(/(\r\n|\n|\r)/gm," ")
.replace(/\s+/g," ")
.replace(/(?:(?:^|\n)\s+|\s+(?:$|\n))/g,'')
.replace(/\s+/g,' ');
}
var req = http.request(options, function(res) {
res.setEncoding('utf8');
var body = "";
res.on('data', function(chunk) {
body += chunk;
});
res.on('end', function() {
tidy(body, function(err, html) {
rows = [];
//fs.writeFile("tidydata.txt", html, function(err) {});
var $ = cheerio.load(html);
var department = "";
var k = -1;
var c = 0;
$("tr").each(function(i, row) {
var cells = $(row).find("td");
if ((cells.length == 1) || (c > 0)) {
if (c == 0) {
c = 4;
} else if (c == 4) {
department = trim($(cells).eq(0).text());
c -= 1;
} else {
c -= 1;
}
} else if (cells.length == 5) {
rows[k].note = trim($(cells).eq(4).text());
} else if (cells.length == 11) {
cells.each(function(j, cell) {
var cell_text = trim($(cell).text());
switch(j) {
case 0:
k += 1;
rows.push({dept: department, note: ""});
break;
case 1:
rows[k].controlno = cell_text;
break;
case 2:
rows[k].courseno = cell_text;
break;
case 3:
rows[k].sectionno = cell_text;
break;
case 4:
rows[k].time = cell_text;
break;
case 5:
rows[k].room = cell_text;
break;
case 6:
rows[k].title = cell_text;
break;
case 7:
rows[k].units = cell_text;
break;
case 8:
rows[k].instructor = cell_text;
break;
case 9:
rows[k].examgroup = cell_text;
break;
case 10:
rows[k].restrictions = cell_text;
break;
default:
break;
}
});
}
});
destination(rows);
});
});
});
req.write(data);
req.end();
};