-
Notifications
You must be signed in to change notification settings - Fork 1
/
compress.js
134 lines (122 loc) · 4.05 KB
/
compress.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env node
const infile = process.argv[2];
const outfile = process.argv[3];
const LZString = require("./lz_string.js");
const fs = require("fs");
const normalize_data = (names_list) => {
let list = [];
for (let { name, count } of names_list) {
list.push([name, count]);
}
return list;
};
// Data is of form { "names": { "neutral": {"name": ..., "count": number}[], "female": [], "male": []}, "totals": {"neutral": number, "female": number, "male": number } }
const calculate_prefix_maps = (data) => {
let normalized_map = (s) =>
prefix_map(normalize_data(data["names"][s] || []));
let normalized_reduced_map = (s) => reduce_prefix_map(normalized_map(s));
//console.log(JSON.stringify(normalized_reduced_map("male"), null, 2));
//
let names = {
neutral: normalized_reduced_map("neutral"),
male: normalized_reduced_map("male"),
female: normalized_reduced_map("female"),
};
// Check calculations
let cloned_names = JSON.parse(JSON.stringify(names));
for (let gender of Object.keys(data.names)) {
count_reduced_prefix_map(cloned_names[gender]);
if (data.totals[gender] !== cloned_names[gender]["#"]) {
console.error(
"Invalid prefix maps, counts are ",
data.totals[gender],
"and",
cloned_names[gender]["#"]
);
}
}
return {
names,
totals: data.totals,
};
};
// list is a list of [name, count] pairs
// Returns a (verbose) prefix map
const prefix_map = (list) => {
let tree = {};
for (let list_index in list) {
let item = list[list_index];
let name = item[0];
let current_tree = tree;
for (character_index in name) {
let character = name[character_index];
if (Object.keys(current_tree).indexOf(character) === -1) {
current_tree[character] = {};
}
current_tree = current_tree[character];
}
current_tree[""] = item[1]; // set the score
}
return tree;
};
const is_reducible = (map, key) => {
let sub_keys = Object.keys(map[key]);
if (sub_keys.length === 1) {
return sub_keys[0];
} else {
return null;
}
};
// map is a prefix map
// reduces the prefix map to be as small as possible by combining subnodes that have only one child
const reduce_prefix_map = (map) => {
let keys_to_reduce = Object.keys(map);
while (keys_to_reduce.length > 0) {
let key = keys_to_reduce.pop();
let subkey = is_reducible(map, key);
if (subkey !== null) {
let current = map[key];
delete map[key];
map[key + subkey] = current[subkey];
keys_to_reduce.push(key + subkey); // Let this one be checked again
} else {
reduce_prefix_map(map[key]);
}
}
return map;
};
/** Find count for a key in a counted reduce_prefix_map
*
*/
function find_count(map) {
if (Object.keys(map).length === 0) {
return map;
} else {
return map["#"];
}
}
/** Change a reduced prefix map so it becomes a counted reduced prefix map
* It contains an extra field "#" in each map that has to total number usages of names in the node (including subnodes)
* The top level contains an "#" field with the total number of occurrences
*/
function count_reduced_prefix_map(map) {
var keys = Object.keys(map);
if (keys.length === 0) {
return;
}
for (var key_index = 0; key_index < keys.length; key_index++) {
var key = keys[key_index];
count_reduced_prefix_map(map[key]);
}
var count = 0;
for (var key_index = 0; key_index < keys.length; key_index++) {
var key = keys[key_index];
count += find_count(map[key]);
}
map["#"] = count;
}
fs.readFile(infile, { encoding: "utf-8" }, (err, data) => {
let comp = JSON.stringify(calculate_prefix_maps(JSON.parse(data)));
comp = LZString.compressToBase64(comp);
fs.writeFile(outfile, comp, { encoding: "utf-8" }, (err) => {});
});