-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathparse_file.php
127 lines (91 loc) · 2.9 KB
/
parse_file.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php
require_once('classes/tree.php');
require_once('classes/log.php');
/**
* Recursively creates tree with children
* @param $tree - current level tree node
*/
function recurse(&$tree) {
global $words;
// for each child
for ($i = 0; $i < count($tree->children); $i++) {
$c = '';
$pos = $tree->children[$i]->from; // position line in input file
$first_word_part = substr($words[$tree->children[$i]->from], 0, $tree->children[$i]->level + 1);
while (isset($words[$pos][$tree->children[$i]->level]) && $first_word_part == substr($words[$pos], 0, $tree->children[$i]->level + 1)) {
// add child
if (isset($words[$pos][$tree->children[$i]->level + 1]) && $c != $words[$pos][$tree->children[$i]->level + 1]) {
$c = $words[$pos][$tree->children[$i]->level + 1];
$tree->children[$i]->addChild($c, $pos);
}
// last symbol in word
if (!isset($words[$pos][$tree->children[$i]->level + 1])) {
$tree->children[$i]->last = true;
}
$pos++;
}
// iterate to each children
recurse($tree->children[$i]);
}
}
/**
* Counts children recursively in tree node
* @param $tree - tree node
*/
function count_children(&$tree) {
$count = count($tree->children);
for ($i = 0; $i < count($tree->children); $i++) {
$count += count_children($tree->children[$i]);
}
return $count;
}
/**
* Saves tree into serialized file
* @param $tree
* @param $fp
*/
function save_tree(&$tree, $fp, $is_last_leaf = 0) {
fwrite($fp, ($tree->letter ? $tree->letter : ' ')); // letter
if ($is_last_leaf) {
fwrite($fp, ($tree->last ? '*' : '/')); // flag of end-of-the-word
} else {
fwrite($fp, ($tree->last ? '+' : '-')); // flag of end-of-the-word
}
fwrite($fp, pack('L', count_children($tree))); // packed 4 bytes count of children
// recursively saves each child
for ($i = 0; $i < count($tree->children); $i++) {
save_tree($tree->children[$i], $fp, ($i == count($tree->children) - 1) ? 1 : 0);
}
}
$time_start = microtime(true);
$filename = 'words.txt';
if (!file_exists($filename)) {
print 'File '. $filename . ' not exists!';
return;
}
// reads all input words separated by new-lines
$words = file($filename, FILE_IGNORE_NEW_LINES + FILE_SKIP_EMPTY_LINES);
$tree = new Tree('!');
$c = '';
// creates first-level tree with first-word-letters children
for ($i=0; $i<count($words); $i++) {
$words[$i] = strtolower(trim($words[$i]));
if ($c != $words[$i][0]) {
$c = $words[$i][0];
$tree->addChild($c, $i);
}
}
// recursively creates children
recurse($tree);
$time_end = microtime(true);
$time = $time_end - $time_start;
print "Create Tree $time seconds\n";
$time_start = microtime(true);
// saves binary serialized tree to file
$fp = fopen('tree.txt', 'w+b');
save_tree($tree, $fp);
fclose($fp);
$time_end = microtime(true);
$time = $time_end - $time_start;
print "Save Tree $time seconds\n";
dp($tree);