-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dawg.py
62 lines (52 loc) · 1.8 KB
/
Dawg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class Dawg:
"""
Provides the functionality of a prefix retrieval tree (trie) but is more space
optimized.
"""
class Node:
EOW = 'EOW'
"""
A node in a dawg.
"""
def __init__(self, letter):
self.letter = letter
self.children = {}
def putChild(self, child):
self.children[child.letter] = child
def getChild(self, letter):
return self.children[letter]
def hasChild(self, letter):
return letter in self.children
def __init__(self, path):
self.root = Dawg.Node('')
self.eow = Dawg.Node(Dawg.Node.EOW)
with open(path) as f:
lines = f.readlines()
numNodes = int(lines[0])
nodes = [0] * numNodes
edges = []
lines = [l.strip().split('\t') for l in lines[1:]]
for (i, line) in enumerate(lines):
letter = line[0]
if letter == 'root':
nodes[i] = self.root
else:
nodes[i] = Dawg.Node(letter)
for child in line[1:]:
if child == Dawg.Node.EOW:
nodes[i].putChild(self.eow)
else:
edges.append((i, int(child)))
for (source, dest) in edges:
nodes[source].putChild(nodes[dest])
def _enumerate(self, prefix, root, results):
if root.hasChild(Dawg.Node.EOW):
results.append(prefix)
children = root.children.items()
children.sort()
for (letter, child) in children:
self._enumerate(prefix + letter, child, results)
def enumerate(self):
results = []
self._enumerate('', self.root, results)
return results