-
Notifications
You must be signed in to change notification settings - Fork 5
/
generator.py
executable file
·132 lines (97 loc) · 3.72 KB
/
generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import random
import verbose
import deserializer
from sentence import Sentence
class NoWordFound(Exception):
    """Raised when a Markov chain cannot supply a next word."""
class SentenceGenerator(object):
    """Sentence generator.

    Produces the next word of a sentence from a list of n-gram Markov
    chains. The chains are tried from the last one in the list to the first;
    a chain is skipped when the sentence is shorter than the chain's order or
    when the chain cannot produce a word for the current context.
    """

    def __init__(self, chains):
        """Constructs a SentenceGenerator.

        Args:
            chains: Ordered list of n-grams. Unigram should be at index 0,
                followed by bigram at index 1, and so on.
        """
        self._chains = chains
        self.__generator = random.SystemRandom()

    def generate_random_word(self, sentence):
        """Generates the next word in a sentence at random.

        Args:
            sentence: Sentence.

        Returns:
            Tuple of (random word, probability of it occurring).

        Raises:
            NoWordFound: If no chain can produce a word.
        """
        return self._generate_word(sentence, self._get_random_word)

    def generate_most_likely_word(self, sentence):
        """Generates the next most likely word in a sentence.

        Args:
            sentence: Sentence.

        Returns:
            Tuple of (most likely word, probability of it occurring).

        Raises:
            NoWordFound: If no chain can produce a word.
        """
        return self._generate_word(sentence, self._get_most_likely_word)

    def _generate_word(self, sentence, word_generator):
        """Generates the next word in a sentence.

        Args:
            sentence: Sentence.
            word_generator: Function used to generate word. It is called as
                word_generator(present_states, chain).

        Returns:
            Tuple of (generated word, probability of it occurring).

        Raises:
            NoWordFound: If every chain was skipped or failed to produce a
                word.
        """
        word_count = len(sentence)
        for chain in reversed(self._chains):
            present_states_required = chain.order
            if word_count < present_states_required:
                # Not enough context for this chain yet; try the next one.
                continue
            present_states = sentence.get_last(present_states_required)
            try:
                return word_generator(present_states, chain)
            except NoWordFound:
                # This chain has nothing for the context; back off.
                continue
        # Should not be reached.
        raise NoWordFound("No chain could produce a next word")

    @staticmethod
    def _get_possible_outcomes(present_states, chain):
        """Collects the outcomes of a chain for the given present states.

        Args:
            present_states: List of present states.
            chain: Markov chain.

        Returns:
            Tuple of (list of (outcome, probability) pairs, sum of the
            probabilities).

        Raises:
            NoWordFound: If the present states are not in the chain, or the
                probabilities do not add up to a positive value.
        """
        if present_states not in chain:
            raise NoWordFound("Markov chain has no possible outcome")
        possible_outcomes = list(chain.yield_future_states(present_states))
        normalization_factor = sum(prob for _, prob in possible_outcomes)
        if normalization_factor <= 0:
            # Nothing can be drawn, and dividing by the factor would fail.
            raise NoWordFound("Markov chain has no possible outcome")
        return possible_outcomes, normalization_factor

    def _get_random_word(self, present_states, chain):
        """Gets a random word given the current state and n-gram using the
        specified conditional probabilities.

        Args:
            present_states: List of present states.
            chain: Markov chain.

        Returns:
            Tuple of (random word, probability of it occurring).

        Raises:
            NoWordFound: If the chain has no usable outcome for the present
                states.
        """
        # Normalize since the conditional probabilities are not guaranteed to
        # sum up to 1.
        possible_outcomes, normalization_factor = self._get_possible_outcomes(
            present_states, chain)
        rand = self.__generator.uniform(0, normalization_factor)
        for outcome, prob in possible_outcomes:
            if rand <= prob:
                return (outcome, prob / normalization_factor)
            rand -= prob
        # Only floating-point rounding in the subtractions above can leave a
        # remainder here; the draw then belongs to the last outcome.
        outcome, prob = possible_outcomes[-1]
        return (outcome, prob / normalization_factor)

    def _get_most_likely_word(self, present_states, chain):
        """Gets the most probable word given the current state and n-gram.

        When several outcomes share the highest probability, the first one
        yielded by the chain is returned.

        Args:
            present_states: List of present states.
            chain: Markov chain.

        Returns:
            Tuple of (most likely word, probability of it occurring).

        Raises:
            NoWordFound: If the chain has no usable outcome for the present
                states.
        """
        possible_outcomes, normalization_factor = self._get_possible_outcomes(
            present_states, chain)
        outcome, prob = max(possible_outcomes, key=lambda pair: pair[1])
        return (outcome, prob / normalization_factor)
def generate(generator):
    """Builds a complete sentence one random word at a time.

    Args:
        generator: Sentence generator; its generate_random_word method is
            called with the sentence built so far.

    Returns:
        Tuple of (generated sentence, total probability), where the total
        probability is the product of the probabilities of every word added.
    """
    built = Sentence()
    cumulative = 1.0
    while True:
        if built.complete:
            return (built, cumulative)
        next_word, likelihood = generator.generate_random_word(built)
        built.add(next_word)
        cumulative = cumulative * likelihood
if __name__ == "__main__":
    # Build the generator from the deserialized n-grams.
    sentence_generator = SentenceGenerator(deserializer.get_all_ngrams())
    # Read the verbosity setting before generating the sentence.
    show_probability = verbose.is_verbose()
    result, probability = generate(sentence_generator)
    print(result)
    if show_probability:
        print("probability:", probability)