Python3 support #17

Draft
wants to merge 6 commits into base: master
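This draft ports the python/cmusphinx modules from Python 2 to Python 3 using the standard 2to3-style substitutions. A minimal sketch of the recurring idioms in this patch (the names and values here are illustrative, not taken from the diff):

```python
counts = {"a": 1, "b": 2}          # hypothetical data

# raise Class, msg              ->  raise Class(msg)
# d.iteritems()/d.itervalues()  ->  d.items()/d.values()  (views, not lists)
for key, value in counts.items():
    pass

# keys = d.keys(); keys.sort()  ->  keys = sorted(d)
keys = sorted(counts)

# print "x", y                  ->  print("x", y)
# print >>fh, "x"               ->  print("x", file=fh)
print("total", sum(counts.values()))
```

A few hunks leave neighboring Python 2 constructs untouched (generator `.next()` calls, a `cmp`-style sort, the `file()` builtin); these are noted alongside the affected files below.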
19 changes: 9 additions & 10 deletions python/cmusphinx/arpalm.py
@@ -52,7 +52,7 @@ def tokenize():
for x in spam.strip().split():
yield x
def fail(msg):
- raise RuntimeError, msg
+ raise RuntimeError(msg)
t = tokenize()
if t.next() != '{':
fail("Expected {")
@@ -143,7 +143,7 @@ def normalize(self):
Normalize probabilities.
"""
for c in self.classes:
- t = sum(self.classes[c].itervalues())
+ t = sum(self.classes[c].values())
if t != 0:
for w in self.classes[c]:
self.classes[c][w] /= t
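Replacing `itervalues()` with `values()` is correct here; `sum()` consumes the Python 3 view just fine. One porting subtlety this hunk does not address: `/=` is true division in Python 3, so if the class weights were ever integers the normalization now yields floats instead of truncating toward zero. A sketch with hypothetical integer counts:

```python
classes = {"DIGITS": {"one": 2, "two": 2}}   # hypothetical integer counts
for c in classes:
    t = sum(classes[c].values())             # a view works for sum()
    if t != 0:
        for w in classes[c]:
            classes[c][w] /= t               # py3: 2/4 == 0.5; py2 ints: 2/4 == 0
print(classes["DIGITS"]["one"])              # 0.5 under Python 3
```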
@@ -228,7 +228,7 @@ def read(self, path):
# Read unigrams and create word id list
spam = fh.readline().rstrip()
if spam != "\\1-grams:":
raise Exception, "1-grams marker not found"
raise Exception("1-grams marker not found")
# ID to word mapping
self.widmap = []
wordid = 0
@@ -268,7 +268,7 @@ def read(self, path):
ng = tuple(spam[1:])
b = 0.0
else:
raise RuntimeError, "Found %d-gram in %d-gram section" % (len(spam)-1, n)
raise RuntimeError("Found %d-gram in %d-gram section" % (len(spam)-1, n))
# N-Gram info
self.ngrams[n-1][ngramid,:] = p, b
self.ngmap[n-1][ng] = ngramid
@@ -307,8 +307,7 @@ def save(self, path):
fh.write("ngram %d=%d\n" % (n, self.ng_counts[n]))
for n in range(1, self.n+1):
fh.write("\n\\%d-grams:\n" % n)
- ngrams = self.ngmap[n-1].keys()
- ngrams.sort()
+ ngrams = sorted(self.ngmap[n-1].keys())
if '<UNK>' in self.ngmap[n-1]:
ngid = self.ngmap[n-1]['<UNK>']
score, bowt = self.ngrams[n-1][ngid]
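The `sorted()` rewrite above is a required fix rather than a style change: in Python 3, `dict.keys()` returns a view object with no `.sort()` method. A small sketch with a hypothetical n-gram map:

```python
ngmap = {("b",): 1, ("a",): 0}   # hypothetical n-gram -> id map
# Python 2 only:
#   ngrams = ngmap.keys(); ngrams.sort()
# Works on both 2 and 3:
ngrams = sorted(ngmap.keys())    # or simply sorted(ngmap)
assert ngrams == [("a",), ("b",)]
```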
@@ -356,7 +355,7 @@ def mgrams(self, m):
@return: Iterator over N-Grams
@rtype: generator(NGram)
"""
- for ng, ngid in self.ngmap[m].iteritems():
+ for ng, ngid in self.ngmap[m].items():
if isinstance(ng, str):
ng = (ng,)
yield self.NGram(ng, *self.ngrams[m][ngid,:])
@@ -418,7 +417,7 @@ def prob(self, *syms):
# Use <UNK>
return self.ngrams[0][self.ngmap[0]['<UNK>']][0]
else:
raise IndexError, "Unknown unigram %s" % syms[0]
raise IndexError("Unknown unigram %s" % syms[0])
else:
# Forward N-gram (since syms is reversed)
fsyms = tuple(reversed(syms))
@@ -478,7 +477,7 @@ def adapt_rescale(self, unigram, vocab=None):
# Rescaled total probabilities
newtprob = numpy.zeros(self.ngrams[n-1].shape[0], 'd')
# For each N-gram, accumulate and rescale
- for ng,idx in self.ngmap[n].iteritems():
+ for ng,idx in self.ngmap[n].items():
h = ng[0:-1]
if n == 1: # Quirk of unigrams
h = h[0]
@@ -491,7 +490,7 @@
self.ngrams[n][idx,0] = numpy.log(prob)
# Now renormalize everything
norm = tprob / newtprob
- for ng,idx in self.ngmap[n].iteritems():
+ for ng,idx in self.ngmap[n].items():
h = ng[0:-1]
if n == 1: # Quirk of unigrams
h = h[0]
31 changes: 15 additions & 16 deletions python/cmusphinx/cluster_mixw.py
@@ -9,27 +9,27 @@
# are met:
#
# 1. Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
+ # notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
- # This work was supported in part by funding from the Defense Advanced
- # Research Projects Agency and the National Science Foundation of the
+ # This work was supported in part by funding from the Defense Advanced
+ # Research Projects Agency and the National Science Foundation of the
# United States of America, and the CMU Sphinx Speech Consortium.
#
- # THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
# NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ====================================================================
@@ -93,14 +93,13 @@ def prunetree(tree, nleaves):
for branch in subtree:
subtree2, centroid2 = branch
newleafnodes.append((subtree2, centroid2))
print "Number of leafnodes", len(newleafnodes)
print("Number of leafnodes", len(newleafnodes))
leafnodes = newleafnodes
# Now flatten out the leafnodes to their component distributions
for i, leaf in enumerate(leafnodes):
subtree, centroid = leaf
- senones = list(leaves(leaf))
# Sort senones for each leafnode
- senones.sort()
+ senones = sorted(leaves(leaf))
leafnodes[i] = (senones, centroid)
# Sort leafnodes by senone ID
leafnodes.sort(lambda x,y: cmp(x[0][0],y[0][0]))
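Review note: the `print` conversions in this hunk are straightforward, but the unchanged `leafnodes.sort(lambda x,y: cmp(x[0][0],y[0][0]))` just above is still Python 2 only: `cmp()` is gone in Python 3 and `list.sort()` no longer accepts a comparison function. The equivalent `key=` form, as a sketch with hypothetical leafnodes:

```python
# Hypothetical leafnodes: (senone_id_list, centroid) pairs
leafnodes = [([3, 7], None), ([1, 2], None)]
# Python 2: leafnodes.sort(lambda x, y: cmp(x[0][0], y[0][0]))
leafnodes.sort(key=lambda x: x[0][0])   # sort by first senone ID
assert leafnodes[0][0] == [1, 2]
```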
@@ -214,7 +213,7 @@ def cluster_merged(mixw, dfunc=multi_js):
q = centroids[best]
# Merge these two
newcentroid = (p + q) * 0.5
print "Merging", i, best, dist[best], len(centroids)
print("Merging", i, best, dist[best], len(centroids))
newtree = ((trees[i], p), (trees[best], q))
centroids[i] = newcentroid
trees[i] = newtree
@@ -287,12 +286,12 @@ def write_senmgau(outfile, tree, mixw, nclust):
big = clusters[0]
del clusters[0:1]
clusters.extend((big[0][0], big[0][1]))
print "cluster sizes:", [len(leaves(x)) for x in clusters]
print("cluster sizes:", [len(leaves(x)) for x in clusters])
mixwmap = numpy.zeros(len(mixw), 'int32')
for i, c in enumerate(clusters):
for mixwid in leaves(c):
mixwmap[mixwid] = i
print "writing %d senone mappings" % len(mixwmap)
print("writing %d senone mappings" % len(mixwmap))
s3senmgau.open(outfile, "wb").write_mapping(mixwmap)

if __name__ == '__main__':
8 changes: 4 additions & 4 deletions python/cmusphinx/cmllr.py
@@ -51,7 +51,7 @@ def estimate_cmllr(stats, inmean, invar, mdef):
cofact = np.zeros(ndim+1)
# Beta
B = 0
- print 'Get statistics & sum it'
+ print('Get statistics & sum it')
# CD only : just sum over all CD densities
for j in range(mdef.n_ci_sen, inmean.n_mgau):
# print 'state = %i' % j
@@ -102,7 +102,7 @@ def estimate_cmllr(stats, inmean, invar, mdef):
cofact = get_cofact(A,i)
# Get alpha
alpha = get_alpha(Ginv,K[i],B,cofact)
print "alpha : %f" % alpha
print("alpha : %f" % alpha)
W = np.zeros(ndim+1)
tvec = alpha * cofact + K[i]
W = np.dot(Ginv,tvec)
@@ -112,7 +112,7 @@
A[i,:] = W[0:ndim]
bias[i] = W[ndim]
else:
- print 'NOT updating row %i, iter %i,( %f > %f )' % ( i , niter, like_old, like_new )
+ print('NOT updating row %i, iter %i,( %f > %f )' % ( i , niter, like_old, like_new ))
#to preserve compatibility with write_mllr
Wi = np.c_[bias,A]
Ws.append(Wi)
@@ -142,7 +142,7 @@ def get_alpha(Ginv,K,B,cofact):
d = b * b - 4 * a * c
if ( d < 0 ) :
#solutions must be real
- print 'Warning : determinant < 0'
+ print('Warning : determinant < 0')
d = 0
d = math.sqrt(d)
alpha1 = (- b + d ) / ( 2 * a )
4 changes: 2 additions & 2 deletions python/cmusphinx/corpus.py
@@ -125,12 +125,12 @@ def __init__(self, corpus, part=1, npart=1):
if npart > 1:
pass
else:
- for k, v in corpus.resources.iteritems():
+ for k, v in corpus.resources.items():
self.iters[k] = iter(v)

def next(self):
utt = {}
- for k,v in self.iters.iteritems():
+ for k,v in self.iters.items():
utt[k] = v.next()
return utt
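Review note: this hunk converts `iteritems()` but leaves the iterator protocol itself untouched. In Python 3 the method must be named `__next__`, and the inner `v.next()` call needs the `next()` builtin. A sketch of the remaining change, with a hypothetical stand-in class:

```python
class CorpusIter:                          # hypothetical stand-in
    def __init__(self, iters):
        self.iters = iters

    def __iter__(self):
        return self

    def __next__(self):                    # Python 2 named this next()
        # next(v) is portable across Python 2 and 3 iterators
        return {k: next(v) for k, v in self.iters.items()}

ci = CorpusIter({"sent": iter(["hello", "world"])})
print(next(ci))                            # {'sent': 'hello'}
```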

8 changes: 4 additions & 4 deletions python/cmusphinx/dict_spd.py
@@ -19,7 +19,7 @@

if __name__ == "__main__":
if len(sys.argv) < 3:
print>>sys.stderr, "Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0]
print("Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0], file=sys.stderr)
sys.exit(1)
indict = s3dict.open(sys.argv[1])
counts = defaultdict(int)
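The conversion above is the standard one: the `file=` keyword fully replaces the Python 2 chevron form, for `sys.stderr` and for the regular file handles used later in this script alike. A small self-contained sketch (the handle here is a `StringIO` stand-in, not from the patch):

```python
import io
import sys

# Python 2: print >>sys.stderr, "Usage: %s ..." % sys.argv[0]
print("Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0], file=sys.stderr)

# The same keyword works for any writable object:
outfh = io.StringIO()                      # stands in for the output dict file
print("word\t\tW ER D", file=outfh)        # py2: print >>outfh, "word\t\t..."
```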
@@ -37,7 +37,7 @@
for w in words:
alts = sum(1 for x in indict.alts(w))
if alts == 1:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[w]))
print("%s\t\t%s" % (w, " ".join(indict[w])), file=outfh)
else:
bestalt = None
bestcount = 0
@@ -50,7 +50,7 @@
bestcount = counts[wstr]
bestalt = wstr
if bestalt == None:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[w]))
print("%s\t\t%s" % (w, " ".join(indict[w])), file=outfh)
else:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[bestalt]))
print("%s\t\t%s" % (w, " ".join(indict[bestalt])), file=outfh)

6 changes: 3 additions & 3 deletions python/cmusphinx/fstutils.py
@@ -158,7 +158,7 @@ def add_ngram_arcs(fst, symtab, lm, n, sidtab):
while tuple(ng.words[spos:]) not in sidtab:
spos += 1
if spos == n:
raise RuntimeError, "Unable to find suffix N-gram for", ng.wids
raise RuntimeError("Unable to find suffix N-gram for").with_traceback(ng.wids)
dest = sidtab[tuple(ng.words[spos:])]
fst.AddArc(src, openfst.StdArc(wsym, wsym, -ng.log_prob, dest))
#print "Adding %d-gram arc %d => %d %s/%.4f" % (n, src, dest, ng.words[n-1], -ng.log_prob)
@@ -270,7 +270,7 @@ def normalize(self):
Normalize probabilities.
"""
for c in self.classes:
- t = sum(self.classes[c].itervalues())
+ t = sum(self.classes[c].values())
if t != 0:
for w in self.classes[c]:
self.classes[c][w] /= t
@@ -300,7 +300,7 @@ def build_classfst(probdef, isyms=None):
fst.AddArc(st, label, label, 0, st)
for c in probdef.classes:
clabel = symtab.AddSymbol(c)
- for word, prob in probdef.classes[c].iteritems():
+ for word, prob in probdef.classes[c].items():
wlabel = symtab.AddSymbol(word)
fst.AddArc(st, wlabel, clabel, -math.log(prob), st)
fst.SetOutputSymbols(symtab)
2 changes: 1 addition & 1 deletion python/cmusphinx/htkmfc.py
@@ -50,7 +50,7 @@ def open(f, mode=None, veclen=13):
elif mode in ('w', 'wb'):
return HTKFeat_write(f, veclen)
else:
raise Exception, "mode must be 'r', 'rb', 'w', or 'wb'"
raise Exception("mode must be 'r', 'rb', 'w', or 'wb'")

class HTKFeat_read(object):
"Read HTK format feature files"
24 changes: 12 additions & 12 deletions python/cmusphinx/lat2dot.py
@@ -19,12 +19,12 @@ def lattice_s3(latfile):
items = line.strip().split(" ")
if mode == "node":
if items[1] != "":
print items[0] + " [label = \"" + items[1] + " " + items[2] + " " + items[3] + " " + items[4] + "\"];"
print(items[0] + " [label = \"" + items[1] + " " + items[2] + " " + items[3] + " " + items[4] + "\"];")
else:
print "node " + items[0] + ";"
print("node " + items[0] + ";")
if mode == "edge":
print items[0] + " -> " + items[1] + " [label = \"" + items[2] + "\"];"
print "}"
print(items[0] + " -> " + items[1] + " [label = \"" + items[2] + "\"];")
print("}")

def create_map(items):
dct = {}
@@ -38,24 +38,24 @@ def lattice_htk_wordnode(latfile):
items = line.strip().split()
if items[0].startswith("I="):
dct = create_map(items)
if dct.has_key("W"):
print dct["J"] + " [label = \"" + dct[W] + "\"];"
if "W" in dct:
print(dct["J"] + " [label = \"" + dct[W] + "\"];")
if items[0].startswith("J="):
dct = create_map(items)
if dct.has_key("W"):
print dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["W"] + "," + dct["a"] + "," + dct["l"] + "\"];"
if "W" in dct:
print(dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["W"] + "," + dct["a"] + "," + dct["l"] + "\"];")
else:
print dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["a"] + "," + dct["l"] + "\"];"
print "}"
print(dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["a"] + "," + dct["l"] + "\"];")
print("}")

if __name__ == '__main__':
latfilename = sys.argv[1]
latfile = open(latfilename, "r")

print """
print("""
digraph lattice {
rankdir=LR;
"""
""")

if latfilename.endswith("slf"):
lattice_htk_wordnode(latfile)
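Review note on this file: `dict.has_key()` is removed in Python 3, so the `in` conversions are required. The node branch (the `I=` case) also had two pre-existing bugs that this port is the natural place to fix, as done in the hunk above: `dct[W]` was a `NameError` (missing quotes), and HTK SLF node lines carry an `I=` field, not `J=`, so `dct["J"]` would `KeyError`. A sketch of the corrected lookup (this `create_map` is an illustrative reimplementation, not the patch's):

```python
def create_map(items):
    return dict(item.split("=", 1) for item in items if "=" in item)

# Hypothetical HTK SLF node line: I=<id> W=<word> ...
dct = create_map("I=0 W=hello t=0.12".split())
if "W" in dct:                        # py2's dct.has_key("W") is gone in py3
    print(dct["I"] + ' [label = "' + dct["W"] + '"];')
```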
2 changes: 1 addition & 1 deletion python/cmusphinx/lat2fsg.py
@@ -259,5 +259,5 @@ def lat2flat(latfile, fsgfile, lmfst):
for spam in file(ctlfile):
latfile = os.path.join(latdir, spam.strip() + ".slf")
fsgfile = os.path.join(latdir, spam.strip() + opts.outext)
- print spam,
+ print(spam, end=' ')
ofst = lat2fsg_posterior(latfile, fsgfile, opts.prune, errfst)
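Review note: one more py2-ism survives in this driver loop (and in lat_rescore.py below): the `file()` builtin is gone in Python 3; `open()` is the portable spelling. A sketch of the loop with a `StringIO` standing in for the control file (names here are illustrative):

```python
import io
import os

latdir, outext = "lats", ".fsg"              # hypothetical values from opts
ctlfh = io.StringIO("utt01\nutt02\n")        # stands in for open(ctlfile)
# Python 2: for spam in file(ctlfile):  ->  for spam in open(ctlfile):
for spam in ctlfh:
    latfile = os.path.join(latdir, spam.strip() + ".slf")
    fsgfile = os.path.join(latdir, spam.strip() + outext)
    print(spam.strip(), latfile, fsgfile)
```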
2 changes: 1 addition & 1 deletion python/cmusphinx/lat_rescore.py
@@ -34,4 +34,4 @@ def lat_rescore(latfile, lmfst):
for spam in file(ctlfile):
latfile = os.path.join(latdir, spam.strip() + ".lat.gz")
words, score = lat_rescore(latfile, lm)
print " ".join(words), "(%s %f)" % (spam.strip(), score)
print(" ".join(words), "(%s %f)" % (spam.strip(), score))
2 changes: 1 addition & 1 deletion python/cmusphinx/lat_rescore_fst.py
@@ -77,4 +77,4 @@ def lat_rescore(dag, lmfst, lw=9.5):
except IOError:
dag = lattice.Dag(htk_file=os.path.join(latdir, spam.strip() + ".slf"))
words, score = lat_rescore(dag, lmfst, opts.lw)
print " ".join(words), "(%s %f)" % (spam.strip(), score)
print(" ".join(words), "(%s %f)" % (spam.strip(), score))
6 changes: 3 additions & 3 deletions python/cmusphinx/lattice.py
@@ -258,7 +258,7 @@ def dag2htk(self, htkfile, lm=None):
# Ensure some header fields are there
if 'VERSION' not in self.header:
self.header['VERSION'] = '1.0'
- for k,v in self.header.iteritems():
+ for k,v in self.header.items():
# Skip Sphinx stuff
if k[0] == '-':
continue
@@ -309,7 +309,7 @@ def dag2fst(self, fstfile, symfile=None, altpron=False):
fh.write("%d 0" % idmap[self.end])
fh.close()
if symfile:
- for k, v in symmap.iteritems():
+ for k, v in symmap.items():
sfh.write("%s %d\n" % (k, v))
sfh.close()

@@ -394,7 +394,7 @@ def dag2sphinx(self, outfile, logbase=1.0003):
fh = open(outfile, "w")
fh.write("# getcwd: %s\n" % self.getcwd)
fh.write("# -logbase %e\n" % logbase)
- for arg, val in self.header.iteritems():
+ for arg, val in self.header.items():
if arg != '-logbase':
fh.write("# %s %s\n" % (arg,val))
fh.write("#\n")