Python3 support #17

Draft
wants to merge 6 commits into base: master
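This draft ports the python/cmusphinx modules from Python 2 to Python 3 using the standard 2to3-style substitutions. A minimal sketch of the recurring idioms in this patch (the names and values here are illustrative, not taken from the diff):

```python
counts = {"a": 1, "b": 2}          # hypothetical data

# raise Class, msg              ->  raise Class(msg)
# d.iteritems()/d.itervalues()  ->  d.items()/d.values()  (views, not lists)
for key, value in counts.items():
    pass

# keys = d.keys(); keys.sort()  ->  keys = sorted(d)
keys = sorted(counts)

# print "x", y                  ->  print("x", y)
# print >>fh, "x"               ->  print("x", file=fh)
print("total", sum(counts.values()))
```

A few hunks leave neighboring Python 2 constructs untouched (generator `.next()` calls, a `cmp`-style sort, the `file()` builtin); these are noted alongside the affected files below.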
19 changes: 9 additions & 10 deletions python/cmusphinx/arpalm.py
@@ -52,7 +52,7 @@ def tokenize():
for x in spam.strip().split():
yield x
def fail(msg):
- raise RuntimeError, msg
+ raise RuntimeError(msg)
t = tokenize()
if t.next() != '{':
fail("Expected {")
@@ -143,7 +143,7 @@ def normalize(self):
Normalize probabilities.
"""
for c in self.classes:
- t = sum(self.classes[c].itervalues())
+ t = sum(self.classes[c].values())
if t != 0:
for w in self.classes[c]:
self.classes[c][w] /= t
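Replacing `itervalues()` with `values()` is correct here; `sum()` consumes the Python 3 view just fine. One porting subtlety this hunk does not address: `/=` is true division in Python 3, so if the class weights were ever integers the normalization now yields floats instead of truncating toward zero. A sketch with hypothetical integer counts:

```python
classes = {"DIGITS": {"one": 2, "two": 2}}   # hypothetical integer counts
for c in classes:
    t = sum(classes[c].values())             # a view works for sum()
    if t != 0:
        for w in classes[c]:
            classes[c][w] /= t               # py3: 2/4 == 0.5; py2 ints: 2/4 == 0
print(classes["DIGITS"]["one"])              # 0.5 under Python 3
```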
@@ -228,7 +228,7 @@ def read(self, path):
# Read unigrams and create word id list
spam = fh.readline().rstrip()
if spam != "\\1-grams:":
raise Exception, "1-grams marker not found"
raise Exception("1-grams marker not found")
# ID to word mapping
self.widmap = []
wordid = 0
@@ -268,7 +268,7 @@ def read(self, path):
ng = tuple(spam[1:])
b = 0.0
else:
raise RuntimeError, "Found %d-gram in %d-gram section" % (len(spam)-1, n)
raise RuntimeError("Found %d-gram in %d-gram section" % (len(spam)-1, n))
# N-Gram info
self.ngrams[n-1][ngramid,:] = p, b
self.ngmap[n-1][ng] = ngramid
@@ -307,8 +307,7 @@ def save(self, path):
fh.write("ngram %d=%d\n" % (n, self.ng_counts[n]))
for n in range(1, self.n+1):
fh.write("\n\\%d-grams:\n" % n)
- ngrams = self.ngmap[n-1].keys()
- ngrams.sort()
+ ngrams = sorted(self.ngmap[n-1].keys())
if '<UNK>' in self.ngmap[n-1]:
ngid = self.ngmap[n-1]['<UNK>']
score, bowt = self.ngrams[n-1][ngid]
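The `sorted()` rewrite above is a required fix rather than a style change: in Python 3, `dict.keys()` returns a view object with no `.sort()` method. A small sketch with a hypothetical n-gram map:

```python
ngmap = {("b",): 1, ("a",): 0}   # hypothetical n-gram -> id map
# Python 2 only:
#   ngrams = ngmap.keys(); ngrams.sort()
# Works on both 2 and 3:
ngrams = sorted(ngmap.keys())    # or simply sorted(ngmap)
assert ngrams == [("a",), ("b",)]
```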
@@ -356,7 +355,7 @@ def mgrams(self, m):
@return: Iterator over N-Grams
@rtype: generator(NGram)
"""
- for ng, ngid in self.ngmap[m].iteritems():
+ for ng, ngid in self.ngmap[m].items():
if isinstance(ng, str):
ng = (ng,)
yield self.NGram(ng, *self.ngrams[m][ngid,:])
@@ -418,7 +417,7 @@ def prob(self, *syms):
# Use <UNK>
return self.ngrams[0][self.ngmap[0]['<UNK>']][0]
else:
raise IndexError, "Unknown unigram %s" % syms[0]
raise IndexError("Unknown unigram %s" % syms[0])
else:
# Forward N-gram (since syms is reversed)
fsyms = tuple(reversed(syms))
@@ -478,7 +477,7 @@ def adapt_rescale(self, unigram, vocab=None):
# Rescaled total probabilities
newtprob = numpy.zeros(self.ngrams[n-1].shape[0], 'd')
# For each N-gram, accumulate and rescale
- for ng,idx in self.ngmap[n].iteritems():
+ for ng,idx in self.ngmap[n].items():
h = ng[0:-1]
if n == 1: # Quirk of unigrams
h = h[0]
@@ -491,7 +490,7 @@
self.ngrams[n][idx,0] = numpy.log(prob)
# Now renormalize everything
norm = tprob / newtprob
- for ng,idx in self.ngmap[n].iteritems():
+ for ng,idx in self.ngmap[n].items():
h = ng[0:-1]
if n == 1: # Quirk of unigrams
h = h[0]
31 changes: 15 additions & 16 deletions python/cmusphinx/cluster_mixw.py
@@ -9,27 +9,27 @@
# are met:
#
# 1. Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
+ # notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
- # This work was supported in part by funding from the Defense Advanced
- # Research Projects Agency and the National Science Foundation of the
+ # This work was supported in part by funding from the Defense Advanced
+ # Research Projects Agency and the National Science Foundation of the
# United States of America, and the CMU Sphinx Speech Consortium.
#
- # THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ # THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ # ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
# NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ====================================================================
@@ -93,14 +93,13 @@ def prunetree(tree, nleaves):
for branch in subtree:
subtree2, centroid2 = branch
newleafnodes.append((subtree2, centroid2))
print "Number of leafnodes", len(newleafnodes)
print("Number of leafnodes", len(newleafnodes))
leafnodes = newleafnodes
# Now flatten out the leafnodes to their component distributions
for i, leaf in enumerate(leafnodes):
subtree, centroid = leaf
- senones = list(leaves(leaf))
# Sort senones for each leafnode
- senones.sort()
+ senones = sorted(leaves(leaf))
leafnodes[i] = (senones, centroid)
# Sort leafnodes by senone ID
leafnodes.sort(lambda x,y: cmp(x[0][0],y[0][0]))
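Review note: the `print` conversions in this hunk are straightforward, but the unchanged `leafnodes.sort(lambda x,y: cmp(x[0][0],y[0][0]))` just above is still Python 2 only: `cmp()` is gone in Python 3 and `list.sort()` no longer accepts a comparison function. The equivalent `key=` form, as a sketch with hypothetical leafnodes:

```python
# Hypothetical leafnodes: (senone_id_list, centroid) pairs
leafnodes = [([3, 7], None), ([1, 2], None)]
# Python 2: leafnodes.sort(lambda x, y: cmp(x[0][0], y[0][0]))
leafnodes.sort(key=lambda x: x[0][0])   # sort by first senone ID
assert leafnodes[0][0] == [1, 2]
```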
@@ -214,7 +213,7 @@ def cluster_merged(mixw, dfunc=multi_js):
q = centroids[best]
# Merge these two
newcentroid = (p + q) * 0.5
print "Merging", i, best, dist[best], len(centroids)
print("Merging", i, best, dist[best], len(centroids))
newtree = ((trees[i], p), (trees[best], q))
centroids[i] = newcentroid
trees[i] = newtree
@@ -287,12 +286,12 @@ def write_senmgau(outfile, tree, mixw, nclust):
big = clusters[0]
del clusters[0:1]
clusters.extend((big[0][0], big[0][1]))
print "cluster sizes:", [len(leaves(x)) for x in clusters]
print("cluster sizes:", [len(leaves(x)) for x in clusters])
mixwmap = numpy.zeros(len(mixw), 'int32')
for i, c in enumerate(clusters):
for mixwid in leaves(c):
mixwmap[mixwid] = i
print "writing %d senone mappings" % len(mixwmap)
print("writing %d senone mappings" % len(mixwmap))
s3senmgau.open(outfile, "wb").write_mapping(mixwmap)

if __name__ == '__main__':
8 changes: 4 additions & 4 deletions python/cmusphinx/cmllr.py
@@ -51,7 +51,7 @@ def estimate_cmllr(stats, inmean, invar, mdef):
cofact = np.zeros(ndim+1)
# Beta
B = 0
- print 'Get statistics & sum it'
+ print('Get statistics & sum it')
# CD only : just sum over all CD densities
for j in range(mdef.n_ci_sen, inmean.n_mgau):
# print 'state = %i' % j
@@ -102,7 +102,7 @@ def estimate_cmllr(stats, inmean, invar, mdef):
cofact = get_cofact(A,i)
# Get alpha
alpha = get_alpha(Ginv,K[i],B,cofact)
print "alpha : %f" % alpha
print("alpha : %f" % alpha)
W = np.zeros(ndim+1)
tvec = alpha * cofact + K[i]
W = np.dot(Ginv,tvec)
@@ -112,7 +112,7 @@
A[i,:] = W[0:ndim]
bias[i] = W[ndim]
else:
- print 'NOT updating row %i, iter %i,( %f > %f )' % ( i , niter, like_old, like_new )
+ print('NOT updating row %i, iter %i,( %f > %f )' % ( i , niter, like_old, like_new ))
#to preserve compatibility with write_mllr
Wi = np.c_[bias,A]
Ws.append(Wi)
@@ -142,7 +142,7 @@ def get_alpha(Ginv,K,B,cofact):
d = b * b - 4 * a * c
if ( d < 0 ) :
#solutions must be real
- print 'Warning : determinant < 0'
+ print('Warning : determinant < 0')
d = 0
d = math.sqrt(d)
alpha1 = (- b + d ) / ( 2 * a )
4 changes: 2 additions & 2 deletions python/cmusphinx/corpus.py
@@ -125,12 +125,12 @@ def __init__(self, corpus, part=1, npart=1):
if npart > 1:
pass
else:
- for k, v in corpus.resources.iteritems():
+ for k, v in corpus.resources.items():
self.iters[k] = iter(v)

def next(self):
utt = {}
- for k,v in self.iters.iteritems():
+ for k,v in self.iters.items():
utt[k] = v.next()
return utt
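Review note: this hunk converts `iteritems()` but leaves the iterator protocol itself untouched. In Python 3 the method must be named `__next__`, and the inner `v.next()` call needs the `next()` builtin. A sketch of the remaining change, with a hypothetical stand-in class:

```python
class CorpusIter:                          # hypothetical stand-in
    def __init__(self, iters):
        self.iters = iters

    def __iter__(self):
        return self

    def __next__(self):                    # Python 2 named this next()
        # next(v) is portable across Python 2 and 3 iterators
        return {k: next(v) for k, v in self.iters.items()}

ci = CorpusIter({"sent": iter(["hello", "world"])})
print(next(ci))                            # {'sent': 'hello'}
```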

8 changes: 4 additions & 4 deletions python/cmusphinx/dict_spd.py
@@ -19,7 +19,7 @@

if __name__ == "__main__":
if len(sys.argv) < 3:
print>>sys.stderr, "Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0]
print("Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0], file=sys.stderr)
sys.exit(1)
indict = s3dict.open(sys.argv[1])
counts = defaultdict(int)
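The conversion above is the standard one: the `file=` keyword fully replaces the Python 2 chevron form, for `sys.stderr` and for the regular file handles used later in this script alike. A small self-contained sketch (the handle here is a `StringIO` stand-in, not from the patch):

```python
import io
import sys

# Python 2: print >>sys.stderr, "Usage: %s ..." % sys.argv[0]
print("Usage: %s INDICT FALIGNOUT [OUTDICT]" % sys.argv[0], file=sys.stderr)

# The same keyword works for any writable object:
outfh = io.StringIO()                      # stands in for the output dict file
print("word\t\tW ER D", file=outfh)        # py2: print >>outfh, "word\t\t..."
```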
@@ -37,7 +37,7 @@
for w in words:
alts = sum(1 for x in indict.alts(w))
if alts == 1:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[w]))
print("%s\t\t%s" % (w, " ".join(indict[w])), file=outfh)
else:
bestalt = None
bestcount = 0
@@ -50,7 +50,7 @@
bestcount = counts[wstr]
bestalt = wstr
if bestalt == None:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[w]))
print("%s\t\t%s" % (w, " ".join(indict[w])), file=outfh)
else:
print>>outfh, "%s\t\t%s" % (w, " ".join(indict[bestalt]))
print("%s\t\t%s" % (w, " ".join(indict[bestalt])), file=outfh)

6 changes: 3 additions & 3 deletions python/cmusphinx/fstutils.py
@@ -158,7 +158,7 @@ def add_ngram_arcs(fst, symtab, lm, n, sidtab):
while tuple(ng.words[spos:]) not in sidtab:
spos += 1
if spos == n:
raise RuntimeError, "Unable to find suffix N-gram for", ng.wids
raise RuntimeError("Unable to find suffix N-gram for").with_traceback(ng.wids)
dest = sidtab[tuple(ng.words[spos:])]
fst.AddArc(src, openfst.StdArc(wsym, wsym, -ng.log_prob, dest))
#print "Adding %d-gram arc %d => %d %s/%.4f" % (n, src, dest, ng.words[n-1], -ng.log_prob)
@@ -270,7 +270,7 @@ def normalize(self):
Normalize probabilities.
"""
for c in self.classes:
- t = sum(self.classes[c].itervalues())
+ t = sum(self.classes[c].values())
if t != 0:
for w in self.classes[c]:
self.classes[c][w] /= t
@@ -300,7 +300,7 @@ def build_classfst(probdef, isyms=None):
fst.AddArc(st, label, label, 0, st)
for c in probdef.classes:
clabel = symtab.AddSymbol(c)
- for word, prob in probdef.classes[c].iteritems():
+ for word, prob in probdef.classes[c].items():
wlabel = symtab.AddSymbol(word)
fst.AddArc(st, wlabel, clabel, -math.log(prob), st)
fst.SetOutputSymbols(symtab)
2 changes: 1 addition & 1 deletion python/cmusphinx/htkmfc.py
@@ -50,7 +50,7 @@ def open(f, mode=None, veclen=13):
elif mode in ('w', 'wb'):
return HTKFeat_write(f, veclen)
else:
raise Exception, "mode must be 'r', 'rb', 'w', or 'wb'"
raise Exception("mode must be 'r', 'rb', 'w', or 'wb'")

class HTKFeat_read(object):
"Read HTK format feature files"
24 changes: 12 additions & 12 deletions python/cmusphinx/lat2dot.py
@@ -19,12 +19,12 @@ def lattice_s3(latfile):
items = line.strip().split(" ")
if mode == "node":
if items[1] != "":
print items[0] + " [label = \"" + items[1] + " " + items[2] + " " + items[3] + " " + items[4] + "\"];"
print(items[0] + " [label = \"" + items[1] + " " + items[2] + " " + items[3] + " " + items[4] + "\"];")
else:
print "node " + items[0] + ";"
print("node " + items[0] + ";")
if mode == "edge":
print items[0] + " -> " + items[1] + " [label = \"" + items[2] + "\"];"
print "}"
print(items[0] + " -> " + items[1] + " [label = \"" + items[2] + "\"];")
print("}")

def create_map(items):
dct = {}
@@ -38,24 +38,24 @@ def lattice_htk_wordnode(latfile):
items = line.strip().split()
if items[0].startswith("I="):
dct = create_map(items)
if dct.has_key("W"):
print dct["J"] + " [label = \"" + dct[W] + "\"];"
if "W" in dct:
print(dct["J"] + " [label = \"" + dct[W] + "\"];")
if items[0].startswith("J="):
dct = create_map(items)
if dct.has_key("W"):
print dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["W"] + "," + dct["a"] + "," + dct["l"] + "\"];"
if "W" in dct:
print(dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["W"] + "," + dct["a"] + "," + dct["l"] + "\"];")
else:
print dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["a"] + "," + dct["l"] + "\"];"
print "}"
print(dct["S"] + " -> " + dct["E"] + " [label = \"" + dct["a"] + "," + dct["l"] + "\"];")
print("}")

if __name__ == '__main__':
latfilename = sys.argv[1]
latfile = open(latfilename, "r")

print """
print("""
digraph lattice {
rankdir=LR;
"""
""")

if latfilename.endswith("slf"):
lattice_htk_wordnode(latfile)
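Review note on this file: `dict.has_key()` is removed in Python 3, so the `in` conversions are required. The node branch (the `I=` case) also had two pre-existing bugs that this port is the natural place to fix, as done in the hunk above: `dct[W]` was a `NameError` (missing quotes), and HTK SLF node lines carry an `I=` field, not `J=`, so `dct["J"]` would `KeyError`. A sketch of the corrected lookup (this `create_map` is an illustrative reimplementation, not the patch's):

```python
def create_map(items):
    return dict(item.split("=", 1) for item in items if "=" in item)

# Hypothetical HTK SLF node line: I=<id> W=<word> ...
dct = create_map("I=0 W=hello t=0.12".split())
if "W" in dct:                        # py2's dct.has_key("W") is gone in py3
    print(dct["I"] + ' [label = "' + dct["W"] + '"];')
```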
2 changes: 1 addition & 1 deletion python/cmusphinx/lat2fsg.py
@@ -259,5 +259,5 @@ def lat2flat(latfile, fsgfile, lmfst):
for spam in file(ctlfile):
latfile = os.path.join(latdir, spam.strip() + ".slf")
fsgfile = os.path.join(latdir, spam.strip() + opts.outext)
- print spam,
+ print(spam, end=' ')
ofst = lat2fsg_posterior(latfile, fsgfile, opts.prune, errfst)
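Review note: one more py2-ism survives in this driver loop (and in lat_rescore.py below): the `file()` builtin is gone in Python 3; `open()` is the portable spelling. A sketch of the loop with a `StringIO` standing in for the control file (names here are illustrative):

```python
import io
import os

latdir, outext = "lats", ".fsg"              # hypothetical values from opts
ctlfh = io.StringIO("utt01\nutt02\n")        # stands in for open(ctlfile)
# Python 2: for spam in file(ctlfile):  ->  for spam in open(ctlfile):
for spam in ctlfh:
    latfile = os.path.join(latdir, spam.strip() + ".slf")
    fsgfile = os.path.join(latdir, spam.strip() + outext)
    print(spam.strip(), latfile, fsgfile)
```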
2 changes: 1 addition & 1 deletion python/cmusphinx/lat_rescore.py
@@ -34,4 +34,4 @@ def lat_rescore(latfile, lmfst):
for spam in file(ctlfile):
latfile = os.path.join(latdir, spam.strip() + ".lat.gz")
words, score = lat_rescore(latfile, lm)
print " ".join(words), "(%s %f)" % (spam.strip(), score)
print(" ".join(words), "(%s %f)" % (spam.strip(), score))
2 changes: 1 addition & 1 deletion python/cmusphinx/lat_rescore_fst.py
@@ -77,4 +77,4 @@ def lat_rescore(dag, lmfst, lw=9.5):
except IOError:
dag = lattice.Dag(htk_file=os.path.join(latdir, spam.strip() + ".slf"))
words, score = lat_rescore(dag, lmfst, opts.lw)
print " ".join(words), "(%s %f)" % (spam.strip(), score)
print(" ".join(words), "(%s %f)" % (spam.strip(), score))
6 changes: 3 additions & 3 deletions python/cmusphinx/lattice.py
@@ -258,7 +258,7 @@ def dag2htk(self, htkfile, lm=None):
# Ensure some header fields are there
if 'VERSION' not in self.header:
self.header['VERSION'] = '1.0'
- for k,v in self.header.iteritems():
+ for k,v in self.header.items():
# Skip Sphinx stuff
if k[0] == '-':
continue
@@ -309,7 +309,7 @@ def dag2fst(self, fstfile, symfile=None, altpron=False):
fh.write("%d 0" % idmap[self.end])
fh.close()
if symfile:
- for k, v in symmap.iteritems():
+ for k, v in symmap.items():
sfh.write("%s %d\n" % (k, v))
sfh.close()

@@ -394,7 +394,7 @@ def dag2sphinx(self, outfile, logbase=1.0003):
fh = open(outfile, "w")
fh.write("# getcwd: %s\n" % self.getcwd)
fh.write("# -logbase %e\n" % logbase)
- for arg, val in self.header.iteritems():
+ for arg, val in self.header.items():
if arg != '-logbase':
fh.write("# %s %s\n" % (arg,val))
fh.write("#\n")