#!/usr/bin/python2
# This is -*- Python -*-
"""Report the phoneme decompositions of the distinct tokens in a text.

Usage: <script> TEXT

TEXT is handed unchanged to gnoetics.text_new().  Every distinct token of
the resulting text is visited in case-insensitive order of its string
form.  Tokens whose pos_mask() is false are printed with their string
form, syllable count, word count and phoneme decomposition (blank when
the token has none).  A final line reports how many of the distinct
tokens have a decomposition.
"""

import sys


def decomp_percentage(decomp_count, total):
    """Return decomp_count as a percentage of total.

    Returns 0.0 when total is zero, so that a text without tokens yields a
    summary line instead of a ZeroDivisionError.
    """
    if not total:
        return 0.0
    # float() keeps this a true division under Python 2 as well.
    return 100 * (decomp_count / float(total))


def main(argv):
    """Run the report for argv[1]; return the process exit status."""
    if len(argv) < 2:
        sys.stderr.write("usage: %s TEXT\n" % argv[0])
        return 1

    # gnoetics is only importable once these build directories are on the
    # module search path, hence the import after the path tweaks.
    sys.path.append("../src")
    sys.path.append("../engine/build/lib.linux-i686-2.2")
    import gnoetics

    txt = gnoetics.text_new(argv[1])

    # A dict keyed by the token itself collapses repeated tokens.
    unique = {}
    for i in xrange(txt.length()):
        tok = txt.get_token(i)
        unique[tok] = tok

    # Case-insensitive order; a key function lowercases each string once
    # rather than on every comparison.
    ordered = sorted(unique.values(), key=lambda t: t.to_string().lower())

    decomp_count = 0
    for t in ordered:
        decomp = t.get_decomp()
        if decomp:
            decomp_str = gnoetics.phoneme_decomp_to_string(decomp)
            decomp_count += 1
        else:
            decomp_str = ""

        # NOTE(review): only tokens with a false pos_mask() are listed, but
        # every token counts towards the summary -- confirm this is intended.
        if not t.pos_mask():
            # Single-argument parenthesised print behaves the same as the
            # Python 2 print statement.
            print("%-20s %d %d %s" % (t.to_string(),
                                      t.syllables(),
                                      t.word_count(),
                                      decomp_str))

    print("")
    print("%d of %d tokens have decompositions (%.1f%%)"
          % (decomp_count, len(ordered),
             decomp_percentage(decomp_count, len(ordered))))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))