#!/usr/bin/env python2
# This is -*- Python -*-
"""Measure how often two words of a text can be joined by one bridging word.

For every ordered pair (w1, w2) of vocabulary words (a word may be paired
with itself), check whether some token occurs both immediately after w1 and
immediately before w2 somewhere in the text.  Running statistics are printed
each time the number of successful pairs reaches a multiple of 100.

The code sticks to constructs available in old Python 2 releases (dicts used
as sets, manual counters instead of enumerate) because the script loads an
extension module built for Python 2.2.  Every print call passes a single
pre-formatted string, so the output is the same whether ``print`` is parsed
as a statement or as a function.
"""

import sys
import string
import time
import random

sys.path.append("../src")
sys.path.append("../engine/build/lib.linux-i686-2.2")

filename = "../texts/great-expectations.gtry"


def intersect(a, b):
    """Return 1 if the collections a and b share at least one element, else 0.

    a and b may be lists or dicts (used as sets); anything that supports
    len(), iteration and the ``in`` operator works.  Empty or None arguments
    yield 0.
    """
    if not a or not b:
        return 0
    # Walk the smaller collection and probe the larger one.
    if len(a) > len(b):
        a, b = b, a
    for item in a:
        if item in b:
            return 1
    return 0


def build_adjacency(tokens):
    """Build the vocabulary and the successor/predecessor tables of a text.

    tokens -- sequence of hashable tokens in reading order.

    Returns (words, succ, pred):
      words -- list of the distinct tokens found at positions 1..N-1
      succ  -- dict: token -> dict whose keys are the tokens seen right
               after it
      pred  -- dict: token -> dict whose keys are the tokens seen right
               before it

    The inner dicts act as sets: duplicates are irrelevant to intersect(),
    and membership tests on a dict do not scan a list.

    An empty token sequence yields ([], {}, {}).
    """
    vocab = {}
    succ = {}
    pred = {}
    if not tokens:
        return [], succ, pred

    # NOTE(review): as in the original script, the very first token is only
    # part of the vocabulary if it occurs again later in the text -- confirm
    # whether that omission is intended.
    prev = tokens[0]
    for curr in tokens[1:]:
        vocab[curr] = curr
        succ.setdefault(prev, {})[curr] = 1
        pred.setdefault(curr, {})[prev] = 1
        prev = curr
    return list(vocab.keys()), succ, pred


def count_bridged_pairs(words, succ, pred, report_every=100):
    """Count ordered word pairs (w1, w2) that share a bridging token.

    A pair succeeds when the successors of w1 and the predecessors of w2
    have at least one token in common.  A progress line is printed whenever
    the success count reaches a multiple of report_every (must be positive).

    Returns the tuple (success, trials).
    """
    empty = {}  # shared default so missing words do not allocate per lookup
    trials = 0
    success = 0
    total = len(words) * len(words)

    i = 0
    for w1 in words:
        # Invariant across the inner loop, so look it up only once.
        after_w1 = succ.get(w1, empty)
        j = 0
        for w2 in words:
            trials += 1
            if intersect(after_w1, pred.get(w2, empty)):
                success += 1
                if success % report_every == 0:
                    print("%d/%d, %.1f%% done, %d of %d (%.1f%%)"
                          % (i, j,
                             100*trials/float(total),
                             success, trials,
                             100*success/float(trials)))
            j += 1
        i += 1
    return success, trials


def main():
    """Load the text named by ``filename`` and print bridging statistics."""
    # Imported here rather than at the top of the file: the module is only
    # findable after the sys.path additions above, and keeping it out of
    # module scope lets the helper functions be imported without it.
    import gnoetics

    print("Loading %s" % filename)
    txt = gnoetics.text_new(filename)
    tokens = [txt.get_token(i) for i in range(txt.length())]
    words, succ, pred = build_adjacency(tokens)
    count_bridged_pairs(words, succ, pred)


if __name__ == "__main__":
    main()