#!/usr/bin/python2
"""Generate endless random text from part-of-speech n-gram statistics.

Every corpus file listed in ``files`` (under ``path``) is scanned to build
two tables:

* ``pos_to_words`` maps a tag to every word seen with that tag.
* ``successors`` maps the last (up to ``lookback``) tags, joined by "//",
  to every tag that was seen following them.

Text is then produced by repeatedly drawing a successor tag for the current
tag history and a random word for that tag.

``print`` is always called with a single parenthesised argument so the same
source behaves identically under Python 2 and Python 3.
"""

import collections
import os
import os.path
import random
import string

path = "../dict/tagged-corpora/susanne"

# Corpus file names: A01..A14, G01..G13, J01..J10, N01..N15.
files = (
    ["A%02d" % n for n in range(1, 15)]
    + ["G%02d" % n for n in range(1, 14)]
    + ["J%02d" % n for n in range(1, 11)]
    + ["N%02d" % n for n in range(1, 16)]
)

pos_to_words = {}   # tag -> list of words observed with that tag
successors = {}     # "tag//tag//tag" history key -> list of following tags
lookback = 3        # maximum number of preceding tags that form a key


def scan_file(name):
    """Add the tag/word statistics of one corpus file to the global tables.

    Each line is split on tabs; the third field is used as the tag and the
    fourth as the word. Lines with fewer than four fields are skipped.

    A word of the form "+<letter>..." is stored without the "+", so it is
    later emitted with a normal leading space. Any other "+"-prefixed word
    keeps its "+", which the generator treats as "attach to the previous
    word without a space".

    Args:
        name: path of the corpus file to read.
    """
    print("Scanning %s..." % name)
    # A bounded deque drops the oldest tag by itself once it is full.
    history = collections.deque(maxlen=lookback)
    with open(name) as corpus:
        for line in corpus:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 4:
                continue
            pos = fields[2]
            word = fields[3]
            # The length check guards word[1] and also avoids the
            # `"" in some_string` pitfall (always true).
            if (len(word) > 1 and word[0] == "+"
                    and word[1] in string.ascii_letters):
                word = word[1:]
            pos_to_words.setdefault(pos, []).append(word)
            if history:
                successors.setdefault("//".join(history), []).append(pos)
            history.append(pos)


def pick_word(pos):
    """Return a random word recorded for tag `pos`, or "*error*" if none."""
    return random.choice(pos_to_words.get(pos) or ["*error*"])


def pick_successor(pos):
    """Return a random tag recorded after history key `pos`.

    Despite its name, `pos` is the "//"-joined tag history used as a key in
    ``successors``. Returns None when the key has no recorded successor.
    """
    candidates = successors.get(pos)
    return random.choice(candidates) if candidates else None


def generate_text():
    """Yield generated chunks of text forever.

    A chunk ends whenever the tag "YB" or "YF" is drawn; empty chunks are
    not yielded. When the current history has no recorded successor,
    " [DeadEnd!]" is appended to the chunk and generation restarts from
    "YB" with an empty history.
    """
    # NOTE(review): scan_file never resets its history at YB/YF, so the
    # short keys used right after a reset here only match contexts taken
    # from the first lines of each corpus file -- confirm this is intended.
    history = collections.deque(maxlen=lookback)
    pos = "YB"
    text = ""
    while True:
        if pos in ("YB", "YF"):
            if text:
                yield text
            text = ""
            history.clear()
        else:
            word = pick_word(pos)
            if word.startswith("+"):
                # "+" means: glue to the previous word, no space.
                word = word[1:]
            elif text:
                text += " "
            text += word
        history.append(pos)
        pos = pick_successor("//".join(history))
        if not pos:
            text += " [DeadEnd!]"
            pos = "YB"
            history.clear()


def main():
    """Scan every corpus file, then print generated text until interrupted."""
    for corpus_name in files:
        scan_file(os.path.join(path, corpus_name))
    for text in generate_text():
        print(text)
        print("")  # blank line after each chunk


if __name__ == "__main__":
    main()