# This is -*- Python -*-
#
# Reads the tagged SUSANNE corpus files, feeds their part-of-speech tag
# sequences into a gnoetics sequence model, and then prints randomly
# generated 5-7-5 haiku in an endless loop.

# The first haiku
#
# The defensive heat
# participated --- to be
# termed from most negros.
#
# (Kept verbatim from the original header; presumably unedited program
# output drawn from the corpus vocabulary.)

import string
import random
import os
import sys

import gnoetics

from grammar_model import *

# Sequence model over part-of-speech tags, filled in by scan_file().
model = gnoetics.seq_model_new(4)

# Maps a part-of-speech tag (as returned by gnoetics.pos_from_string) to the
# gnoetics.TokenChooser holding every corpus token seen with that tag.
word_dict = {}

# A corpus line carrying one of these tags ends the sentence being collected.
final_tags = ("YB", "YF", "YQ", "YX")

# Corpus spellings of punctuation and the text they are rewritten to.
# NOTE(review): in the copy under review all three keys had been reduced to
# empty strings (which made every word match and left a single "" entry).
# They look like angle-bracket entity names stripped by an HTML/SGML filter;
# the names below follow the SUSANNE word-field convention -- confirm against
# the upstream file.
punct_map = {
    "<apos>": "'",
    "<hyphen>": "-",
    "<mdash>": "---",
}

# Tokens whose word contains any of these substrings are left out of the
# model and of word_dict.
_quote_markers = ("ldquo", "lsquo", "rdquo", "rsquo")


def _has_quote_marker(word):
    """Return True if *word* contains one of the quote-entity substrings."""
    for marker in _quote_markers:
        if marker in word:
            return True
    return False


def scan_file(filename):
    """Feed one tagged corpus file into ``model`` and ``word_dict``.

    Each line is split on whitespace; field 2 is taken as the tag string and
    field 3 (lower-cased) as the word.  Punctuation entities listed in
    ``punct_map`` are rewritten, tokens tagged "YB" or containing a quote
    entity are skipped, and every remaining token is registered under its
    tag in ``word_dict`` while its tag is appended to the current sentence.
    A tag in ``final_tags`` hands the accumulated sentence to
    ``model.add_sentence``; a trailing unfinished sentence is added at end
    of file.

    filename -- path of the corpus file to read.
    """
    f = open(filename)
    try:
        sentence = []
        for line in f:
            fields = line.split()
            if len(fields) < 4:
                # Blank or short line: there is no tag/word pair to read.
                continue
            tag_str = fields[2]
            word = fields[3].lower()

            # Rewrite every occurrence of each entity, not just the first.
            for key, punct in punct_map.items():
                word = word.replace(key, punct)

            if tag_str != "YB" and not _has_quote_marker(word):
                tag = gnoetics.pos_from_string(tag_str)
                tok = gnoetics.token_lookup(word)
                if tag not in word_dict:
                    word_dict[tag] = gnoetics.TokenChooser()
                word_dict[tag].add(tok)
                sentence.append(tag)

            if tag_str in final_tags:
                model.add_sentence(sentence)
                sentence = []

        if sentence:
            model.add_sentence(sentence)
    finally:
        # Always release the handle, even if a line fails to parse.
        f.close()


# Load every corpus file; only directory entries with a three-character name
# are scanned.  Progress is reported on stderr.
path = "../dict/tagged-corpora/susanne"
for name in os.listdir(path):
    if len(name) == 3:
        sys.stderr.write(name + " ")
        name = os.path.join(path, name)
        scan_file(name)
sys.stderr.write("\n")

terminals = model.terminals()


def gen_haiku():
    """Return one haiku as a list of strings.

    The list holds the chosen words, with a "\\n" element after each of the
    first two lines and the final element replaced by a zero-syllable token
    for a terminal tag.  The model is walked one tag at a time; for each tag
    a word with at most the remaining syllable count of the current line is
    chosen.  Whenever the walk cannot continue, everything is thrown away
    and a fresh attempt starts from ``model.get_break_tuple()``.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction and should be checked against the
    upstream file.
    """
    t = None
    while True:
        if t is None:
            # (Re)start: full syllable pattern, fresh context, empty output.
            pattern = [5, 7, 5]
            t = model.get_break_tuple()
            sentence = []

        x = model.step_next(t)

        if not pattern:
            # All three lines are full; the next tag must close the sentence.
            if x in terminals:
                # we ended right on time
                tok = word_dict[x].choice(syllables_eq=0)
                if tok:
                    # Overwrite the "\n" appended after the last line.
                    sentence[-1] = tok.to_string()
                    return sentence
            t = None
        elif x != 0:
            # NOTE(review): the original had the comment "we ended early" at
            # this point; presumably 0 is the model's break marker and the
            # comment referred to that case -- confirm.
            n = pattern[0]
            c = word_dict[x]
            if c.choice_exists(syllables_lteq=n):
                tok = c.choice(syllables_lteq=n)
                sentence.append(tok.to_string())
                pattern[0] -= tok.syllables()
                if pattern[0] == 0:
                    # Current line is complete; move on to the next one.
                    pattern.pop(0)
                    sentence.append("\n")
            else:
                # No word with this tag fits the rest of the line.
                t = None

        if t:
            # Slide the context window forward by the tag just produced.
            t = t[1:] + (x,)


# Emit haiku forever: the words joined by spaces, then two blank lines.
# sys.stdout.write is used so the file parses under both Python 2 and 3.
while True:
    sentence = gen_haiku()
    sys.stdout.write(" ".join(sentence) + "\n\n\n")