import os, sys, string
import gnoetics
import numbers

# Directory holding the SUSANNE corpus files read by get_sentences() and
# get_all_sentences().  The path is relative, so it is resolved against the
# process's current working directory at call time.
# NOTE(review): this name shadows the builtin dir() for the whole module; it
# is kept because other code may reference it by this name.
dir = "../dict/tagged-corpora/susanne"

def to_number(x):
    """Return the spelled-out form of the integer written in *x*, or None.

    A single leading "+" and all "," characters are removed before the text
    is parsed with int().  The parsed value is passed to
    numbers.num_to_string() and that function's result is returned.

    x -- token text; may be empty

    Returns None when *x* does not parse as an integer (including the empty
    string and a bare "+"), or when num_to_string() fails.
    """
    if x.startswith("+"):
        x = x[1:]
    # str.replace() instead of string.replace(): the module-level function
    # only exists on Python 2, the method works on both 2 and 3.
    x = x.replace(",", "")

    try:
        value = int(x)
    except ValueError:
        return None

    # Spelling the number out stays best-effort, as before: any failure in
    # the helper means "no numeric form available", not a crash.  Unlike a
    # bare except, this lets KeyboardInterrupt/SystemExit propagate.
    try:
        return numbers.num_to_string(value)
    except Exception:
        return None
    
def is_integer(x):
    """Return 1 if int(x) succeeds, otherwise 0.

    x -- any value; strings, numbers and None are all accepted

    Only the exceptions int() raises for unconvertible input are treated as
    "not an integer" (TypeError for unsupported types, ValueError for
    malformed text or NaN, OverflowError for infinities), so that
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """
    try:
        int(x)
    except (TypeError, ValueError, OverflowError):
        return 0
    return 1

class SentenceAccumulator:
    """Collect (pos, word) pairs into sentences, one token at a time.

    Tokens are fed in through add(); each sentence is a list of (pos, word)
    tuples, and completed sentences are read back with get_sentences().
    """

    def __init__(self):
        # Sentences that have been closed by a terminal token.
        self.__sentences = []
        # Tokens of the sentence currently being built.
        self.__current_sentence = []

    def add(self, pos, word, is_terminal):
        """Normalize *word* and append (pos, word) to the current sentence.

        Normalization, in order:
          1. lower-case the word and replace every "&" with "and";
          2. if to_number() returns a non-None value for it, use that value;
          3. otherwise, if the word starts with "+" followed by an ASCII
             letter and contains no apostrophe, drop the leading "+".

        Empty words and a bare "+" are discarded.  For such tokens
        *is_terminal* is ignored, so they never close a sentence.

        pos         -- tag stored alongside the word, unchanged
        word        -- raw token text
        is_terminal -- true if this token ends the current sentence
        """
        # str methods rather than string.replace(): the module-level
        # function only exists on Python 2, the methods work on 2 and 3.
        word = word.lower().replace("&", "and")

        if not word or word == "+":
            return

        num_str = to_number(word)
        if num_str is not None:
            word = num_str
        elif (word.startswith("+")
              and word[1] in string.ascii_letters
              and "'" not in word):
            # A bare "+" was rejected above, so word[1] always exists here.
            word = word[1:]

        self.__current_sentence.append((pos, word))
        if is_terminal:
            self.__sentences.append(self.__current_sentence)
            self.__current_sentence = []

    def get_sentences(self):
        """Return the list of completed sentences.

        This is the internal list itself, not a copy.  Tokens added since
        the last terminal token are not included.
        """
        return self.__sentences


def get_sentences(filename):
    """Scan one corpus file and return the sentences found in it.

    filename -- name of a file inside the module-level corpus directory

    The file is loaded with a gnoetics SUSANNE scanner, which is given a
    fresh SentenceAccumulator's add() method as its callback.  The return
    value is that accumulator's list of completed sentences, each a list of
    (pos, word) tuples.
    """
    path = os.path.join(dir, filename)

    collector = SentenceAccumulator()
    scanner = gnoetics.scanner_susanne_new()
    # presumably the scanner invokes the callback once per token as
    # (pos, word, is_terminal) -- confirm against the gnoetics scanner API
    scanner.load_file(path, collector.add)

    return collector.get_sentences()


def get_all_sentences():
    """Return the sentences of every corpus file, concatenated into one list.

    Only directory entries whose name is exactly three characters long are
    scanned (presumably the SUSANNE file-naming scheme -- verify against the
    corpus directory).  Files are visited in whatever order os.listdir()
    yields them.
    """
    collected = []
    for entry in os.listdir(dir):
        if len(entry) != 3:
            continue
        collected += get_sentences(entry)
    return collected

