/* This is -*- C -*- */
/* vim: set sw=2: */
/* $Id$ */

/*
 * token.c
 *
 * Copyright (C) 2003 The Free Software Foundation, Inc.
 *
 */

/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "token.h"

#include <stdint.h>

#include "dictionary.h"
#include "syllable.h"

/* Table mapping word strings to the Token created for them.  It is
   created lazily by the first call to token_lookup(). */
static GHashTable *tokens = NULL;

/* The "<start>" and "<stop>" sentinel tokens.  Each stays NULL until
   token_lookup() creates the token for the corresponding word. */
static Token *start_token = NULL;
static Token *stop_token  = NULL;

/*
 * Fill in TOKEN's part-of-speech mask by looking WORD up in the
 * dictionary after stripping leading and trailing punctuation.
 *
 * If the stripped word yields no mask and ends in 's', the word without
 * that final 's' is tried; when that is a noun, the plural bit is set.
 * WORD itself is not modified; the work is done on a private copy.
 */
static void
token_fill_pos_mask (Token *token, const char *word)
{
  char *buffer, *clean;
  DictionaryWord *dword;
  int i;

  buffer = g_strdup (word);
  clean = buffer;

  /* The <ctype.h> predicates are only defined for unsigned char values
     (and EOF), so plain char must be converted before the call. */
  for (i = strlen (clean) - 1;
       i >= 0 && ispunct ((unsigned char) clean[i]);
       --i)
    clean[i] = '\0';

  while (*clean && ispunct ((unsigned char) *clean))
    ++clean;

  if (*clean) {
    dword = dictionary_get_word (clean);
    if (dword)
      token->pos_mask = dword->pos_mask;

    if (! token->pos_mask) {
      /* Maybe it is a plural. */
      int len = strlen (clean);
      if (len > 0 && clean[len-1] == 's') {
        clean[len-1] = '\0';

        dword = dictionary_get_word (clean);
        if (dword && (dword->pos_mask & pos_get_mask (POS_NOUN)))
          token->pos_mask |= pos_get_mask (POS_PLURAL);
      }
    }
  }

  g_free (buffer);
}

/*
 * Return the unique Token for WORD, creating and caching it on first use.
 *
 * A leading '|' marks the token as "left glue"; the marker is removed
 * from the stored word but remains part of the cache key, so "|foo" and
 * "foo" are distinct tokens and repeated lookups of either one return
 * the same pointer.
 *
 * The words "<start>" and "<stop>" create the sentinel tokens and record
 * them in start_token / stop_token; they carry no decomposition or meter.
 *
 * Returns NULL only when WORD is NULL.  Tokens are never freed.
 */
Token *
token_lookup (const char *word)
{
  Token *token;
  const char *key = word;

  if (word == NULL)
    return NULL;

  if (tokens == NULL)
    tokens = g_hash_table_new (g_str_hash, g_str_equal);

  token = g_hash_table_lookup (tokens, key);
  if (token != NULL)
    return token;

  token = g_new0 (Token, 1);

  if (word[0] == '|') {
    token->left_glue = TRUE;
    ++word;
  }

  token->word = g_strdup (word);
  token->decomp = dictionary_get_decomp (word);
  if (token->decomp)
    token->meter = meter_from_phoneme_decomp (token->decomp);

  token_fill_pos_mask (token, word);

  /* Negative means "not computed yet"; see token_syllables() and
     token_word_count(). */
  token->syllables = -1;
  token->word_count = -1;

  if (! strcmp (token->word, "<start>")) {
    token->is_start = TRUE;
    g_assert (start_token == NULL);
    start_token = token;
    token->decomp = NULL;
    token->meter = NULL;
  }
  if (! strcmp (token->word, "<stop>")) {
    token->is_stop = TRUE;
    g_assert (stop_token == NULL);
    stop_token = token;
    token->decomp = NULL;
    token->meter = NULL;
  }

  /* Insert under the same string the lookup above used.  For glued
     tokens that string still has the '|' prefix, so it needs its own
     copy; otherwise the token's word doubles as the key. */
  g_hash_table_insert (tokens,
                       token->left_glue ? g_strdup (key) : token->word,
                       token);

  return token;
}

/*
 * Return the number of syllables in TOKEN, computing and caching it in
 * token->syllables the first time.  Returns -1 if TOKEN is NULL.
 *
 * Start and stop tokens have zero syllables.  Otherwise the count comes
 * from the phoneme decomposition when there is one, and from an
 * approximation based on the word's spelling when there is not.
 */
int
token_syllables (Token *token)
{
  g_return_val_if_fail (token != NULL, -1);

  /* A non-negative value has already been computed. */
  if (token->syllables >= 0)
    return token->syllables;

  if (token_is_start (token) || token_is_stop (token)) {
    token->syllables = 0;
  } else if (token->decomp) {
    token->syllables = syllable_count_from_decomp (token->decomp);
  } else {
    token->syllables = syllable_count_approximate (token->word);
  }

  return token->syllables;
}

/*
 * Return TOKEN's meter string, or NULL if TOKEN is NULL or has no
 * syllables.
 *
 * When the token has no meter yet, one is allocated with one
 * METER_UNKNOWN entry per syllable plus a terminating '\0', and stored
 * in token->meter for later calls.
 */
Meter *
token_meter (Token *token)
{
  int count;

  g_return_val_if_fail (token != NULL, NULL);

  if (token->meter != NULL)
    return token->meter;

  count = token_syllables (token);
  if (count <= 0)
    return NULL;

  token->meter = g_new (Meter, count + 1);
  memset (token->meter, METER_UNKNOWN, count);
  token->meter[count] = '\0';

  return token->meter;
}

/*
 * Return the number of words in TOKEN, computing and caching it in
 * token->word_count the first time.  Returns -1 if TOKEN is NULL.
 *
 * A word beginning with '<' counts as zero words.  Any other word counts
 * as one, plus one for every whitespace character that is directly
 * followed by a non-whitespace character.
 */
int
token_word_count (Token *token)
{
  const char *c;

  g_return_val_if_fail (token != NULL, -1);

  if (token->word_count >= 0)
    return token->word_count;

  if (token->word[0] == '<') {
    token->word_count = 0;
    return token->word_count;
  }

  token->word_count = 1;
  for (c = token->word; *c; ++c) {
    /* isspace() is only defined for unsigned char values (and EOF), so
       plain char must be converted first.  The c[1] test keeps trailing
       whitespace from counting as the start of another word. */
    if (c[1]
        && isspace ((unsigned char) c[0])
        && ! isspace ((unsigned char) c[1]))
      ++token->word_count;
  }

  return token->word_count;
}

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/*
 * Return the start sentinel token.  If it does not exist yet, looking up
 * "<start>" creates it; token_lookup() stores it in start_token.
 */
Token *
token_get_start (void)
{
  if (start_token != NULL)
    return start_token;

  token_lookup ("<start>");
  return start_token;
}

/*
 * Return the stop sentinel token.  If it does not exist yet, looking up
 * "<stop>" creates it; token_lookup() stores it in stop_token.
 */
Token *
token_get_stop (void)
{
  if (stop_token != NULL)
    return stop_token;

  token_lookup ("<stop>");
  return stop_token;
}

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/* Python Type Magic */

typedef struct _PyToken PyToken;
struct _PyToken {
  PyObject_HEAD;
  Token *token;
};

/* Python method: return the token's part-of-speech mask as an integer.
   ARGS is ignored. */
static PyObject *
py_token_pos_mask (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", wrapper->token->pos_mask);
}

/* Python method: return token_syllables() for the wrapped token as an
   integer.  ARGS is ignored. */
static PyObject *
py_token_syllables (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", token_syllables (wrapper->token));
}

/* Python method: return token_word_count() for the wrapped token as an
   integer.  ARGS is ignored. */
static PyObject *
py_token_word_count (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", token_word_count (wrapper->token));
}

/* Python method: return the token's word as a Python string.  ARGS is
   ignored. */
static PyObject *
py_token_to_string (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("s", wrapper->token->word);
}

/* Python method: return the token's left_glue flag as an integer.  The
   flag is set by token_lookup() for words given with a leading '|'.
   ARGS is ignored. */
static PyObject *
py_token_has_left_glue (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", wrapper->token->left_glue);
}

/* Python method: return the token's is_start flag as an integer.  ARGS
   is ignored. */
static PyObject *
py_token_is_start (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", wrapper->token->is_start);
}

/* Python method: return the token's is_stop flag as an integer.  ARGS
   is ignored. */
static PyObject *
py_token_is_stop (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("i", wrapper->token->is_stop);
}

/* Python method: return the token's phoneme decomposition converted by
   phoneme_decomp_to_py(), or None when the token has none.  ARGS is
   ignored. */
static PyObject *
py_token_get_decomp (PyObject *self, PyObject *args)
{
  Token *token = ((PyToken *) self)->token;

  if (token->decomp != NULL)
    return phoneme_decomp_to_py (token->decomp);

  Py_INCREF (Py_None);
  return Py_None;
}

/* Python method: return token_meter() for the wrapped token, converted
   with the "s" format of Py_BuildValue.  ARGS is ignored. */
static PyObject *
py_token_meter (PyObject *self, PyObject *args)
{
  PyToken *wrapper = (PyToken *) self;

  return Py_BuildValue ("s", token_meter (wrapper->token));
}


/*
 * Method table for Token objects, searched by py_token_getattr().
 * Each entry gives the Python-visible name, the C function, the calling
 * convention flag and the docstring.  The all-NULL entry ends the table.
 */
static PyMethodDef py_token_methods[] = {
  { "pos_mask", py_token_pos_mask, METH_VARARGS,
    "Get the token's part-of-speech mask." },
  { "syllables", py_token_syllables, METH_VARARGS,
    "Return the number of syllables in the tokens." },
  { "word_count", py_token_word_count, METH_VARARGS,
    "Return the number of words in the tokens." },
  { "meter", py_token_meter, METH_VARARGS,
    "Get the meter string of the token." },
  { "to_string", py_token_to_string, METH_VARARGS,
    "Get the string version of a token." },
  { "has_left_glue", py_token_has_left_glue, METH_VARARGS,
    "Well, whatever." },
  { "is_start", py_token_is_start, METH_VARARGS,
    "Is this the start token?" },
  { "is_stop", py_token_is_stop, METH_VARARGS,
    "Is this the stop token?" },
  { "get_decomp", py_token_get_decomp, METH_VARARGS,
    "Get the phoneme decomposition for the token." },
  {NULL, NULL, 0, NULL}
};

/* tp_getattr slot: resolve attribute NAME on OBJ by searching
   py_token_methods. */
static PyObject *
py_token_getattr (PyObject *obj, char *name)
{
  PyObject *method = Py_FindMethod (py_token_methods, obj, name);

  return method;
}

/* tp_dealloc slot: release the wrapper object.  The wrapped Token is
   not touched. */
static void
py_token_dealloc (PyObject *self)
{
  PyObject_Del (self);
}

/*
 * tp_compare slot: order two Token wrappers by the address of the Token
 * each one wraps.  Returns -1, 0 or 1; two wrappers compare equal
 * exactly when they wrap the same Token.
 *
 * The addresses are compared at full pointer width.  Converting them to
 * int and subtracting would drop the upper bits on 64-bit targets, which
 * can make distinct tokens compare equal, and the subtraction itself
 * could overflow.
 */
static int
py_token_cmp (PyObject *a, PyObject *b)
{
  uintptr_t addr_a = (uintptr_t) ((PyToken *) a)->token;
  uintptr_t addr_b = (uintptr_t) ((PyToken *) b)->token;

  return (addr_a > addr_b) - (addr_a < addr_b);
}

/* tp_hash slot: hash a Token wrapper by the address of the wrapped
   Token, so wrappers around the same Token hash alike. */
static long
py_token_hash (PyObject *self)
{
  Token *token = ((PyToken *) self)->token;

  return (long) token;
}

/*
 * Type object for Token wrappers, filled in positionally.  The labels on
 * the first four slots follow the Python 2 PyTypeObject layout; slots
 * after tp_hash are left zero-initialised.
 *
 * NOTE(review): the type pointer in the header is initialised to NULL
 * here; presumably it is set during module initialisation elsewhere --
 * confirm.
 */
static PyTypeObject py_token_type_info = {
  PyObject_HEAD_INIT(NULL)
  0,                /*ob_size*/
  "Token",          /*tp_name*/
  sizeof(PyToken),  /*tp_basicsize*/
  0,                /*tp_itemsize*/
  py_token_dealloc, /*tp_dealloc*/
  NULL,             /*tp_print*/
  py_token_getattr, /*tp_getattr*/
  NULL,             /*tp_setattr*/
  py_token_cmp,     /*tp_compare*/
  NULL,             /*tp_repr*/
  NULL,             /*tp_as_number*/
  NULL,             /*tp_as_sequence*/
  NULL,             /*tp_as_mapping*/
  py_token_hash,    /*tp_hash */
};

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/*
 * Wrap TOKEN in a new Python Token object.
 *
 * Returns a new reference.  A NULL TOKEN yields None.  If the wrapper
 * cannot be allocated, NULL is returned so the caller can propagate the
 * failure instead of crashing on a NULL dereference.
 */
PyObject *
token_to_py (Token *token)
{
  PyToken *py_token;

  if (token == NULL) {
    Py_INCREF (Py_None);
    return Py_None;
  }

  py_token = PyObject_New (PyToken, &py_token_type_info);
  if (py_token == NULL)
    return NULL;

  py_token->token = token;
  return (PyObject *) py_token;
}

/*
 * Return the Token wrapped by OBJ, or NULL if OBJ is NULL or is not a
 * Token wrapper.
 *
 * token_to_py() returns None for a NULL token, so None can come back
 * here; casting it blindly to PyToken would read past the end of the
 * object.  The type check rejects None and any other foreign object.
 * No Python exception is set on the NULL return.
 */
Token *
token_from_py (PyObject *obj)
{
  if (obj == NULL || obj->ob_type != &py_token_type_info)
    return NULL;

  return ((PyToken *) obj)->token;
}

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/*
 * Python function: look up the token for a single string argument and
 * return it wrapped as a Python object.  Returns NULL when the argument
 * tuple does not parse as one string.
 */
PyObject *
py_token_lookup (PyObject *self, PyObject *args)
{
  char *str;

  if (! PyArg_ParseTuple (args, "s", &str))
    return NULL;

  /* token_to_py() already turns a NULL token into None, so no separate
     check is needed here. */
  return token_to_py (token_lookup (str));
}

/* Python function: return the start token wrapped as a Python object.
   ARGS is ignored. */
PyObject *
py_token_get_start (PyObject *self, PyObject *args)
{
  Token *start = token_get_start ();

  return token_to_py (start);
}

/* Python function: return the stop token wrapped as a Python object.
   ARGS is ignored. */
PyObject *
py_token_get_stop (PyObject *self, PyObject *args)
{
  Token *stop = token_get_stop ();

  return token_to_py (stop);
}



