/* This is -*- C -*- */
/* vim: set sw=2: */
/* $Id$ */

/*
 * scanner_susanne.c
 *
 * Copyright (C) 2003 The Free Software Foundation, Inc.
 *
 * Developed by Jon Trowbridge <trow@gnu.org>
 */

/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA.
 */

#ifdef CONFIG_H
#include <config.h>
#endif
#include "scanner_susanne.h"

/*
 * Tag-name / replacement-text pairs consulted by clean_word().
 * Even-indexed entries are the names looked for inside "<...>" markup;
 * the odd-indexed entry that follows each one is the text written in
 * place of the tag.  The table ends with a NULL pair.
 */
static const char *replacements[] = {
  "apos",   "'",
  "hyphen", "-",
  "minus",  "-",
  "frac12", "1/2",
  "frac14", "1/4",
  "prime",  "'",
  NULL, NULL
};

/*
 * NULL-terminated list of tags that scan_line() treats as "terminal":
 * when a line's tag field is equal to one of these, add_word_fn is
 * called with its is_terminal argument set to TRUE.
 * NOTE(review): presumably these are the sentence-boundary tags of the
 * SUSANNE tagset -- confirm against the corpus documentation.
 */
static const char *terminal_tags[] = { 
  "YB", "YF", "YQ", "YX", NULL
};

/*
 * clean_word:
 * @word: NUL-terminated string; edited in place.
 *
 * Removes the first complete "<name>" tag found in @word.  If name is
 * exactly equal to one of the keys in replacements[], the tag is
 * replaced by the associated text; any other tag is simply deleted and
 * the text on either side of it is joined together.
 *
 * A '>' that is not preceded by a '<' is ordinary text and is left
 * untouched.  When several '<' precede the closing '>', the tag is taken
 * to start at the last of them.
 *
 * The edit is done in place, which relies on every replacement text
 * being no longer than the "<name>" tag it stands for (true of all the
 * entries in replacements[]).
 *
 * Returns: TRUE if a tag was removed -- the caller should call again to
 * handle any remaining tags -- or FALSE if @word holds no complete tag.
 */
static gboolean
clean_word (char *word)
{
  char *p;
  char *lt = NULL;

  for (p = word; *p; ++p) {
    if (*p == '<') {
      lt = p;
    } else if (*p == '>' && lt != NULL) {
      /* Only a '>' that follows a '<' closes a tag, so the tag length
         below can never be negative and the copy further down always
         moves text toward the start of the string. */
      char *gt = p;
      char *dest = lt;
      size_t n = (size_t) (gt - lt - 1);
      int i;

      for (i = 0; replacements[i] != NULL; i += 2) {
        const char *name = replacements[i];

        /* Exact match: the n characters inside the tag equal name AND
           name ends right there.  A bare strncmp would also accept any
           prefix of name, including the empty tag "<>". */
        if (strncmp (lt + 1, name, n) == 0 && name[n] == '\0') {
          const char *r;

          /* write the replacement text over the start of the tag */
          for (r = replacements[i + 1]; *r; ++r)
            *dest++ = *r;
          break;
        }
      }

      /* Slide the text that followed '>' down to close the gap. */
      ++gt;
      while (*gt)
        *dest++ = *gt++;
      *dest = '\0';

      return TRUE;
    }
  }

  return FALSE;
}
    

/*
 * Scanner "dealloc" callback (installed by scanner_susanne_new()):
 * releases the Scanner structure itself with g_free().
 */
static void
dealloc (Scanner *scan)
{
  g_free (scan);
}

/*
 * scan_line:
 * @scan: scanner the line belongs to; passed through to @add_word_fn.
 * @line: one tab-separated record, NUL-terminated.  Modified in place.
 * @add_word_fn: callback that receives the extracted tag and word.
 * @user_data: opaque pointer passed through to @add_word_fn.
 *
 * Picks the third tab-separated field (the tag) and the fourth (the
 * word) out of @line.  The tabs that end those two fields are
 * overwritten with NUL so each field becomes a string of its own.  The
 * word is run through clean_word() until no "<...>" markup is left; if
 * anything remains, @add_word_fn is called with the tag, the word and a
 * flag saying whether the tag is listed in terminal_tags[].
 *
 * Nothing is reported unless at least one character follows the fourth
 * tab; shorter lines are ignored.
 */
static void
scan_line (Scanner *scan, char *line,
           ScannerAddWordFn add_word_fn, gpointer user_data)
{
  char *tag = NULL;
  char *word = NULL;
  char *cursor;
  int tabs_seen = 0;

  for (cursor = line; *cursor != '\0'; ++cursor) {

    /* tabs_seen counts the tabs strictly before cursor. */
    switch (tabs_seen) {

    case 2:
      /* first character after the second tab: the tag starts here */
      if (tag == NULL)
        tag = cursor;
      break;

    case 3:
      /* first character after the third tab: end the tag, start the word */
      if (word == NULL) {
        cursor[-1] = '\0';
        word = cursor;
      }
      break;

    case 4:
      /* first character after the fourth tab: the word is complete */
      cursor[-1] = '\0';

      while (clean_word (word))
        ;

      if (*word != '\0') {
        gboolean is_terminal = FALSE;
        const char **candidate;

        for (candidate = terminal_tags; *candidate != NULL; ++candidate) {
          if (strcmp (tag, *candidate) == 0) {
            is_terminal = TRUE;
            break;
          }
        }

        add_word_fn (scan, tag, word, is_terminal, user_data);
      }
      return;

    default:
      break;
    }

    if (*cursor == '\t')
      ++tabs_seen;
  }
}

/*
 * scanner_susanne_new:
 *
 * Allocates a zero-filled Scanner with g_new0() and installs this
 * file's dealloc() and scan_line() as its callbacks.
 *
 * Returns: the newly allocated Scanner.  Its dealloc callback frees it
 * with g_free().
 */
Scanner *
scanner_susanne_new (void)
{
  Scanner *scan = g_new0 (Scanner, 1);

  scan->dealloc   = dealloc;
  scan->scan_line = scan_line;

  return scan;
}

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/*
 * py_scanner_susanne_new:
 * @self: unused.
 * @args: unused; no arguments are parsed.
 *
 * Python-facing constructor: creates a new SUSANNE scanner and returns
 * whatever scanner_to_py() produces for it.
 * NOTE(review): scanner_to_py() is defined elsewhere; presumably it wraps
 * the Scanner in a Python object and takes ownership -- confirm.
 */
PyObject *
py_scanner_susanne_new (PyObject *self, PyObject *args)
{
  return scanner_to_py (scanner_susanne_new ());
}
