/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This module administers the name and the atom list of each symbol. */

/* includes =================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "files.h"
#include "malaga_files.h"

#undef GLOBAL
#define GLOBAL

#include "symbols.h"

/* variables ================================================================*/

LOCAL struct /* this is the symbol table; it is filled by "init_symbols"
	      * and its vectors are freed by "terminate_symbols". */
{
  int_t symbols_size; /* number of entries in each of <symbols>,
		       * <symbols_name> and <symbols_atoms>. */

  symbol_entry_t *symbols; /* the names and atoms of all symbols, indexed by
			    * symbol code. An entry's <name> is an index into
			    * <strings>, its <atoms> an index into <values>. */
  symbol_t *symbols_name; /* all symbol codes, sorted by their names
			   * (ordering given by "strcmp_no_case"). */
  symbol_t *symbols_atoms; /* all symbol codes, sorted by their atom lists
			    * (ordering given by "compare_atom_lists"). */

  int_t values_size; /* number of cells in <values>. */
  symbol_t *values; /* contains lists of atomic symbols */

  int_t strings_size; /* number of chars in <strings>. */
  char *strings; /* contains symbol names */
} symbol_table;

/* functions ================================================================*/

LOCAL string_t local_get_symbol_name (symbol_t symbol)
/* Look up the table entry of <symbol> and return its name.
 * The result points into <symbol_table.strings>. */
{
  symbol_entry_t *entry;

  DB_ASSERT (symbol < symbol_table.symbols_size);

  /* The entry stores the position of the name inside the string vector. */
  entry = &symbol_table.symbols[symbol];
  return &symbol_table.strings[entry->name];
}

/*---------------------------------------------------------------------------*/

LOCAL value_t local_get_atoms (symbol_t symbol)
/* Look up the table entry of <symbol> and return its atom list.
 * The result points into <symbol_table.values>. */
{
  symbol_entry_t *entry;

  DB_ASSERT (symbol < symbol_table.symbols_size);

  /* The entry stores the position of the atom list inside the value
   * vector. */
  entry = &symbol_table.symbols[symbol];
  return &symbol_table.values[entry->atoms];
}

/*---------------------------------------------------------------------------*/

GLOBAL symbol_t find_symbol (string_t name)
/* Find a symbol <name> in the symbol table and return its code.
 * Names are compared using "strcmp_no_case".
 * If there is no symbol <name>, report an error. */
{
  /* We do a binary search on the symbols in <symbols_name>, which
   * "init_symbols" has sorted using the same comparison function. */
  int_t lower = 0;
  int_t upper = symbol_table.symbols_size - 1;

  while (lower <= upper)
  {
    /* Compute the midpoint without forming <lower> + <upper>, so that the
     * sum cannot overflow "int_t" however large the table is. */
    int_t middle = lower + (upper - lower) / 2;
    symbol_t symbol = symbol_table.symbols_name[middle];
    int_t result = strcmp_no_case (name, local_get_symbol_name (symbol));

    if (result < 0)
      upper = middle - 1; /* <name> sorts before the middle symbol. */
    else if (result > 0)
      lower = middle + 1; /* <name> sorts after the middle symbol. */
    else
      return symbol;
  }

  /* The search interval is empty (this is also the case for an empty
   * table), so <name> is not defined.
   * NOTE(review): this relies on "error" not returning; otherwise control
   * would reach the end of the function without a value - confirm. */
  error ("unknown symbol \"%s\"", name);
}

/*---------------------------------------------------------------------------*/

GLOBAL symbol_t find_multi_symbol (value_t atoms)
/* Find the symbol whose atom list is <atoms> in the symbol table and
 * return its code. Atom lists are compared using "compare_atom_lists".
 * If there is no multi-symbol for <atoms>, report an error. */
{
  /* We do a binary search on the symbols in <symbols_atoms>, which
   * "init_symbols" has sorted using the same comparison function. */
  int_t lower = 0;
  int_t upper = symbol_table.symbols_size - 1;

  while (lower <= upper)
  {
    /* Compute the midpoint without forming <lower> + <upper>, so that the
     * sum cannot overflow "int_t" however large the table is. */
    int_t middle = lower + (upper - lower) / 2;
    symbol_t symbol = symbol_table.symbols_atoms[middle];
    int_t result = compare_atom_lists (atoms, local_get_atoms (symbol));

    if (result < 0)
      upper = middle - 1; /* <atoms> sorts before the middle symbol. */
    else if (result > 0)
      lower = middle + 1; /* <atoms> sorts after the middle symbol. */
    else
      return symbol;
  }

  /* The search interval is empty (this is also the case for an empty
   * table), so no symbol has this atom list.
   * NOTE(review): this relies on "error" not returning; otherwise control
   * would reach the end of the function without a value - confirm. */
  error ("no multi-symbol for this atom list");
}

/*---------------------------------------------------------------------------*/

GLOBAL int_t number_of_symbols (void)
/* Report how many symbols the symbol table holds, i.e. the value of
 * <symbol_table.symbols_size>. */
{
  return symbol_table.symbols_size;
}

/*---------------------------------------------------------------------------*/

LOCAL int compare_symbols_name (const void *symbol1, const void *symbol2)
/* Return -1 if name (<symbol1>) < name (<symbol2>)
 *         0 if name (<symbol1>) == name (<symbol2>)
 *         1 if name (<symbol1>) > name (<symbol2>). */
{
  return strcmp_no_case (local_get_symbol_name (*(symbol_t *) symbol1), 
			 local_get_symbol_name (*(symbol_t *) symbol2));
}

/*---------------------------------------------------------------------------*/

LOCAL int compare_symbols_atoms (const void *symbol1, const void *symbol2)
/* Return -1 if atoms (<symbol1>) < atoms (<symbol2>)
 *         0 if atoms (<symbol1>) == atoms (<symbol2>)
 *         1 if atoms (<symbol1>) > atoms (<symbol2>). */
{
  return compare_atom_lists (local_get_atoms (*(symbol_t *) symbol1), 
			     local_get_atoms (*(symbol_t *) symbol2));
}

/*---------------------------------------------------------------------------*/

GLOBAL void init_symbols (string_t file_name)
/* Initialise this module: load <symbol_table> from the file <file_name>,
 * build the two sorted symbol indexes and install the accessor functions
 * "get_symbol_name" and "get_atoms". */
{
  symbol_header_t header;
  int_t symbol;
  FILE *stream = open_stream (file_name, "rb");

  /* The header comes first; "check_header" is given the file type and
   * code version this module expects. */
  read_vector (&header, sizeof header, 1, stream, file_name);
  check_header (&header.common_header, file_name,
		SYMBOL_FILE, SYMBOL_CODE_VERSION);

  /* The three vectors follow in this order: symbol entries, atom list
   * cells, symbol names. Their sizes are taken from the header. */
  symbol_table.symbols_size = header.symbols_size;
  symbol_table.symbols = read_new_vector (sizeof (symbol_entry_t),
					  header.symbols_size,
					  stream, file_name);

  symbol_table.values_size = header.values_size;
  symbol_table.values = read_new_vector (sizeof (cell_t),
					 header.values_size,
					 stream, file_name);

  symbol_table.strings_size = header.strings_size;
  symbol_table.strings = read_new_vector (sizeof (char),
					  header.strings_size,
					  stream, file_name);

  close_stream (&stream, file_name);

  /* Create both indexes as the identity mapping over all symbol codes... */
  symbol_table.symbols_name = new_vector (sizeof (symbol_t),
					  header.symbols_size);
  symbol_table.symbols_atoms = new_vector (sizeof (symbol_t),
					   header.symbols_size);
  for (symbol = 0; symbol < header.symbols_size; symbol++)
    symbol_table.symbols_name[symbol]
      = symbol_table.symbols_atoms[symbol] = symbol;

  /* ...then order one by symbol name and the other by atom list, so that
   * "find_symbol" and "find_multi_symbol" can search them binarily. */
  qsort (symbol_table.symbols_name, header.symbols_size,
	 sizeof *symbol_table.symbols_name, compare_symbols_name);
  qsort (symbol_table.symbols_atoms, header.symbols_size,
	 sizeof *symbol_table.symbols_atoms, compare_symbols_atoms);

  /* Make this module's lookup functions the ones reached through the
   * function pointers <get_symbol_name> and <get_atoms>. */
  get_symbol_name = local_get_symbol_name;
  get_atoms = local_get_atoms;
}

/*---------------------------------------------------------------------------*/

GLOBAL void terminate_symbols (void)
/* Terminate this module: hand every vector of <symbol_table> to
 * "free_mem". */
{
  /* The two sorted indexes built by "init_symbols". */
  free_mem (&symbol_table.symbols_atoms);
  free_mem (&symbol_table.symbols_name);

  /* The three vectors that were read from the symbol file. */
  free_mem (&symbol_table.strings);
  free_mem (&symbol_table.values);
  free_mem (&symbol_table.symbols);
}

/* end of file ==============================================================*/
