 /************************************************************************/
 /*                                                                      */
 /*                Centre for Speech Technology Research                 */
 /*                     University of Edinburgh, UK                      */
 /*                       Copyright (c) 1996,1997                        */
 /*                        All Rights Reserved.                          */
 /*                                                                      */
 /*  Permission to use, copy, modify, distribute this software and its   */
 /*  documentation for research, educational and individual use only, is */
 /*  hereby granted without fee, subject to the following conditions:    */
 /*   1. The code must retain the above copyright notice, this list of   */
 /*      conditions and the following disclaimer.                        */
 /*   2. Any modifications must be clearly marked as such.               */
 /*   3. Original authors' names are not deleted.                        */
 /*  This software may not be used for commercial purposes without       */
 /*  specific prior written permission from the authors.                 */
 /*                                                                      */
 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
 /*  THIS SOFTWARE.                                                      */
 /*                                                                      */
 /************************************************************************/
 /*                                                                      */
 /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)            */
 /*                   Date: Thu Mar 13 1997                              */
 /* -------------------------------------------------------------------- */
 /*                                                                      */
 /* Databases consisting of many files in the file system, textual       */
 /* indexes and so on.                                                   */
 /*                                                                      */
 /* This is the most basic form of database and is only designed for     */
 /* use in compiling other forms of database and in debugging            */
 /* databases, so it is implemented quite simply. This means it's        */
 /* slow, but (hopefully) predictable and understandable.                */
 /*                                                                      */
 /************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "siod.h"
#include "EST_Regex.h"
#include "RawUnitDatabase.h"
#include "EST_Pathname.h"
#include "EST_track_aux.h"

// Magic string which recognise() compares against the bytes found at
// the current position of a stream.
const char RawUnitDatabase::s_magic[] = ";;RAWUNITDB";

// Number of characters in s_magic, not counting the terminating NUL.
// sizeof must be applied to the array itself: in `sizeof(s_magic-1)` the
// array decays to a pointer, so the result is the size of a pointer
// rather than the length of the string.
const int RawUnitDatabase::s_magic_size = sizeof(RawUnitDatabase::s_magic)-1;

// Size of the buffer used by RawUnitCatalogue::fill_from() when reading
// the catalogue with fgets().
const int RawUnitCatalogue::s_line_length = 100;

// Create an empty catalogue.  The entry table is constructed with an
// argument of 500 and the entry count starts at zero.
RawUnitCatalogue::RawUnitCatalogue(void)
  : p_entries(500)
{
  p_num_entries = 0;
}

// Empty destructor: nothing is released explicitly.
// NOTE(review): fill_from() allocates every Entry with new and nothing
// here deletes them -- confirm whether that is intentional.
RawUnitCatalogue::~RawUnitCatalogue(void) {}

// Empty destructor for the catalogue iterator.
RawUnitCatalogue::Itterator::~Itterator(void) {}

// Placeholder implementation: always returns a UnitName converted from
// a null character pointer.
UnitName RawUnitCatalogue::Itterator::next(void)
{
  const char *no_name = NULL;
  return no_name;
}

// Placeholder implementation: always reports that the end has been
// reached (returns 1).
int RawUnitCatalogue::Itterator::at_end(void)
{
  const int finished = 1;
  return finished;
}

// Return a pointer to the first character at or after P which is not
// white space (as judged by isspace), or to the terminating NUL if the
// rest of the string is all white space.
inline static const char *skip_ws(const char *p)
{
  // isspace() requires a value representable as unsigned char (or EOF);
  // passing a plain char which happens to be negative is undefined.
  while (*p != '\0' && isspace((unsigned char)*p))
    p++;
  return p;
}

// Return the smaller of two integers (B when they are equal).
inline static int min_i(int a, int b)
{
  if (a < b)
    return a;
  return b;
}

/*
 * Replace the contents of the catalogue with entries read from STREAM.
 *
 * The stream is read with fgets() into a buffer of s_line_length
 * characters.  Each line read is split into white space separated
 * fields:
 *
 *     name filename segment segment ...
 *
 * The name and filename are truncated to RUC_NAME_LENGTH and
 * RUC_FILENAME_LENGTH characters respectively, each remaining field is
 * converted with strtod() and stored as a segment value, and at most
 * RUC_MAX_SEGMENTS_PER_UNIT segments are kept.  Lines containing no
 * fields are skipped.  p_num_entries is set to the number of entries
 * added.
 *
 * Always returns format_ok.
 */
EST_read_status RawUnitCatalogue::fill_from(FILE *stream)
{
  char buffer[s_line_length];
  int n_added = 0;

  p_entries.clear();
  p_num_entries = 0;

  while (fgets(buffer, s_line_length, stream))
    {
      const char *s = skip_ws(buffer);

      // A line with no fields has no name to file the entry under;
      // storing it would read an uninitialised name buffer.
      if (*s == '\0')
        continue;

      Entry *entry = new Entry;

      entry->name[0] = '\0';
      entry->filename[0] = '\0';
      entry->n_segments = 0;

      for (int field = 1; *s != '\0'; field++)
        {
          // Find the end of the current field.
          const char *e = s;
          while (*e != '\0' && !isspace((unsigned char)*e))
            e++;

          const int len = (int)(e - s);

          if (field == 1)
            {
              // NOTE(review): assumes Entry::name has room for
              // RUC_NAME_LENGTH characters plus the NUL -- confirm
              // against the declaration.
              const int keep = min_i(len, RUC_NAME_LENGTH);
              strncpy(entry->name, s, keep);
              entry->name[keep] = '\0';
            }
          else if (field == 2)
            {
              // NOTE(review): same assumption for Entry::filename and
              // RUC_FILENAME_LENGTH.
              const int keep = min_i(len, RUC_FILENAME_LENGTH);
              strncpy(entry->filename, s, keep);
              entry->filename[keep] = '\0';
            }
          else
            {
              // Stop before writing segment number MAX+1; the previous
              // `<=` test let one value too many through.
              if (entry->n_segments >= RUC_MAX_SEGMENTS_PER_UNIT)
                break;
              entry->segments[entry->n_segments] = strtod(s, NULL);
              entry->n_segments++;
            }

          s = skip_ws(e);
        }

      p_entries.add_item(entry->name, entry);
      n_added++;
    }

  p_num_entries = n_added;

  return format_ok;
}

// Writing a catalogue is not implemented: STREAM is ignored and
// misc_write_error is always returned.
EST_write_status RawUnitCatalogue::write_to(FILE *stream)
{
  static_cast<void>(stream);

  return misc_write_error;
}

// Iteration over a raw catalogue is not implemented: always returns a
// null pointer.
UnitCatalogue::Itterator *RawUnitCatalogue::itterator(void)
{
  UnitCatalogue::Itterator *none = NULL;
  return none;
}

// Look NAME up in the entry table and return whatever the table's val()
// gives for it.  has_entry() treats a NULL result as "not present".
struct RawUnitCatalogue::Entry *RawUnitCatalogue::lookup(EST_String name)
{
  struct RawUnitCatalogue::Entry *found = p_entries.val(name);
  return found;
}

// Return 1 if lookup() finds an entry for NAME, 0 otherwise.
int RawUnitCatalogue::has_entry(EST_String name)
{
  struct RawUnitCatalogue::Entry *found = lookup(name);

  if (found == NULL)
    return 0;
  return 1;
}


// Create an empty raw database: no data table and a zero sample rate.
// A message is written to cwarn as a debugging aid.
RawUnitDatabase::RawUnitDatabase(void)
{
  cwarn << "create RawUnitDatabase\n";
  p_data=NULL;
  // These were previously left indeterminate; property("sample_rate")
  // can read p_sample_rate before set_property() has ever assigned it.
  p_num_data=0;
  p_sample_rate=0;
}

// Print a debugging message naming the database, then delete the
// catalogue (as a RawUnitCatalogue) and the data table.
RawUnitDatabase::~RawUnitDatabase(void)
{
  printf("destroy raw %s\n", (const char *)name());

  RawUnitCatalogue *cat = (RawUnitCatalogue *)catalogue();
  if (cat != NULL)
    delete cat;

  // delete[] of a null pointer does nothing, so no test is needed.
  delete[] p_data;
}

// Decide whether STREAM holds a raw unit database description by
// comparing the next s_magic_size bytes with s_magic.  The stream is
// moved back to where it was before returning, and the result is
// printed as a debugging aid.
bool RawUnitDatabase::recognise(FILE *stream)
{
  // ftell() returns a long; keeping it in an int could truncate the
  // offset that is handed back to fseek() below.
  const long wasat = ftell(stream);
  char buffer[RawUnitDatabase::s_magic_size+1];
  bool recognised=FALSE;

  const size_t n_read = fread(buffer, sizeof(char), s_magic_size, stream);

  if (n_read == (size_t)RawUnitDatabase::s_magic_size
      && strncmp(buffer, RawUnitDatabase::s_magic, s_magic_size) == 0)
    recognised=TRUE;

  fseek(stream, wasat, SEEK_SET);
  printf("recognise raw = %d\n", recognised);
  return recognised;
}

// Factory function: print a debugging message and return a newly
// allocated RawUnitDatabase.
UnitDatabase *RawUnitDatabase::create(void)
{
  printf("create raw\n");

  UnitDatabase *db = new RawUnitDatabase;
  return db;
}

/*
 * Load the database description from STREAM.
 *
 * One expression is read from the stream with lreadf() and passed to
 * set_properties().  If that leaves a catalogue file name set, the
 * catalogue is opened (a relative name is taken relative to the
 * directory of this->filename()), read into a new RawUnitCatalogue and
 * installed with set_catalogue().
 *
 * Returns format_ok on success, wrong_format if reading the description
 * raises an error, and misc_read_error if the catalogue cannot be
 * opened or read.
 */
EST_read_status RawUnitDatabase::fill_from(FILE *stream)
{
  LISP properties;
  gc_protect(&properties);
  CATCH_ERRORS()
    {
      cerr << "format error in description file\n";
      siod_reset_prompt();
      // `properties` is a local: its address must not stay registered
      // with the collector once this function has returned.
      gc_unprotect(&properties);
      return wrong_format;
    }
  properties = lreadf(stream);
  END_CATCH_ERRORS();

  this->set_properties(properties);

  gc_unprotect(&properties);

  EST_Pathname root_dir(EST_Pathname(this->filename()).directory());

  if (p_catalogue_filename != "")
    {
      EST_Pathname catalogue_filename = p_catalogue_filename;
      if (catalogue_filename.is_relative())
        catalogue_filename = EST_Pathname::append(root_dir, catalogue_filename);

      FILE *cat_stream=fopen(catalogue_filename, "r");

      if (!cat_stream)
        {
          cerr << "can't open catalogue '" << catalogue_filename << "'\n";
          return  misc_read_error;
        }

      RawUnitCatalogue *cat = new RawUnitCatalogue;

      if (cat->fill_from(cat_stream) != format_ok)
        {
          fclose(cat_stream);
          delete cat;
          cerr << "bad format in catalogue file\n";
          return  misc_read_error;
        }

      fclose(cat_stream);
      set_catalogue((UnitCatalogue *)cat);
    }
  return format_ok;
}

// Writing a raw database is not implemented: STREAM is ignored and
// misc_write_error is always returned.
EST_write_status RawUnitDatabase::write_to(FILE *stream)
{
  static_cast<void>(stream);

  return misc_write_error;
}

// Placeholder: always returns an EST_StrVector constructed with 0.
EST_StrVector RawUnitDatabase::wave_names(void) const
{
  EST_StrVector none(0);
  return none;
}
// Placeholder: always returns an EST_StrVector constructed with 0.
EST_StrVector RawUnitDatabase::coefficient_names(void) const
{
  EST_StrVector none(0);
  return none;
}

// Placeholder: always returns an EST_StrVector constructed with 0.
EST_StrVector RawUnitDatabase::unit_names(void) const
{
  EST_StrVector none(0);
  return none;
}

// Return the waveform description stored for CONTENT, or NULL if
// CONTENT is not a waveform type, no data table has been set up, the
// index is outside the table, or that slot is marked ct_none.
struct UnitDatabase::wave_information_s *RawUnitDatabase::wave_information(ContentType content)
{
  const int n = (int) content;

  // p_data stays NULL until process_data_included() has run; indexing
  // it before then would dereference a null pointer.
  if (p_data == NULL || n < 0 || n >= p_num_data)
    return NULL;

  if (is_waveform(content) && p_data[n].contents != ct_none)
    return &(p_data[n].wave_info);

  return NULL;
}
// Return the coefficient description stored for CONTENT, or NULL if
// CONTENT is a waveform type, no data table has been set up, the index
// is outside the table, or that slot is marked ct_none.
struct UnitDatabase::coefficient_information_s *RawUnitDatabase::coefficient_information(ContentType content)
{
  const int n = (int) content;

  // p_data stays NULL until process_data_included() has run; indexing
  // it before then would dereference a null pointer.
  if (p_data == NULL || n < 0 || n >= p_num_data)
    return NULL;

  if (!is_waveform(content) && p_data[n].contents != ct_none)
    return &(p_data[n].coef_info);

  return NULL;
}

// Handle the "indexes" property.  VALUE is treated as a list; for each
// element the head names an index type, an index of that type is
// created with UnitIndex::create(), associated with this database,
// given the rest of the element as its properties and registered with
// add_index() under the type name.
//
// Returns 1 if every element was processed, 0 as soon as an element is
// empty, the index cannot be created or its properties are rejected.
//
// NOTE(review): when set_properties() fails the index just created is
// neither registered nor deleted -- confirm whether it should be freed.
int RawUnitDatabase::process_indexes(ValueType value)
{
  for (LISP rest = value_as_list(value); rest; rest = CDR(rest))
    {
      LISP spec = CAR(rest);

      if (!spec)
        return 0;

      EST_String type(get_c_string(CAR(spec)));

      UnitIndex *index = UnitIndex::create(type);

      if (!index)
        return 0;

      index->associate_database(this);

      if (!index->set_properties(CDR(spec)))
        return 0;

      add_index(type, type, index);
    }

  return 1;
}

/*
 * Handle the "data_included" property.
 *
 * LVALUE is treated as a list whose elements have the form
 *
 *     (content-name property value property value ...)
 *
 * A table with ct_maximum slots is allocated, every slot is marked
 * ct_none, and then each element fills in the slot selected by its
 * content name.  Recognised properties are directory, extension,
 * format, sample_rate, pitchmarks, sample_format, coefficient_format,
 * time_channel, length_channel, time_scale and frame_shift.  A trailing
 * property with no value is ignored.
 *
 * Returns 1 on success.  Returns 0 if the list is empty, an element is
 * empty, or a content name, property name or value is not acceptable;
 * in the last three cases error_string describes the problem.
 *
 * NOTE(review): a table left over from an earlier call is not freed
 * here, because pointers into it may have been handed out by
 * wave_information()/coefficient_information() -- confirm whether this
 * can be called more than once.
 */
int RawUnitDatabase::process_data_included(ValueType lvalue)
{
  LISP l = value_as_list(lvalue);

  if(!l)
    return 0;

  const int n_slots = (int)ct_maximum;

  p_data = new struct available_data[n_slots];
  p_num_data = n_slots;

  for(int i=0; i<n_slots; i++)
    {
      p_data[i].contents = UnitDatabase::ct_none;
      // get_wave() and print_description() read this field even when no
      // "pitchmarks" property was given, so give it a definite value.
      p_data[i].wave_info.pm_content_type = UnitDatabase::ct_none;
    }

  for(LISP p=l; p; p=CDR(p))
    {
      LISP entry = CAR(p);

      // As in process_indexes(), an empty element is an error rather
      // than something to take the CAR of.
      if (!entry)
        return 0;

      EST_String content_name = get_c_string(CAR(entry));
      UnitDatabase::ContentType content_type = contents_name_to_number(content_name);
      const int n = (int)content_type;

      // An unrecognised name used to be filed silently in the ct_none
      // slot; report it instead.
      if (content_type == ct_none || n < 0 || n >= n_slots)
        {
          error_string = "unknown content type " + content_name;
          return 0;
        }

      struct available_data &slot = p_data[n];

      slot.contents = content_type;

      for(entry=CDR(entry); entry && CDR(entry); entry=CDR(CDR(entry)))
        {
          EST_String prop  = get_c_string(CAR(entry));
          EST_String value = get_c_string(CAR(CDR(entry)));

          if (prop == "directory")
            {
              slot.directory = EST_Pathname::append(EST_Pathname(filename()).directory(), EST_Pathname(value).as_directory());
            }
          else if (prop == "extension")
            {
              slot.extension = value;
            }
          else if (prop == "format")
            {
              if (slot.is_waveform())
                slot.wave_info.format = value;
              else
                slot.coef_info.format = value;
            }
          else if (prop == "sample_rate")
            {
              if (value.matches(RXint))
                slot.wave_info.sample_rate = atoi(value);
              else
                {
                  error_string = "bad sample_rate " + value;
                  return 0;
                }
            }
          else if (prop == "pitchmarks")
            {
              UnitDatabase::ContentType pmcont = contents_name_to_number(value);
              // Pitchmarks must come from a known, non-waveform content type.
              if (pmcont == ct_none || is_waveform(pmcont))
                {
                  error_string = "bad pitchmarks " + value;
                  return 0;
                }
              slot.wave_info.pm_content_type=pmcont;
            }
          else if (prop == "sample_format")
            {
              slot.wave_info.sample_format = value;
            }
          else if (prop == "coefficient_format")
            {
#if INCLUDE_CONTOUR_TYPE_MAP
              EST_ContourType format = EST_ContourTypeMap.token(value);
#else
              EST_ContourType format = EST_ContourType::make(value);
#endif
              if (format == EST_ContourType::OTHER)
                {
                  error_string = "unknown coefficient format " + value;
                  return 0;
                }
              slot.coef_info.coefficient_format = format;
            }
          else if (prop == "time_channel")
            {
              slot.coef_info.time_channel = value;
            }
          else if (prop == "length_channel")
            {
              slot.coef_info.length_channel = value;
            }
          else if (prop == "time_scale")
            {
              if (value.matches(RXdouble))
                slot.coef_info.time_scale = atof(value);
              else
                {
                  error_string = "bad time scale " + value;
                  return 0;
                }
            }
          else if (prop == "frame_shift")
            {
              if (value.matches(RXint))
                slot.coef_info.frame_shift = atoi(value);
              else
                {
                  error_string = "bad frame_shift " + value;
                  return 0;
                }
            }
          else
            {
              error_string = "unknown property " + prop;
              return 0;
            }
        }
    }
  return 1;
}

// Append the names of the properties this class understands to LIST:
// first whatever UnitDatabase::property_names() adds, then
// "sample_rate" and "catalogue".
void RawUnitDatabase::property_names(EST_TList<EST_String> &list) const
{
  static const char *const own_names[] = { "sample_rate", "catalogue" };
  const int n_own = (int)(sizeof(own_names) / sizeof(own_names[0]));

  UnitDatabase::property_names(list);

  for (int i = 0; i < n_own; i++)
    list.append(own_names[i]);
}

// Return the value of the named property.  "catalogue" and
// "sample_rate" are answered here; every other name is passed on to
// UnitDatabase::property().
ValueType RawUnitDatabase::property(EST_String property) const
{
  if (property == "catalogue")
    return string_as_value(p_catalogue_filename);

  if (property == "sample_rate")
    return int_as_value(p_sample_rate);

  return UnitDatabase::property(property);
}

// Set the named property from VALUE.
//
// "catalogue" and "sample_rate" are stored directly and yield 1.
// "data_included" and "indexes" are handed to process_data_included()
// and process_indexes() and yield whatever those return.  Any other
// name is passed on to UnitDatabase::set_property().
int RawUnitDatabase::set_property(EST_String property, ValueType value)
{
  if (property == "data_included")
    return process_data_included(value);

  if (property == "indexes")
    return process_indexes(value);

  if (property == "catalogue")
    {
      p_catalogue_filename = value_as_c_string(value);
      return 1;
    }

  if (property == "sample_rate")
    {
      p_sample_rate = value_as_int(value);
      return 1;
    }

  return this->UnitDatabase::set_property(property, value);
}

void RawUnitDatabase::print_description(FILE *stream)
{
  this->UnitDatabase::print_description(stream);

  fprintf(stream, "Catalogue: %s\n", (const char *)p_catalogue_filename);
  if (p_data)
    {
      fprintf(stream, "Data Included:\n");
      for(int n=0; n<p_num_data; n++)
	{
	  const char *cname = UnitDatabase::ContentTypeMap.name(p_data[n].contents);
	  if (p_data[n].contents)
	    if (p_data[n].is_waveform())
	      {
		const char * pm_name = UnitDatabase::ContentTypeMap.name(p_data[n].wave_info.pm_content_type);
		fprintf(stream, "\t%3d: %s '%sxxx.%s' format=%s rate=%d pitchmarks=%s\n",
			n+1, cname?cname:"unknown",
			(const char *)p_data[n].directory,
			(const char *)p_data[n].extension,
			(const char *)p_data[n].wave_info.format,
			p_data[n].wave_info.sample_rate,
			pm_name?pm_name:"None"
			);
	      }
	    else
	      fprintf(stream, "\t%3d: %s '%sxxx.%s' format=%s type=%s\n",
		      n+1, cname?cname:"unknown",
		      (const char *)p_data[n].directory,
		      (const char *)p_data[n].extension,
		      (const char *)p_data[n].coef_info.format,
#if INCLUDE_CONTOUR_TYPE_MAP
		      (const char *)EST_ContourTypeMap.name(p_data[n].coef_info.coefficient_format)
#else
		      (const char *)(EST_String)p_data[n].coef_info.coefficient_format
#endif
			);
	}
    }
}

// Copy NUM_SAMPLES samples starting at START_SAMPLE out of the
// single-channel wave W into a newly allocated wave, resampling the
// copy if its rate differs from SAMPLE_RATE.  Returns NULL if W does
// not have exactly one channel or wave_subwave() reports failure.
// The caller owns the returned wave.
static EST_Wave *extract_wave(EST_Wave &w, int sample_rate,
                              int start_sample, int num_samples)
{
  if (w.num_channels() != 1)
    return NULL;

  EST_Wave *piece = new EST_Wave;

  if (wave_subwave(*piece, w, start_sample, num_samples))
    {
      delete piece;
      return NULL;
    }

  if (piece->sample_rate() != sample_rate)
    piece->resample(sample_rate);

  return piece;
}

/*
 * Return a newly allocated wave holding the samples of UNIT for the
 * waveform content type CONTENT, or NULL on any failure.
 *
 * The file named by the catalogue entry is loaded from the directory
 * and with the extension recorded for CONTENT.  The first and last
 * segment values of the entry (divided by 1000.0 and multiplied by the
 * sample rate) give the start and end samples.  If pitchmarks are
 * available the range is passed through align_to_track() and the end is
 * extended by the length-channel value of the frame found at the final
 * segment time.
 *
 * NULL is returned if UNIT belongs to another database, CONTENT has no
 * waveform data here, there is no catalogue or entry for the unit, the
 * entry has no segments, or the file or the pitchmarks cannot be read.
 * The caller owns the returned wave.
 */
EST_Wave *RawUnitDatabase::get_wave(UnitDatabase::ContentType content, 
                                    Unit &unit)
{
  EST_Wave w, *result= NULL;

  if (unit.database() != this)
    {
      cwarn << "Attempt to get wave from wrong database\n";
      return NULL;
    }

  // The data table only exists once "data_included" has been processed,
  // and CONTENT must select a slot inside it.
  const int slot = (int)content;
  if (p_data == NULL || slot < 0 || slot >= p_num_data)
    return NULL;

  RawUnitDatabase::available_data *data_info = p_data + slot;

  if(!data_info->contents || !data_info->is_waveform())
    return NULL;

  RawUnitCatalogue *catalogue = (RawUnitCatalogue *)RawUnitDatabase::catalogue();

  if (!catalogue)
    return NULL;

  RawUnitCatalogue::Entry *entry = catalogue->lookup(unit.name());

  // With no segments, segments[n_segments-1] below would index before
  // the start of the array.
  if (!entry || entry->n_segments < 1)
    return NULL;

  EST_Pathname filename(EST_Pathname::construct(data_info->directory, entry->filename, data_info->extension));

  EST_read_status status = w.load_file(filename, 
                                       data_info->wave_info.format, 
                                       data_info->wave_info.sample_rate,
                                       data_info->wave_info.sample_format,
                                       EST_NATIVE_BO,
                                       1,
                                       0,
                                       0);

  if (status != read_ok)
    {
      cerr << "cant' access '" << filename << "'\n";
      return NULL;
    }

  EST_Track *pitchmarks=NULL;

  // Prefer the pitchmark source named for this content type; otherwise
  // fall back to ct_pitchmarks if the database has that data.
  if (data_info->wave_info.pm_content_type != ct_none)
    {
      if (!(pitchmarks = get_coefficients(data_info->wave_info.pm_content_type,
                                          EST_ContourType::PITCHMARKS,
                                          unit)))
        {
          cwarn << "can't read pitchmarks\n";
          return NULL;
        }
    }
  else if (coefficient_information(ct_pitchmarks))
    {
      if (!(pitchmarks = get_coefficients(ct_pitchmarks,
                                          EST_ContourType::PITCHMARKS,
                                          unit)))
        {
          cwarn << "can't read pitchmarks\n";
          return NULL;
        }
    }

  const float last_segment = entry->segments[entry->n_segments-1];

  int start_sample = (int)(entry->segments[0]/1000.0 * w.sample_rate() + 0.5);
  int end_sample =   (int)(last_segment/1000.0 * w.sample_rate() + 0.5);

  if (pitchmarks)
    {
      align_to_track(*pitchmarks, 
                     start_sample, end_sample, 
                     w.sample_rate());

      int end_index = pitchmarks->index(last_segment/1000.0);
      end_sample += (int)pitchmarks->a(end_index, channel_length);
    }

  int num_samples = end_sample-start_sample;

  if (status == format_ok)
    result = extract_wave(w, 
                          data_info->wave_info.sample_rate,
                          start_sample,
                          num_samples);

  delete pitchmarks;
  return result;
}

// Return a newly allocated track holding the result of
// extract(T, START, END).  The caller owns the returned track.
static EST_Track *extract_coefs(EST_Track &t, float start, float end)
{
  EST_Track *piece = new EST_Track;

  *piece = extract(t, start, end);
  return piece;
}

/*
 * Return a newly allocated track holding the coefficients of UNIT for
 * the non-waveform content type CONTENT, or NULL on any failure.
 * FORMAT is ignored.
 *
 * The file named by the catalogue entry is loaded from the directory
 * and with the extension recorded for CONTENT.  Frame times are then
 * taken from the first channel available among: the standard time
 * channel, the standard length channel, the configured time channel
 * and the configured length channel, scaled by the configured time
 * scale (1.0 if that is zero).  Finally the part of the track between
 * the first and last segment values of the entry (divided by 1000.0)
 * is extracted.
 *
 * NULL is returned if UNIT belongs to another database, CONTENT has no
 * coefficient data here, there is no catalogue or entry for the unit,
 * the entry has no segments, or the file cannot be read.  The caller
 * owns the returned track.
 */
EST_Track *RawUnitDatabase::get_coefficients(UnitDatabase::ContentType content, EST_ContourType format, Unit &unit)
{
  (void)format;
  EST_Track t, *result= NULL;

  if (unit.database() != this)
    {
      cwarn << "Attempt to get coefficients from wrong database\n";
      return NULL;
    }

  // The data table only exists once "data_included" has been processed,
  // and CONTENT must select a slot inside it.
  const int slot = (int)content;
  if (p_data == NULL || slot < 0 || slot >= p_num_data)
    return NULL;

  RawUnitDatabase::available_data *data_info = p_data + slot;

  if(!data_info->contents || data_info->is_waveform())
    return NULL;

  RawUnitCatalogue *catalogue = (RawUnitCatalogue *)RawUnitDatabase::catalogue();

  if (!catalogue)
    return NULL;

  RawUnitCatalogue::Entry *entry = catalogue->lookup(unit.name());

  // With no segments, segments[n_segments-1] below would index before
  // the start of the array.
  if (!entry || entry->n_segments < 1)
    return NULL;

  EST_Pathname filename(EST_Pathname::construct(data_info->directory, entry->filename, data_info->extension));

  EST_read_status status = t.load(filename, 
                                  data_info->coef_info.frame_shift
                                  );

  if (status == read_ok)
    {
      float time_scale = data_info->coef_info.time_scale;
      if (time_scale == 0.0)
        time_scale=1.0;

      t.create_map();
      
      if (t.has_channel(channel_time))
        t.channel_to_time(channel_time, time_scale);
      else if (t.has_channel(channel_length))
        t.channel_to_time_lengths(channel_length, time_scale);
      else if (data_info->coef_info.time_channel != "" && t.has_channel(data_info->coef_info.time_channel))
        t.channel_to_time(data_info->coef_info.time_channel, time_scale);
      else if (data_info->coef_info.length_channel != "" && t.has_channel(data_info->coef_info.length_channel))
        t.channel_to_time_lengths(data_info->coef_info.length_channel, time_scale);

      result = extract_coefs(t, 
                             entry->segments[0]/1000.0,
                             entry->segments[entry->n_segments-1]/1000.0);
    }
  else
    cerr << "cant' access '" << filename << "'\n";

  return result;
}

// Return a newly allocated vector holding the segment values of UNIT's
// catalogue entry, each divided by 1000.0.  Returns NULL if UNIT
// belongs to another database or there is no catalogue or no entry for
// the unit.  The caller owns the returned vector.
EST_TVector<float> *RawUnitDatabase::get_segments(Unit &unit)
{
  if (unit.database() != this)
    {
      cwarn << "Attempt to get segments from wrong database\n";
      return NULL;
    }

  RawUnitCatalogue *catalogue = (RawUnitCatalogue *)RawUnitDatabase::catalogue();
  if (!catalogue)
    return NULL;

  RawUnitCatalogue::Entry *entry = catalogue->lookup(unit.name());
  if (!entry)
    return NULL;

  const int count = entry->n_segments;
  EST_TVector<float> *times = new EST_TVector<float>(count);

  for (int k = 0; k < count; k++)
    (*times)(k) = entry->segments[k]/1000.0;

  return times;
}

// Explicit instantiations of the hash table templates for
// RawUnitCatalogue::Entry pointers keyed by EST_String (presumably the
// type behind p_entries -- confirm against the class declaration).
// They are compiled only when INSTANTIATE_TEMPLATES is defined, in
// which case the template implementation file is included first.
#if defined(INSTANTIATE_TEMPLATES)
#include "../base_class/EST_THash.cc"
template class EST_TStringHash<RawUnitCatalogue::Entry *>;
template class EST_THash<EST_String,RawUnitCatalogue::Entry *>;
template class EST_Hash_Pair<EST_String,RawUnitCatalogue::Entry *>;
#endif
// Definition of the static Dummy_Value member for this instantiation of
// EST_THash, initialised to NULL.
RawUnitCatalogue::Entry *EST_THash<EST_String, RawUnitCatalogue::Entry *>::Dummy_Value = NULL;

