/*
** Copyright (C) 2003-2006 Teus Benschop.
**  
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**  
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**  
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
**  
*/


#include "languages.h"
#include <config.h>
#include "utilities.h"
#include "gwrappers.h"
#include <sqlite3.h>
#include "directories.h"
#include "books.h"
#include "bible.h"



/*
Following information was taken from the BibleTime source, file clanguagemgr.cpp.

	Chris explained in an eMail how language codes are built:
	
	Preference order for locale codes are:
	
	ISO 639-1
	ISO 639-2
	Ethnologue (http://www.ethnologue.org/)
	
	We intend to always follow OSIS locale conventions, which state that
	Ethnologue codes will be encoded in the format x-E-??? where ???
	represents the 3-letter Ethnologue code in capital letters (though
	capitalization really doesn't matter here).
	
	Some older modules maintain a former format for Ethnologue codes of
	xx-???, including the AleWiesler module.
	
	--Chris
*/


// Number of strings in the table below: two per language (code, then name).
// Keep it even and in sync with the table whenever entries are added or removed.
#define LANGUAGE_COUNT 380


// Flat table of language codes and their English names.
// Even indices hold the code, the following odd index holds the name.
// A few names occur more than once (Kabyle, Macedonian, Sango); a lookup by
// name that scans from the start finds the first occurrence.
char *languages [LANGUAGE_COUNT] =
{
  "aa", "Afar",
  "ab", "Abkhazian",
  "ae", "Avestan",
  "af", "Afrikaans",
  "am", "Amharic",
  "ang", "English, Old (ca.450-1100)",
  "ar", "Arabic",
  "as", "Assamese",
  "az", "Azerbaijani",
  "ba", "Bashkir",
  "be", "Belarusian",
  "bg", "Bulgarian",
  "bh", "Bihari",
  "bi", "Bislama",
  "bn", "Bengali",
  "bo", "Tibetan",
  "br", "Breton",
  "bs", "Bosnian",
  "ca", "Catalan",
  "ce", "Chechen",
  "ceb", "Cebuano",
  "ch", "Chamorro",
  "co", "Corsican",
  "cop", "Coptic",
  "cs", "Czech",
  "cu", "Church Slavic",
  "cv", "Chuvash",
  "cy", "Welsh",
  "da", "Danish",
  "de", "German",
  "dz", "Dzongkha",
  "el", "Greek, Modern (1453-)",
  "en", "English",
  "en_US", "American English",
  "enm", "English, Middle (1100-1500)",
  "eo", "Esperanto",
  "es", "Spanish",
  "et", "Estonian",
  "eu", "Basque",
  "fa", "Persian",
  "fi", "Finnish",
  "fj", "Fijian",
  "fo", "Faroese",
  "fr", "French",
  "fy", "Frisian",
  "ga", "Irish",
  "gd", "Gaelic (Scots)",
  "gl", "Gallegan",
  "gn", "Guarani",
  "gu", "Gujarati",
  "got", "Gothic",
  "gv", "Manx",
  "grc", "Greek, Ancient (to 1453)",
  "he", "Hebrew",
  "hau", "Hausa",
  "haw", "Hawaiian",
  "hi", "Hindi",
  "ho", "Hiri Motu",
  "hr", "Croatian",
  "hu", "Hungarian",
  "hy", "Armenian",
  "hz", "Herero",
  "ia", "Interlingua",
  "id", "Indonesian",
  "ie", "Interlingue",
  "ik", "Inupiaq",
  "is", "Icelandic",
  "it", "Italian",
  "iu", "Inuktitut",
  "ja", "Japanese",
  "ka", "Georgian",
  "x-E-KAB", "Kabyle",
  "ki", "Kikuyu",
  "kj", "Kuanyama",
  "kk", "Kazakh",
  "kl", "Kalaallisut",
  "km", "Khmer",
  "kn", "Kannada",
  "ko", "Korean",
  "ks", "Kashmiri",
  "ku", "Kurdish",
  "kv", "Komi",
  "kw", "Cornish",
  "ky", "Kirghiz",
  "la", "Latin",
  "lb", "Letzeburgesch",
  "ln", "Lingala",
  "lo", "Lao",
  "lt", "Lithuanian",
  "lv", "Latvian",
  "mg", "Malagasy",
  "mh", "Marshall",
  "mi", "Maori",
  "mk", "Macedonian",
  "ml", "Malayalam",
  "mn", "Mongolian",
  "mo", "Moldavian",
  "mos", "More",
  "mr", "Marathi",
  "ms", "Malay",
  "mt", "Maltese",
  "my", "Burmese",
  "na", "Nauru",
  "nb", "Norwegian Bokmål",
  "nd", "Ndebele, North",
  "nds", "Low German; Low Saxon",
  "ne", "Nepali",
  "ng", "Ndonga",
  "nl", "Dutch",
  "nn", "Norwegian Nynorsk",
  "no", "Norwegian",
  "nr", "Ndebele, South",
  "nv", "Navajo",
  "ny", "Chichewa; Nyanja",
  "oc", "Occitan (post 1500); Provençal",
  "om", "Oromo",
  "or", "Oriya",
  "os", "Ossetian; Ossetic",
  "pa", "Panjabi",
  "pap", "Papiamento",
  "pi", "Pali",
  "pl", "Polish",
  "ps", "Pushto",
  "pt", "Portuguese",
  "pt_BR", "Brasilian Portuguese",
  "qu", "Quechua",
  "rm", "Raeto-Romance",
  "rn", "Rundi",
  "ro", "Romanian",
  "ru", "Russian",
  "rw", "Kinyarwanda",
  "sa", "Sanskrit",
  "sc", "Sardinian",
  "sco", "Scots",
  "sd", "Sindhi",
  "se", "Northern Sami",
  "sg", "Sango",
  "si", "Sinhalese",
  "sk", "Slovak",
  "sl", "Slovenian",
  "sm", "Samoan",
  "sn", "Shona",
  "so", "Somali",
  "sq", "Albanian",
  "sr", "Serbian",
  "ss", "Swati",
  "st", "Sotho, Southern",
  "su", "Sundanese",
  "sv", "Swedish",
  "sw", "Swahili",
  "syr", "Syriac",
  "ta", "Tamil",
  "te", "Telugu",
  "tg", "Tajik",
  "th", "Thai",
  "tk", "Turkmen",
  "tl", "Tagalog",
  "tn", "Tswana",
  "tr", "Turkish",
  "ts", "Tsonga",
  "tt", "Tatar",
  "tw", "Twi",
  "ty", "Tahitian",
  "ug", "Uighur",
  "uk", "Ukrainian",
  "ur", "Urdu",
  "uz", "Uzbek",
  "vi", "Vietnamese",
  "vo", "Volapük",
  "wo", "Wolof",
  "xh", "Xhosa",
  "x-E-BAR", "Bavarian",
  "x-E-DJE", "Zarma",
  "x-E-GSW", "Alemannisch",
  "x-E-HAT", "Haitian Creole French",
  "x-E-ITZ", "Itzá",
  "x-E-JIV", "Shuar",
  "x-E-KEK", "Kekchí",
  "x-E-KAB", "Kabyle",
  "x-E-LMO", "Lombard",
  "x-E-MKJ", "Macedonian",
  "x-E-PDG", "Tok Pisin",
  "x-E-PPK", "Uma",
  "x-E-RMY", "Romani, Vlax",
  "x-E-SAJ", "Sango",
  "x-E-SRN", "Sranan",
  "yi", "Yiddish",
  "za", "Zhuang",
  "zh", "Chinese",
  "zu", "Zulu"
};


vector<ustring> languages_get_sword ()
// Returns the names of all languages in the table, sorted.
{
  vector <ustring> names;
  // The table alternates code and name, so the names sit at the odd indices.
  for (unsigned int name_index = 1; name_index < LANGUAGE_COUNT; name_index += 2) {
    names.push_back (languages[name_index]);
  }
  sort (names.begin(), names.end());
  return names;
}


ustring language_encode_sword (const ustring& language)
// Looks up the code that belongs to the language name.
// Returns the code of the first table entry whose name equals "language",
// or an empty string if no entry matches.
{
  // Names sit at the odd indices; each one is preceded by its code.
  for (unsigned int name_index = 1; name_index < LANGUAGE_COUNT; name_index += 2) {
    if (language == languages[name_index])
      return languages[name_index - 1];
  }
  return ustring ();
}


ustring language_get_filename (ustring language)
// Gives the filename of the database for the language, located in the
// templates directory: "language_" + the casefolded name with spaces replaced
// by underscores + ".sql".
{
  replace_text (language, " ", "_");
  ustring basename = "language_" + language.casefold() + ".sql";
  return gw_build_filename (directories_get_templates (), basename);
}


ustring language_get_name (ustring path)
// Derives a language name from the path of a language file.
// The directory part is dropped, then the first 9 characters (the length of
// the "language_" prefix) and the last 4 characters (the length of a suffix
// such as ".sql") are removed. Underscores become spaces, and the first
// character of the name and every character following a space are passed
// through upperCase ().
// The prefix and suffix are stripped by length only; their text is not checked.
{
  const size_t prefix_length = 9;
  const size_t suffix_length = 4;
  ustring name = gw_path_get_basename (path);
  name.erase (0, prefix_length);
  // Only strip the suffix if there is room for one. Without this guard the
  // start position underflows for short names and erase () throws.
  if (name.length() >= suffix_length)
    name.erase (name.length() - suffix_length, suffix_length);
  replace_text (name, "_", " ");
  // Capitalize the start of every word.
  size_t pos = 0;
  while (pos != string::npos) {
    name.replace (pos, 1, upperCase (name.substr (pos, 1)));
    pos = name.find (" ", pos);
    // Step over the space to land on the first character of the next word.
    if (pos != string::npos) pos++;
  }
  return name;
}


vector<ustring> language_get_raw_templates_available ()
// Collects the full paths of all raw language templates, that is, the files
// named "language_*.txt". Those in the package data directory come first,
// followed by those in the user's templates directory.
{
  vector <ustring> templates;

  // Templates in the package data directory.
  ReadFiles package_files (PACKAGE_DATA_DIR, "language_", ".txt");
  for (size_t n = 0; n < package_files.files.size(); n++) {
    ustring full_path = gw_build_filename (PACKAGE_DATA_DIR, package_files.files[n]);
    templates.push_back (full_path);
  }

  // Templates in the user's templates directory.
  ReadFiles user_files (directories_get_templates_user (), "language_", ".txt");
  for (size_t n = 0; n < user_files.files.size(); n++) {
    ustring full_path = gw_build_filename (directories_get_templates_user (), user_files.files[n]);
    templates.push_back (full_path);
  }

  return templates;
}


void language_import_textfile (const ustring& name, const ustring& textfile)
// Imports a language textfile.
// To support cases like "GEN = Genesis/1 Mose = Gen", there is an 
// alternate table in the database so that "Genesis" and "1 Mose" both are 
// recognized, in addition to "Genesis/1 Mose".
{
  sqlite3 *db;
  int rc;
  char *error = NULL;
  try 
  {
    // Read the text.
    ReadText rt (textfile);
    // Open and create the database.
    unlink (language_get_filename (name).c_str());
    rc = sqlite3_open (language_get_filename (name).c_str (), &db);
    if (rc) throw runtime_error (sqlite3_errmsg(db));
    sqlite3_busy_timeout (db, 1000);
    char * sql;
    sql = g_strdup_printf ("create table books (id integer, book string, bookcf string, abbrev string, abbrevcf string);");
    rc = sqlite3_exec (db, sql, NULL, NULL, &error);
    g_free (sql);
    if (rc) throw runtime_error (sqlite3_errmsg(db));
    sql = g_strdup_printf ("create table altbooks (id integer, bookcf string);");
    rc = sqlite3_exec (db, sql, NULL, NULL, &error);
    g_free (sql);
    if (rc) throw runtime_error (sqlite3_errmsg(db));
    sql = g_strdup_printf ("PRAGMA synchronous=OFF;");
    rc = sqlite3_exec (db, sql, NULL, NULL, &error);
    g_free (sql);
    if (rc) throw runtime_error (sqlite3_errmsg(db));
    // Go through the lines.
    cout << name << " " << rt.lines.size() << " lines" << endl;
    set <unsigned int> ids_done;
    for (unsigned int i = 0; i < rt.lines.size(); i++) {
      try
      {
        // Skip comments.
        if (rt.lines[i].find ("#") != string::npos)
          continue;
        if (rt.lines[i].find ("//") != string::npos)
          continue;
        // Skip headers for just now.
        if (rt.lines[i].find ("[") != string::npos)
          continue;
        // Convert the line.
        ustring line = rt.lines[i];
        Parse parse (line, false, "=");
        if (parse.words.size() != 3) throw runtime_error (line);
        int id = book_find_valid (trim (parse.words[0]));
        if (id == 0) throw runtime_error ("Unknown English book in " + line);
        ustring book = trim (parse.words[1]);
        if (book.empty()) runtime_error ("No book in " + line);
        ustring abbreviation = trim (parse.words[2]);
        if (abbreviation.empty()) runtime_error ("No abbreviation in " + line);
        sql = g_strdup_printf ("insert into books values (%d, '%s', '%s', '%s', '%s');", id, book.c_str(), book.casefold().c_str(), abbreviation.c_str(), abbreviation.casefold().c_str());
        rc = sqlite3_exec (db, sql, NULL, NULL, &error);
        g_free (sql);
        if (rc) throw runtime_error (sqlite3_errmsg(db));
        // Mark this id as being done.
        ids_done.insert (id);
        // See if there are alternate booknames.
        Parse alternates (book, false, "/");
        if (alternates.words.size() > 1) {
          for (unsigned int i2 = 0; i2 < alternates.words.size(); i2++) {
            ustring alternate = trim (alternates.words[i2]);
            sql = g_strdup_printf ("insert into altbooks values (%d, '%s');", id, alternate.casefold().c_str());
            rc = sqlite3_exec (db, sql, NULL, NULL, &error);
            g_free (sql);
            if (rc) throw runtime_error (sqlite3_errmsg(db));            
          }
        }
      }
      catch (exception& ex) {
        ustring msg = "Skipping: ";
        msg.append (ex.what ());
        write (1, msg.c_str(), strlen (msg.c_str()));
        cout << endl;
      }    
        
    }    
    // Any books not yet done, add the english values for them.
    vector <unsigned int> ids = books_type_to_ids (btUnknown);
    for (unsigned int i = 0; i < ids.size(); i++) {
      if (ids_done.find (ids[i]) == ids_done.end()) {
        ustring book = books_id_to_english (ids[i]);
        sql = g_strdup_printf ("insert into books values (%d, '%s', '%s', '%s', '%s');", ids[i], book.c_str(), book.casefold().c_str(), book.c_str(), book.casefold().c_str());
        rc = sqlite3_exec (db, sql, NULL, NULL, &error);
        g_free (sql);
        if (rc) throw runtime_error (sqlite3_errmsg(db));
        cout << "Supplementing with " << book << endl;
      }
    }
  }
  catch (exception& ex) {
    unlink (language_get_filename (name).c_str());
    gw_critical (ex.what ());
  }    
  sqlite3_close (db);
}


vector<ustring> language_get_ones_available ()
// Returns the sorted names of the languages for which a database, that is a
// file named "language_*.sql", exists in the templates directory.
{
  ReadFiles databases (directories_get_templates (), "language_", ".sql");
  vector <ustring> names;
  for (size_t n = 0; n < databases.files.size(); n++) {
    ustring name = language_get_name (databases.files[n]);
    names.push_back (name);
  }
  sort (names.begin(), names.end());
  return names;
}
