#!/usr/bin/env python
#---
# $Id: duali,v 1.17 2003/12/09 13:20:29 elzubeir Exp $
#
# ------------
# Description:
# ------------
#
# The Duali spellchecker
#
# (C) Copyright 2003, Arabeyes, Mohammed Elzubeir
# -----------------
# Revision Details:    (Updated by Revision Control System)
# -----------------
#  $Date: 2003/12/09 13:20:29 $
#  $Author: elzubeir $
#  $Revision: 1.17 $
#  $Source: /home/arabeyes/cvs/projects/duali/pyduali/duali,v $
#
#  This program is written under the BSD License.
#---

import sys, getopt, os, ConfigParser
import pyduali.aradict, pyduali.aralex, pyduali.araspell
#from pyduali.arabic import *

# Program name as invoked: the file name part of argv[0] without its
# extension.  Used in the usage and version-requirement messages.
scriptname = os.path.basename(sys.argv[0])
scriptname = os.path.splitext(scriptname)[0]

# Version string printed for -V / --version.
scriptversion = '0.2.0'

# Location of the configuration file read by readconf().
conf = '/etc/duali.conf'

def usage():
  """Print the copyright notice, the option summary and the license note.

  Writes to standard output and returns nothing.
  """
  banner = "(C) Copyright 2003, Arabeyes, Mohammed Elzubeir\n"
  synopsis = "Usage: %s -c filename [OPTIONS]" % scriptname
  option_help = (
    "\t[-h | --help           ]\toutputs this usage message",
    "\t[-V | --version        ]\tprogram version",
    "\t[-c | --check= filename]\tinput file to spellcheck",
    "\t[-C | --charset        ]\tcharacter encoding (cp1256, utf-8)",
    "\t[-n | --normalize      ]\tturn normalize mode off (default on)",
    "\t[-p | --path           ]\tpath to dictionary database",
    "\t[-v | --verbose        ]\tverbose out (for debugging)",
  )
  footer = "\r\nThis program is licensed under the BSD License\n"

  # Every entry gets its own print, so entries that already end in a
  # newline are followed by a blank line on output.
  for text in (banner, synopsis) + option_help + (footer,):
    print(text)

def grabargs():
  """Parse the command line into the settings used by main().

  The dictionary path and the normalize flag start out with the values
  returned by readconf(); command-line options override them.  The
  charset defaults to 'utf-8', verbose to 0 and the file name to ''.

  Prints the usage text and exits with status 0 when no arguments are
  given, when -h/--help is present, or when getopt rejects the
  arguments.  Prints the version and exits with status 0 on
  -V/--version.

  Returns the tuple (fname, charset, path, normalize, verbose).
  """
  verbose = 0
  charset = 'utf-8'
  fname = ''

  path, normalize = readconf()

  if not sys.argv[1:]:
    usage()
    sys.exit(0)

  # Each long option must be listed exactly once.  getopt resolves an
  # abbreviated long option by prefix and rejects a prefix that matches
  # more than one entry, so a duplicated "charset=" made every
  # abbreviation of --charset (e.g. --char) fail as ambiguous.
  long_options = ["help", "version", "verbose", "normalize",
                  "charset=", "path=", "check="]
  try:
    opts, _extra = getopt.getopt(sys.argv[1:], "hVvnC:p:c:", long_options)
  except getopt.GetoptError:
    # Exit status is left at 0 so existing callers see no change.
    usage()
    sys.exit(0)

  for o, val in opts:
    if o in ("-h", "--help"):
      usage()
      sys.exit(0)
    elif o in ("-V", "--version"):
      print(scriptversion)
      sys.exit(0)
    elif o in ("-v", "--verbose"):
      verbose = 1
    elif o in ("-c", "--check"):
      fname = val
    elif o in ("-n", "--normalize"):
      # The flag switches normalization OFF; it is on by default.
      normalize = 0
    elif o in ("-C", "--charset"):
      charset = val
    elif o in ("-p", "--path"):
      path = val
  return (fname, charset, path, normalize, verbose)


def readconf():
  """Read the dictionary path and normalize flag from the config file.

  The file named by the module-level `conf` is parsed with
  ConfigParser.  Its [Main] section must provide the options DictPath
  and Normalize (a boolean).

  Returns the tuple (dict_path, normalize).  If the file cannot be
  opened or read, a warning is printed and ('', 1) is returned.  If the
  [Main] section is absent, or one of the two options is missing or
  cannot be converted, a message is printed and the program exits with
  status 1.
  """
  config = ConfigParser.ConfigParser()
  try:
    conf_file = open(conf)
    # Nested try/finally (rather than `with`) keeps the script usable on
    # the old Python 2 releases it claims to support, and makes sure the
    # file is closed even if parsing fails.
    try:
      config.readfp(conf_file)
    finally:
      conf_file.close()
  except IOError:
    print("Warning. Unable to open '%s' configuration file" % conf)
    return ('', 1)

  if not config.has_section('Main'):
    print("Bad config file. Please refer to documentation. Exiting..")
    sys.exit(1)

  # Read both options directly.  Looping over the section's options to
  # fetch the same two values left them unbound when the section was
  # empty, which surfaced as an UnboundLocalError at the return.
  try:
    dict_path = config.get('Main', 'DictPath')
    normalize = config.getboolean('Main', 'Normalize')
  except (ConfigParser.Error, ValueError):
    # Missing option, bad interpolation, or a non-boolean Normalize.
    print("Bad config file. Please refer to documentation. Exiting..")
    sys.exit(1)

  return (dict_path, normalize)


def main():
  """Spellcheck the file named on the command line and report misses.

  Settings come from grabargs().  Each line of the input file is passed
  through the lexer's stripextras, stripPunctuations and stripDiacritics
  calls and then split on single spaces.  Every non-empty word is
  optionally passed through the lexer's normalize call and handed to
  the spellchecker; a word it rejects is printed together with its line
  number and its position among the non-empty words of that line (both
  counted from 1).

  Exits with status 0 when done, when the Python version is too old, or
  when no file name was given; exits with status 1 when the input file
  cannot be read.
  """
  fname, charset, path, normalize, verbose = grabargs()

  # Compare (major, minor) as one tuple.  Testing the two fields
  # independently judges the minor number without regard to the major.
  if sys.version_info[:2] < (2, 2):
    print("""
%s requires Python 2.2.x at least. Please upgrade your Python version.
""" % scriptname)
    sys.exit(0)

  if not fname:
    print("I need a file to check!")
    usage()
    sys.exit(0)

  #
  # initialize the aralex, aradict and araspell classes
  #
  mylex = pyduali.aralex.aralex(nostem=1, verbose=verbose)

  # An empty path means "let aradict use its own default location".
  if path:
    mydict = pyduali.aradict.aradict(data_path=path, verbose=verbose)
  else:
    mydict = pyduali.aradict.aradict(verbose=verbose)

  myspell = pyduali.araspell.araspell(charset=charset, verbose=verbose,
                              mydict=mydict, mylex=mylex)

  # Read the whole input up front and always release the file handle.
  try:
    infile = open(fname, 'r')
    try:
      lines = infile.readlines()
    finally:
      infile.close()
  except IOError:
    print("Unable to read '%s'" % fname)
    sys.exit(1)

  # initialize the line counter
  line_no = 1

  for line in lines:
    line = mylex.stripextras(line)
    line = mylex.stripPunctuations(line.decode(charset)).encode(charset)
    line = mylex.stripDiacritics(line.decode(charset)).encode(charset)
    words = line.split(' ')
    word_no = 1
    for word in words:
      word = word.strip()
      if verbose:
        print("Word: [%s]" % word)
      # Consecutive spaces produce empty strings; they are skipped and
      # do not advance the word counter.
      if word:
        if normalize:
          word = mylex.normalize(word.decode(charset)).encode(charset)
        if not myspell.spellcheck(word):
          print("Line: %d Word %d: Incorrect: [%s]" % (line_no, word_no, word))
        word_no += 1
    line_no += 1

  sys.exit(0)

# Run the spellchecker only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()

