#!/usr/bin/env python
#
#---
# $Id: arabic2trans,v 1.1 2003/11/01 07:15:44 elzubeir Exp $
#
# ------------
# Description:
# ------------
# This script will convert an encoded Arabic file (UTF-8 or CP1256) to a
# transliterated file using the Buckwalter transliteration.
# 
#
# (C) Copyright 2003, Arabeyes, Mohammed Elzubeir
# -----------------
# Revision Details:    (Updated by Revision Control System)
# -----------------
#  $Date: 2003/11/01 07:15:44 $
#  $Author: elzubeir $
#  $Revision: 1.1 $
#  $Source: /home/arabeyes/cvs/projects/duali/pyduali/arabic2trans,v $
#
#  This program is written under the BSD License.
#---

import sys, getopt, os, string
from pyduali.trans_table import *

# Name the script was invoked as, with directory and extension stripped;
# interpolated into the "Usage:" line printed by usage().
scriptname = os.path.basename(os.path.splitext(sys.argv[0])[0])

# RCS identification string, printed verbatim for -V / --version.
scriptversion = '$Id: arabic2trans,v 1.1 2003/11/01 07:15:44 elzubeir Exp $'


def help():
  """Print the long description of the tool, followed by the usage summary.

  Note: this intentionally keeps the name ``help`` (shadowing the builtin)
  because the option parser in this file calls it by that name.
  """
  description = """
This utility will convert a UTF-8 or CP1256 encoded file to a Buckwalter
transliteration file. The default encoding is set to the UTF-8. It should be
noted that the Windows CP-1256 encoding is sometimes not sufficient to
convert some of the transliterations. It is therefore advised to only use
UTF-8 encoding.

"""
  # A single parenthesised argument prints identically under the Python 2
  # print statement and the Python 3 print function.
  print(description)
  usage()
  
def usage():
  """Print the copyright banner, the command-line synopsis and the license.

  The program name shown in the "Usage:" line is the module-level
  ``scriptname``. Everything is written to standard output, one ``print``
  per entry, so each entry is followed by a newline.
  """
  lines = (
    "(C) Copyright 2003, Arabeyes, Mohammed Elzubeir\n",
    "Usage: %s -f filename [OPTIONS]" % scriptname,
    "\t[-h | --help            ]\toutputs a help message",
    "\t[-V | --version         ]\tprogram version",
    "\t[-f | --file= filename  ]\tinput encoded file",
    "\t[-c | --charset ENCODING]\tencoding (cp1256 or utf-8)",
    # Fixed the user-visible typo "progroam" -> "program".
    "\r\nThis program is licensed under the BSD License.\n",
  )
  for line in lines:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(line)

def grabargs():
  """Parse ``sys.argv`` and return the tuple ``(fname, charset)``.

  Recognised options:
    -h / --help      print the help text and exit with status 0
    -V / --version   print the version string and exit with status 0
    -f / --file      input file name (required)
    -c / --charset   input encoding; defaults to 'utf-8'

  Exits (raises SystemExit) instead of returning when:
    * no arguments are given at all: usage is printed, status 0;
    * an option is invalid or lacks its argument: the error and the usage
      are printed, status 2;
    * no input file was supplied: usage is printed, status 2.
  """
  argv = sys.argv[1:]
  if not argv:
    usage()
    sys.exit(0)

  fname = None
  charset = 'utf-8'

  try:
    # Both value-taking long options need the trailing "=": without it
    # getopt treats "--charset" as a flag, so the encoding was never set.
    opts, _positional = getopt.getopt(
        argv, "hVc:f:", ["help", "version", "charset=", "file="])
  except getopt.GetoptError as err:
    sys.stderr.write("%s\n" % err)
    usage()
    # A malformed command line is a failure; do not report success.
    sys.exit(2)

  for o, val in opts:
    if o in ("-h", "--help"):
      help()
      sys.exit(0)
    elif o in ("-V", "--version"):
      print(scriptversion)
      sys.exit(0)
    elif o in ("-f", "--file"):
      fname = val
    elif o in ("-c", "--charset"):
      charset = val

  if fname is None:
    # Previously this path crashed with UnboundLocalError on the return.
    sys.stderr.write("missing required option -f/--file\n")
    usage()
    sys.exit(2)

  return (fname, charset)

def a2t(s):
  """Transliterate *s* character by character, returning the result REVERSED.

  Each character is looked up in ``a2t_table`` (not defined in this file;
  presumably supplied by the ``pyduali.trans_table`` star import). A
  character with no entry is passed through unchanged.

  The output order is the reverse of the input order: the replacement for
  the last input character comes first. Callers that want the original
  order must pass in an already-reversed string.

  Returns an empty string for empty input.
  """
  # Collect the replacements and join once; the original prepended to a
  # growing string, which re-copied the whole result for every character.
  pieces = [a2t_table.get(ch, ch) for ch in s]
  pieces.reverse()
  return "".join(pieces)

def reverse_string(s):
  """Return string *s* with its characters in reverse order.

  The result has the same type as the input (byte string or unicode
  string). An empty string is returned unchanged.
  """
  # A slice with step -1 reverses in one pass; the previous loop rebuilt
  # the string once per character.
  return s[::-1]

def main():
  """Transliterate the input file into ``<input name>-trans``.

  The file name and encoding come from grabargs(). The whole input is read
  first, then each line is decoded with the chosen charset, transliterated
  and written to the output file. The process always ends through
  ``sys.exit(0)`` on success; decoding or I/O errors propagate.
  """
  fname, charset = grabargs()

  # Read raw bytes and decode explicitly; the context manager closes the
  # handle, which the original never did.
  with open(fname, 'rb') as fin:
    raw_lines = fin.readlines()

  with open("%s-trans" % fname, 'wb') as fout:
    for raw in raw_lines:
      text = raw.decode(charset)
      # a2t() returns its result in reverse order, so the line is reversed
      # beforehand to make the output come out in the original order.
      converted = a2t(reverse_string(text))
      # Encode text explicitly, using the input charset. Writing a unicode
      # string straight to a byte file relied on the implicit ASCII codec
      # and failed on any non-ASCII character left untransliterated.
      if not isinstance(converted, bytes):
        converted = converted.encode(charset)
      fout.write(converted)

  sys.exit(0)

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
  main()
