#!/usr/bin/python
#
# Copyright (C) 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Script to create Ganeti instances in batches.

batcher is a Python script that automates the creation of
Ganeti instances.

"""


from email import MIMEText
import logging
import optparse
import os
import shutil
import smtplib
import sys
import tempfile
import time
import simplejson
import errno
import traceback

from ganeti import utils
from ganeti import errors
from ganeti import constants


# Files whose first line holds the cluster name / the current master node.
_CLUSTER_NAME_FILE = '%s/ssconf_cluster_name' % constants.DATA_DIR
_CLUSTER_MASTER_FILE = '%s/ssconf_master_node' % constants.DATA_DIR
# Created exclusively by LockWrapped so that only one batcher runs at a time.
_LOCKFILE = '%s/batcher.lock' % constants.LOCK_DIR


def ParseCommandline():
  """Build batcher's option parser and run it over the commandline.

  Returns:
     options: the optparse values object; options that were not given on the
     commandline carry the defaults registered below (or None if there is no
     default).

  """
  parser = optparse.OptionParser(usage='usage: %prog [options]')
  add = parser.add_option

  # The registration order is the order shown by --help.
  add('-i', '--instancesfile', dest='instancesfile',
      help='define the file holding the instance definitions.')
  add('-l', '--logfile', dest='logfile',
      help='define batcher\'s log file')
  add('-d', '--debug', dest='debug', action='store_true',
      help='run with debugging information')
  add('-n', '--notify', dest='notify',
      help='list of recipients to notify')
  add('-s', '--sender', dest='sender',
      help='From: field for the runtime report')
  add('-f', '--force', dest='force', action='store_true',
      help='really perform the instance creations')
  add('--no-wait-sync', dest='nowait', action='store_true',
      help='Do not wait for the disks to sync (DANGEROUS).')
  add('-k', '--keep-files', dest='keepfiles', action='store_true',
      help='Keep config and error files instead of deleting them.')
  add('--sleep', dest='sleep', type='int',
      help='Amount of seconds to sleep between instance creations.')
  add('--iallocator', dest='iallocator',
      help='Allocation algorithm to use for primary/secondary instances.')

  defaults = {
      'instancesfile': '/var/spool/batcher/instances.json',
      'logfile': '/var/log/batcher/batcher.log',
      'debug': False,
      'force': False,
      'nowait': False,
      'keepfiles': False,
      'iallocator': 'dumb',
  }
  parser.set_defaults(**defaults)

  # Positional arguments are not used by batcher, only the options are.
  (options, _unused_args) = parser.parse_args()
  return options


def LockWrapped(meth):
  """Decorator for lock wrapped functions (like main)."""
  def lockwrapper(*args):
    try:
      pidfd = os.open(_LOCKFILE, os.O_CREAT|os.O_EXCL)
      try:
        meth(*args)
      finally:
        os.unlink(_LOCKFILE)
        os.close(pidfd)
    except EnvironmentError, err:
      if err.errno == errno.EEXIST:
        newmsg = ('Batcher lockfile exists. Batcher is either already running'
                  ' or there is a stale lockfile (%s).') % _LOCKFILE
        raise BatcherLockError(newmsg)
      else:
        print '%s, aborting.\n%s' % (err, traceback.format_exc())
        sys.exit(254)
  return lockwrapper


def RemoveFile(file_path):
  """Remove a file.

  Args:
    file_path: A string containing the full path to the file.

  Raises:
    BatcherGenericError: There was an error removing file_path.

  """
  try:
    os.remove(file_path)
  except EnvironmentError, msg:
    raise BatcherGenericError(msg)


def MakeBackup(file_name):
  """Copy a file to a uniquely named backup next to the original.

  The backup is created in the directory of file_name; its name starts with
  '<basename>.backup-<unix timestamp>.' followed by a suffix chosen by
  tempfile.mkstemp.

  Args:
    file_name: the path to the original file.

  Returns:
    the path to the newly created backup file.

  Raises:
    BatcherGenericError: file_name is not an existing regular file.

  """
  if not os.path.isfile(file_name):
    raise BatcherGenericError("Can't make a backup of a non-file '%s'" %
                              file_name)

  (parent_dir, base_name) = os.path.split(file_name)
  backup_prefix = '%s.backup-%d.' % (base_name, int(time.time()))

  # The source is opened first so that no backup file is left behind when the
  # original cannot be read.
  source = open(file_name, 'rb')
  try:
    (handle, backup_path) = tempfile.mkstemp(prefix=backup_prefix,
                                             dir=parent_dir)
    target = os.fdopen(handle, 'wb')
    try:
      shutil.copyfileobj(source, target)
    finally:
      target.close()
  finally:
    source.close()

  return backup_path


def SleepTime(seconds):
  """Put batcher to sleep for a period of time.

  A dot is written to stdout before every nap of at most two seconds, and a
  newline once the whole period has passed.

  Args:
    seconds: the number of seconds to sleep (usually an integer). Values of
      zero or less only write the newline.

  """
  remaining = seconds
  while remaining > 0:
    sys.stdout.write('.')
    sys.stdout.flush()
    # Never nap longer than what is left, so that odd or small values do not
    # oversleep.
    nap = min(2, remaining)
    time.sleep(nap)
    remaining -= nap
  sys.stdout.write('\n')
  sys.stdout.flush()


class Error(Exception):
  """Base class of all batcher specific errors."""


class BatcherGenericError(Error):
  """Generic batcher failure, e.g. a file that cannot be backed up or removed."""


class BatcherLockError(Error):
  """Batcher's lock file already exists (running batcher or stale file)."""


class BatcherNotificationError(Error):
  """Problem sending batcher's runtime report by mail."""


class InstancesFile:
  """Abstraction of the instances definition file.

  The parsed content of the file is kept in the data attribute and can also
  be read with item access (instances_file[key]).

  """

  def __init__(self, file_path):
    """Load the instance definitions stored in file_path.

    Args:
      file_path: path of the JSON file to read.

    Raises:
      EnvironmentError: file_path could not be opened.

    """
    self.__dict__['data'] = self.ReadInstances(file_path)

  def __getitem__(self, key):
    """Return the entry stored under key in the parsed data."""
    return self.data[key]

  def ReadInstances(self, file_path):
    """Read instance data from a file.

    Args:
      file_path: path of the JSON file to read.

    Returns:
      the deserialized content of the file.

    Raises:
      EnvironmentError: file_path could not be opened.

    """
    # The error raised by open() is deliberately left untouched: re-wrapping
    # it in a new EnvironmentError would drop its errno, strerror and filename.
    fd = open(file_path)
    try:
      return simplejson.load(fd)
    finally:
      fd.close()

  #TODO(tjb): figure out minimum necessary validation of data


class Cluster:
  """Class to represent data about a cluster."""

  def __init__(self):
    self.__dict__['data'] = {'cluster_name': None,
                             'cluster_master': None,
                             'hostname': None}
    self.PopulateClusterData()

  def __getattr__(self, name):
    """Getter for retrieving class attributes."""
    if name in self.data:
      return self.data[name]
    else:
      raise AttributeError

  def __setattr__(self, name, value):
    """Setter for changing class attributes."""
    self.data[name] = value

  def PopulateClusterData(self):
    """Populate our class attributes."""
    try:
      self.SetClusterName()
      self.SetClusterMaster()
      self.SetHostName()
    except AttributeError, msg:
      raise AttributeError(msg)

  def SetClusterName(self, file_object=None):
    """Get the name of the cluster.

    Args:
      file_object: a file object, defaults to None.

    Raises:
      AttributeError: There was an error trying to get the cluster name.

    """
    try:
      if not file_object:
        self.data['cluster_name'] = open(_CLUSTER_NAME_FILE,
                                         'r').readlines()[0].strip()
      else:
        self.data['cluster_name'] = file_object.readlines()[0].strip()
    except EnvironmentError, msg:
      raise AttributeError(msg)

  def SetClusterMaster(self, file_object=None):
    """Get the cluster's master node.

    Args:
      file_object: a file object, defaults to None.

    Raises:
      AttributeError: There was an error getting the cluster manster name.

    """
    try:
      if not file_object:
        self.data['cluster_master'] = (open(_CLUSTER_MASTER_FILE,
                                            'r').readlines()[0].strip())
      else:
        self.data['cluster_master'] = file_object.readlines()[0].strip()
    except EnvironmentError, msg:
      raise AttributeError(msg)

  def SetHostName(self, file_object=None):
    """Get the fqdn of the current host.

    Args:
      file_object: an opened file object for /etc/hostname

    Raises:
      AttributeError: There was a problem getting the hostname.

    """
    try:
      if not file_object:
        self.data['hostname'] = (open('/etc/hostname',
                                      'r').readlines()[0].strip())
      else:
        self.data['hostname'] = file_object.readlines()[0].strip()
    except EnvironmentError, msg:
      raise AttributeError(msg)


class Instance:
  """Class to represent an instance that needs to be created.

  hostname is stored on the object itself; every other attribute is read from
  and written to the data dictionary handed to the constructor. The class
  level counters track how many Instance objects exist and how many of them
  were reported as created.

  """

  NumberOfInstances = 0
  NumberCreated = 0

  def __init__(self, hostname, data):
    """Store the hostname and its definition and count the new instance.

    Args:
      hostname: the name of the instance.
      data: a dictionary with the instance's settings.

    """
    self.__dict__['hostname'] = hostname
    self.__dict__['data'] = data
    Instance.NumberOfInstances += 1

  def __del__(self):
    Instance.NumberOfInstances -= 1

  def __getattr__(self, name):
    """Getter for retrieving class attributes."""
    # Only reached when normal lookup failed, so hostname is never served from
    # here. data is fetched through __dict__ to rule out endless recursion.
    data = self.__dict__.get('data', {})
    if name in data:
      return data[name]
    raise AttributeError(name)

  def __setattr__(self, name, value):
    """Setter for changing class attributes."""
    if name == 'hostname':
      # Assigning through self.hostname would call __setattr__ again.
      self.__dict__['hostname'] = value
    else:
      self.data[name] = value

  def InstancesCount(self):
    """Return the number of instances."""
    return Instance.NumberOfInstances

  def InstancesCreated(self):
    """Return the number of instances created."""
    return Instance.NumberCreated

  def IncrementCreated(self):
    """Increase the number of instances created by 1."""
    Instance.NumberCreated += 1

  def InstanceAsDict(self):
    """Return the instance namespace dictionary."""
    return self.__dict__


class InstanceCreator:
  """Class to handle the creation of instances.

  This class is responsible for the following tasks:
  1. Parsing the instance definition file.
  2. Create the correct number of Instance objects per the instance
     definition file.
  3. Create the instances.

  Args:
    options: an OptParse object that holds the commandline options.

  """

  def __init__(self, options, cluster_name):
    self.instances = []
    self.instances_file = options.instancesfile
    self.archived_instances_file = None
    self.logfile = options.logfile
    self.force = options.force
    self.no_wait_for_sync = options.nowait
    self.keepfiles = options.keepfiles
    self.iallocator = options.iallocator
    self.runtime_report = None
    self.cluster_name = cluster_name
    self.start = time.time()
    self.finish = 0

    if options.notify:
      self.notify = options.notify.split(',')

    if options.sender:
      self.sender = options.sender

    if options.sleep:
      self.sleep = options.sleep
    else:
      self.sleep = False

    self.ParseInstanceDefs()

  def ParseInstanceDefs(self):
    """Parse the instance definition file and set up instance objects.

    Raises:
      BatcherGenericError: There was an error reading the instances def file.

    """
    try:
      instances = InstancesFile(self.instances_file)
    except BatcherGenericError, msg:
      raise

    for instance in instances.data:
      self.instances.append(Instance(instance, instances[instance]))

  def CreateInstances(self):
    """Create Ganeti instances.

    This method handles the creation of the instances defined in the
    instances definition file.

    """
    for instance in self.instances:
      try:
        instance.create_command = self.CreateCommand(instance)
      except AttributeError, msg:
        instance.create_command = 'Failed to create instance.'
        instance.created = False
        instance.message = msg
        continue

      if self.force:
        msg = ('Creating %s with the following: %s....' %
               (instance.hostname, instance.create_command))
        sys.stdout.write(msg)
        sys.stdout.flush()
        logging.info(msg)

        # now we actually create the instance
        result = utils.RunCmd(instance.create_command)

        # post-creation checks and cleanup
        if result.failed:
          msg = '%s.' % (result.stderr.strip())
          instance.created = False
          instance.message = msg
        else:
          msg = 'Creation was successful.'
          # update the number of instances created
          instance.IncrementCreated()
          instance.created = True
          instance.message = msg

        # write msg to stdout
        sys.stdout.write(msg + '\n')
        sys.stdout.flush()
        logging.info(msg)

        if (self.sleep and
            instance.InstancesCreated() < instance.InstancesCount()):
          msg = ('Sleeping for %d seconds between instance '
                 'creations') % self.sleep
          logging.info(msg)
          sys.stdout.write(msg)
          sys.stdout.flush()
          SleepTime(self.sleep)

      else:
        # dry run
        instance.created = False
        instance.message = (('Creating %s in dry-run mode. Run batcher'
                             ' with -f to really create this instance.') %
                            instance.hostname)
        logging.info(instance.message)
        continue

    self.finish = time.time()

  def SendRuntimeReport(self):
    """Send the runtime report to recipients.

    Raises:
      BatcherNotificationError: There was a problem sending the report.

    """
    data = {'recipient': self.notify,
            'sender': self.sender,
            'cluster_name': self.cluster_name,
            'message': self.runtime_report.report}

    notify = Notify(data)

    try:
      notify.SendReport()
    except BatcherNotificationError, msg:
      raise BatcherNotifcationError(msg)

  def CreateRuntimeReport(self):
    """Create the runtime report."""
    report_data = {'cluster_name': self.cluster_name,
                   'instances': self.instances,
                   'start': self.start,
                   'finish': self.finish,
                   'instancesfile': self.instances_file,
                   'archived_instancesfile': self.archived_instances_file,
                   'logfile': self.logfile}
    self.runtime_report = RuntimeReport(report_data)

  def CreateCommand(self, instance):
    """Build the command string used to create an instance.

    Args:
      instance: an instance of the Instance class.
    Returns:
      a string containing the appropriate gnt-instance add command.

    """
    if not self.no_wait_for_sync:
      nowait = ''
    else:
      nowait = '--no-wait-for-sync'

    try:
      nodes = '-n %s:%s' % (instance.primary_node, instance.secondary_node)
    except AttributeError:
      nodes = '--iallocator %s' % self.iallocator

    try:
      template = '-t %s' % instance.template
    except AttributeError:
      template = '-t %s' % 'plain'

    try:
      vcpu = '-p %i' % instance.vcpu
    except AttributeError:
      vcpu = ''

    try:
      command = ('gnt-instance add %s %s -o %s -s %s -m %s '
                 '%s %s %s' % (template,
                               nodes,
                               instance.os,
                               instance.disk_size,
                               instance.ram_size,
                               vcpu,
                               nowait,
                               instance.hostname))
    except AttributeError, msg:
      msg = ('It appears either the instance\'s hostname, disk_size, or '
             'ram_size were not specified in %s') % self.instances_file
      raise AttributeError(msg)


    return command

  def DumpInstances(self):
    """Dump the namespace dictionary for each instance object."""
    for i in self.instances:
      print simplejson.dumps(i.InstanceAsDict())


class RuntimeReport:
  """Class to represent the runtime report.

  The report is built from the data dictionary handed to the constructor
  (keys: cluster_name, instances, start, finish, instancesfile,
  archived_instancesfile, logfile) and is available as the report attribute.

  """

  def __init__(self, data):
    self.__dict__['data'] = data
    self.BuildReport()

  def __getattr__(self, name):
    """Serve attributes from the data dictionary."""
    # data is fetched through __dict__ so that a missing entry (including
    # report) raises AttributeError instead of recursing forever.
    data = self.__dict__.get('data', {})
    if name in data:
      return data[name]
    raise AttributeError(name)

  def __setattr__(self, name, value):
    """Allow only the report attribute to be set."""
    if name == 'report':
      self.data[name] = value
    else:
      raise AttributeError('Invalid attribute %s' % name)

  def BuildReport(self):
    """Build the runtime report and store it in the report attribute.

    The text holds a summary (times, counters, files) followed by the list
    of instances that were created and the list of those that were not,
    together with the reason.

    """
    instances = self.instances

    # build list of instances created/not created
    instances_created = []
    instances_not_created = []
    for i in instances:
      if i.created:
        instances_created.append('%s (command: %s)' % (i.hostname,
                                                       i.create_command))
      else:
        instances_not_created.append('%s: %s (%s)' % (i.hostname,
                                                      i.message,
                                                      i.create_command))

    # The counters are kept on the Instance class and read through any
    # instance; an empty definition file yields no instance to ask.
    if instances:
      defined = instances[0].InstancesCount()
      created = instances[0].InstancesCreated()
    else:
      defined = 0
      created = 0

    #TODO(tjb): this needs cleaned up, i just copied/pasted from original
    start = self.start
    finish = self.finish
    elapsed = finish - start
    stamp_format = '%Y-%m-%d %H:%M:%S'
    now = time.strftime(stamp_format)
    repeating_dashes = '=' * 60
    msg = ('%s\n'
           'batcher runtime report\n'
           '%s\n'
           'Cluster: %s\n'
           'Date: %s\n\n'
           '---- Summary ----\n'
           'Time started: %s\n'
           'Time finished: %s\n'
           'Elapsed time (minutes): %d\n'
           'Instances defined in the config file: %i\n'
           'Number of instances created: %i\n'
           'Number of instances not created: %i\n'
           'Original instances.yaml: %s\n'
           'Archived instances.yaml: %s\n'
           'Logfile: %s\n'
           '\n'
           '---- Details ----\n'
           'Instances created:\n'
           '%s\n'
           '\n'
           'Instances not created and why:\n'
           '%s\n'
           '\n') % (repeating_dashes, repeating_dashes,
                    self.cluster_name, now,
                    time.strftime(stamp_format, time.localtime(start)),
                    time.strftime(stamp_format, time.localtime(finish)),
                    elapsed / 60,
                    defined,
                    created,
                    defined - created,
                    self.instancesfile,
                    self.archived_instancesfile,
                    self.logfile,
                    instances_created, instances_not_created)

    self.report = msg


class Notify:
  """Class to handle batcher notifications."""

  def __init__(self, data):
    self.__dict__['data'] = data

  def __getattr__(self, name):
    if name in self.data:
      return self.data[name]
    else:
      raise AttributeError

  def __setattr__(self, name, value):
    if (name == 'recipient' or name == 'sender' or
        name == 'cluster_name' or name == 'messsage'):
      self.data[name] = value
    else:
      raise AttributeError('%s is not a valid attribute' % name)

  def SendReport(self):
    """Connect to the SMTP server and send the message.

    Raises:
      SMTPConnectError: Problem connection to the smtp server.
      SMTPDataError: The SMTP server refused to accept the message data.
      SMTPRecipientRefused: All recipient addresses refused.
      SMTPSenderRefused: Sender address refused.

    """
    msg_object = MIMEText.MIMEText(self.message)

    if msg_object is not '':
      msg_object['Subject'] = ('[batcher] report from %s' %
                               self.data['cluster_name'])
      msg_object['From'] = self.data['sender']
      # convert self.data['recipient'] from a list to a string for the header
      msg_object['To'] = str(self.data['recipient'])[1:-1].replace('\'', '')

      s = smtplib.SMTP()

      try:
        s.connect()
        s.sendmail(self.data['sender'],
                   self.data['recipient'],
                   msg_object.as_string())
      except (smtplib.SMTPConnectError, smtplib.SMTPDataError,
              smtplib.SMTPRecipientsRefused, smtplib.SMTPSenderRefused), msg:
        raise BatcherNotificationError(msg)

      s.close()


@LockWrapped
def main():
  # check if we're running as root.
  if os.getuid():
    sys.stderr.write('This program must be run as root.\n')
    sys.exit(-1)

  cluster = Cluster()
  # check that we're running on the master node
  if cluster.cluster_master != cluster.hostname:
    msg = 'This program must be run from the the master node.\n'
    logging.critical(msg)
    sys.stderr.write(msg)
    sys.exit(-1)

  # get the commandline options
  options = ParseCommandline()

  # set up logger object
  loggerconf = {
      'format': '[%(asctime)s]: %(levelname)-8s: %(message)s',
      'level': logging.INFO,
      'datefmt': '%Y-%m-%d %H:%M:%S',
      'filename': options.logfile,
      'filemode': 'a'}

  # set debug level if user requested it
  # TODO(tjb): add debugging lines in appropriate places
  if options.debug:
    loggerconf['level'] = logging.DEBUG

  logging.basicConfig(**loggerconf)

  # Before we do anything else, make a copy of the config file
  try:
    archived_instances = MakeBackup(options.instancesfile)
  except BatcherGenericError, msg:
    logging.warning(msg)

  # create instances
  try:
    logging.info('Batcher is starting instance creation jobs.')
    creator = InstanceCreator(options, cluster.cluster_name)
    creator.archived_instances_file = archived_instances
    creator.CreateInstances()
  except KeyboardInterrupt:
    sys.stderr.write(('\nAborting. Waiting for batcher to clean up after '
                      'iteself...'))

    sys.stderr.write('complete. Exiting.')
    sys.stderr.flush()
    logging.warning('Aborting due to user interaction (ctrl-C).')
    sys.exit(-1)

  # build the runtime report
  logging.info('Creating runtime report')
  creator.CreateRuntimeReport()

  # send the runtime report
  if options.notify:
    try:
      creator.SendRuntimeReport()
      logging.info('Runtime report sent to %s' % options.notify)
    except BatcherNotificationError, msg:
      sys.stderr.write(msg + '\n')
      logging.warning(msg)

  logging.info('Batcher has finished instance creation jobs.')

  # print the runtime report and then have batcher clean up after itself
  print '\n\n%s' % creator.runtime_report.report

  try:
    if not options.keepfiles:
      RemoveFile(options.instancesfile)
  except GenericBatcherError, msg:
    sys.stderr.write(msg)
    sys.exit(1)


# Run batcher only when the file is executed as a script, not when imported.
if __name__ == '__main__':
  main()
