#!/usr/bin/env python
# -*- Mode: python -*-
#
# Usage: pslurp [OPTIONS] -h hosts.txt -L localdir remote local
#
# Parallel scp from the set of nodes in hosts.txt.  For each node, we
# essentially do a scp [-r] user@host:remote localdir/<node>/local.  This
# program also uses the -q (quiet) and -C (compression) options.  Note
# that remote must be an absolute path.
#
# Created: 18 November 2003
#
# $Id: pslurp 348 2008-06-05 06:57:26Z bnc $
#
import fcntl, os, popen2, pwd, select, signal, sys, threading, time

basedir, bin = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0])))
sys.path.append("%s" % basedir)

import psshlib
from psshlib.basethread import BaseThread

_DEFAULT_PARALLELISM = 32
_DEFAULT_TIMEOUT     = sys.maxint # "infinity" by default

def print_usage():
    """Write the pslurp synopsis, option summary and an example to stdout.

    The destination directory for copied files is selected with -L
    (--localdir); -o only names the optional directory for per-host
    stdout files, so the synopsis advertises -L.
    """
    lines = [
        "Usage: pslurp [OPTIONS] -h hosts.txt -L localdir remote local",
        "",
        "  -r --recursive recursively copy directories (OPTIONAL)",
        "  -L --localdir  output directory for remote file copies",
        "  -h --hosts     hosts file (each line \"host[:port] [login]\")",
        "  -l --user      username (OPTIONAL)",
        "  -p --par       max number of parallel threads (OPTIONAL)",
        "  -o --outdir    output directory for stdout files (OPTIONAL)",
        "  -e --errdir    output directory for stderr files (OPTIONAL)",
        "  -t --timeout   timeout in seconds to do scp to a host (OPTIONAL)",
        "  -O --options   SSH options (OPTIONAL)",
        "",
        "Example: pslurp -h hosts.txt -L /tmp/outdir -l irb2 \\",
        "         /home/irb2/foo.txt foo.txt",
        "",
    ]
    # One write keeps the help text contiguous; every entry ends with a
    # newline, so the empty entries become the blank separator lines.
    sys.stdout.write("\n".join(lines) + "\n")

def read_envvars(flags):
    """Overlay PSSH_* environment settings onto the flags dictionary.

    flags is modified in place.  A variable that is unset or empty leaves
    the corresponding entry untouched.  PSSH_PAR and PSSH_TIMEOUT are
    converted with int(), so a non-numeric value raises ValueError.
    """
    # (environment variable, flags key, converter or None for plain text),
    # listed in the order in which they are applied.
    table = (
        ("PSSH_LOCALDIR", "localdir", None),
        ("PSSH_HOSTS", "hosts", None),
        ("PSSH_USER", "user", None),
        ("PSSH_PAR", "par", int),
        ("PSSH_OUTDIR", "outdir", None),
        ("PSSH_ERRDIR", "errdir", None),
        ("PSSH_TIMEOUT", "timeout", int),
        ("PSSH_OPTIONS", "options", None),
    )
    for envname, key, convert in table:
        value = os.getenv(envname)
        if not value:
            continue
        if convert is not None:
            value = convert(value)
        flags[key] = value

def parsecmdline(argv):
    """Parse the pslurp command line.

    Settings are layered: built-in defaults, then PSSH_* environment
    variables (via read_envvars), then command-line options.  If no user
    was given by either source, the current user's login name is used.

    argv is the full argument vector including the program name.

    Returns (args, flags): the remaining positional arguments and the
    dictionary of settings.  Prints the usage text and exits with status 3
    when an option is invalid or no hosts file was specified.  A
    non-numeric value for -p/--par or -t/--timeout raises ValueError.
    """
    import getopt
    shortopts = "rL:h:l:p:o:e:t:O:"
    # A trailing "=" marks a long option that takes a value, mirroring the
    # ":" suffixes in shortopts.  Without it getopt treats e.g. "--hosts"
    # as a bare switch and leaves its value among the positional arguments.
    longopts = [ "recursive", "localdir=", "hosts=", "user=", "par=",
                 "outdir=", "errdir=", "timeout=", "options=" ]
    flags = { "recursive" : None, "localdir" : None, "hosts" : None,
              "user" : None, "par" : _DEFAULT_PARALLELISM, "outdir" : None,
              "errdir" : None, "timeout" : _DEFAULT_TIMEOUT, "options" : None }
    read_envvars(flags)
    try:
        opts, args = getopt.getopt(argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # sys.exc_info() avoids version-specific "except ..., e" syntax.
        sys.stderr.write("pslurp: %s\n" % sys.exc_info()[1])
        print_usage()
        sys.exit(3)
    for o, v in opts:
        if o in ("-r", "--recursive"):
            flags["recursive"] = 1
        elif o in ("-L", "--localdir"):
            flags["localdir"] = v
        elif o in ("-h", "--hosts"):
            flags["hosts"] = v
        elif o in ("-l", "--user"):
            flags["user"] = v
        elif o in ("-p", "--par"):
            flags["par"] = int(v)
        elif o in ("-o", "--outdir"):
            flags["outdir"] = v
        elif o in ("-e", "--errdir"):
            flags["errdir"] = v
        elif o in ("-t", "--timeout"):
            flags["timeout"] = int(v)
        elif o in ("-O", "--options"):
            flags["options"] = v
    # Default to the current user only when neither the environment nor the
    # command line named one, so the passwd lookup is skipped otherwise.
    if not flags["user"]:
        flags["user"] = pwd.getpwuid(os.getuid())[0]
    # Required flags
    if not flags["hosts"]:
        print_usage()
        sys.exit(3)
    return args, flags

def _host_dir(flags, host):
    """Return the local directory that receives files copied from host."""
    if flags["localdir"]:
        return "%s/%s" % (flags["localdir"], host)
    # No -L given: fall back to a per-host directory under the current
    # directory instead of creating a directory literally named "None".
    return host

def _scp_command(host, port, user, remote, localpath, flags):
    """Build the scp command line that pulls remote from one host."""
    if flags["recursive"]:
        switches = "-qrC"
    else:
        switches = "-qC"
    if flags["options"]:
        return "scp -o \"%s\" %s -P %d %s@%s:%s %s" % \
               (flags["options"], switches, port, user, host,
                remote, localpath)
    return "scp %s -P %d %s@%s:%s %s" % \
           (switches, port, user, host, remote, localpath)

def do_pslurp(hosts, ports, users, remote, local, flags):
    """Copy remote from every host into <localdir>/<host>/local in parallel.

    hosts, ports and users are parallel sequences describing each node.
    remote is the path on the nodes and local the name to store it under.
    flags is the settings dictionary; this function reads its "localdir",
    "outdir", "errdir", "par", "options" and "recursive" entries and hands
    the whole dictionary to each BaseThread.

    Missing output directories are created first.  One BaseThread is
    started per host and the call returns once all of them have been
    joined.
    """
    for key in ("localdir", "outdir", "errdir"):
        if flags[key] and not os.path.exists(flags[key]):
            os.makedirs(flags[key])
    for host in hosts:
        hostdir = _host_dir(flags, host)
        if not os.path.exists(hostdir):
            os.mkdir(hostdir)
    sem = threading.Semaphore(flags["par"])
    threads = []
    for host, port, user in zip(hosts, ports, users):
        # Caps the number of concurrently running copies at flags["par"];
        # presumably BaseThread releases sem when it finishes -- confirm in
        # psshlib.basethread.
        sem.acquire()
        localpath = "%s/%s" % (_host_dir(flags, host), local)
        cmd = _scp_command(host, port, user, remote, localpath, flags)
        t = BaseThread(host, port, cmd, flags, sem)
        t.start()
        threads.append(t)
    for t in threads:
        t.join()

if __name__ == "__main__":
    import os, pwd, re
    from psshlib import psshutil

    # Exactly two positional arguments must remain after option parsing:
    # the remote path and the local file name.
    args, flags = parsecmdline(sys.argv)
    if len(args) != 2:
        print_usage()
        sys.exit(3)
    remote, local = args

    # The remote side is addressed as user@host:remote, so it has to be
    # an absolute path.
    if not re.match("^/", remote):
        print("Remote path %s must be an absolute path" % remote)
        sys.exit(3)

    # Load the node list, then pass it and the user chosen on the command
    # line (or the default) to patch_users.
    hosts, ports, users = psshutil.read_hosts(flags["hosts"])
    psshutil.patch_users(hosts, ports, users, flags["user"])

    # Install the child-exit handler and become a process-group leader
    # before any scp process is spawned.
    signal.signal(signal.SIGCHLD, psshutil.reaper)
    os.setpgid(0, 0)

    do_pslurp(hosts, ports, users, remote, local, flags)
