#!/usr/bin/perl -w

use strict;
use Data::Dumper;
use POSIX ":sys_wait_h";
use Carp;

# Peder swears that it has been in core since 5.004.
use IPC::Open3;

# Standard from 5.8 - But does everyone have it?  Allows for much higher
# resolution wall clock measures of plugin execution.
# use Time::HiRes;

# munin-node-sched
#
# Prototype munin node scheduler as proposed in
# http://munin-monitoring.org/wiki/protocol-spool
#
# Goal:
#  Run munin plugins and collect the results.  Run intervals could be
#  as short as every second.
#
# Design considerations:
# - We cannot expect to be scheduled exactly every second (esp. if the host
#   is loaded).  Therefore use >= and <= and never = for time-stamp
#   comparisons.
# - An upshot of such a high load situation is that some plugins with very
#   short run intervals (say 1 second) may not be run every second.
# - Collect runtime statistics for plugins to be able to override too
#   short intervals with something more realistic -- hopefully we can
#   communicate this problem to the central node in time and make the
#   graph red as well as pasting in a message on the plugin/graph page.
# - Never execute a plugin for a given host more than once at a time.
# - The number of plugins*nodes is expected to be low and running only
#   once a second allows some list sorting and searching without getting
#   bogged down.
# - Run a given plugin every time $secssinceepoch mod $interval == 0.
#   This way the process can be stopped and restarted (host reboot) and
#   the plugin execution will just continue as appropriate for the time
#   of restart.  This allows us to not save state which is probably a
#   good thing.  The state would have to include the last time of
#   execution for a plugin. ... state could be reconstructed from the
#   spool files - or simply be stored in a tie or something like that.
#
# Possible metrics that could be replayed along with the spooled output:
# (label it multigraph munin-node and get pretty graphs for free)
#   Maximum slow queue depth
#   Maximum fast queue depth
#   Total number of scheduled executions (lifetime total)
#   Total number of started executions (ditto)
#   Total number of completed executions (ditto)
#   Average plugin execution time
#   Longest plugin execution time (lifetime)
#   Longest plugin execution time last 5 minutes
#
# Plugin magic markers associated with this extension:
# #%# supportedintervals=<value(s)>
#     Value syntax: Single: <n>, range: <n-m>, enumeration: <n,m>
#     Or combinations of the above. Postfixes m(inutes),h(ours)
#     The loadavg plugin might be tagged #%# supportedintervals=1m,5m,15m
#     A mail thruput plugin might be tagged 1m-1h.
#
# #%# defaultinterval=<time-in-seconds>
#     Override in the node configuration.  Any plugin executed more
#     often than every 5 minutes will simply be forked and executed
#     without reservations about resource use ('fast' queue).  Plugins
#     with execution intervals of 5 minutes or more will be executed
#     one at a time ('slow' queue).  If no interval is given 5 minutes
#     is assumed --- to match the current behaviour.
#
# #%# veryslow
#     Use on plugins that use in excess of 10 seconds to execute.  They
#     will be executed in such a way as to not hold up the 'slow'
#     queue.  That is to say they will be executed and then the next
#     'slow' plugin will be executed one second later while the
#     veryslow one still runs.  You can have execution intervals for
#     veryslow jobs shorter than 5 minutes since the 'fast' queue is
#     not exclusive, but this assumes that the thing the plugin
#     measures can take the stress of all the fast plugins executing
#     at the same time and at once.
#
# NOTE: The same plugin for the same node will not be executed more
# than once at a time no matter what queue it is in or what the
# execution interval is.  Never announce a supported interval shorter
# than (or equal to) the typical execution time of the plugin.

# Job-description:
#   pluginname => $
#   hostname   => $
#   lastrun    => $  (seconds since epoch)
#   interval   => $  (seconds)
#   runtimes   => @  (wallclock seconds)
#   avgruntime => $
#   maxruntime => $
#   minruntime => $
#
# If after 5 runs avgruntime is larger than interval the interval will be
# forcibly adjusted.

# Job-record:
#   pluginname => $
#   hostname   => $
#   output     => $
#   starttime  => $  (seconds since epoch)
#

# Static job table.  Each entry follows the Job-description layout
# documented above: a job starts out as never run (lastrun 0) with an
# empty runtime history and no statistics yet.
my @alljobs = map {
    my ($pluginname, $interval) = @{$_};
    +{
        pluginname => $pluginname,
        hostname   => 'localhost',
        lastrun    => 0,
        interval   => $interval,
        runtimes   => [],
        avgruntime => undef,
        maxruntime => undef,
        minruntime => undef,
    }
} (
    # [ plugin name, run interval in seconds ]
    [ 'exim_mailqueue', 5 ],
    [ 'exim_mailstats', 30 ],
);

# print Dumper \@alljobs;

# Lookup table: "pluginname/hostname" => index of that job in @alljobs.
my %bypluginhost;

# $i stays in file scope and ends up equal to the number of jobs.
my $i;
for ($i = 0; $i < scalar(@alljobs); $i++) {
    my $job = $alljobs[$i];
    my $key = ($job->{'pluginname'}) . '/' . ($job->{'hostname'});
    $bypluginhost{$key} = $i;
}

# Hash of the last execution time of each interval value.
# NOTE(review): declared but never read or written by the code visible
# in this file.
my %lastexec;
# E.g.,
#  { 1 => 1148745002,
#    5 => 1148745000,
#   30 => 1148745000,
#   60 => 1148745000,
#  300 => 1148745000,
# 3600 => 1148742000 }

# Hash of the next execution time of each interval value:
# interval (seconds) => seconds since epoch at which the jobs with that
# interval are next due.  Seeded by initexectimes and moved forward by
# find_due_plugins.
my %nextexec;

# $thistime is the time we use for calculations during one turn through
# the main loop.  It is only reset after waking up for a new turn.
# $lasttime holds the $thistime of the previous turn, so that the main
# loop looks for due plugins at most once per distinct second.
my $thistime;
my $lasttime=0;

# pid => plugin/hostname lookup.  Filled in by execute_plugin and
# consulted by REAPER to map an exited child back to its job.
my %child;

# plugin/hostname lock hash to prevent concurrent executions.  A true
# value means the job is running: execute_plugin sets it and refuses to
# start a job that is already locked; REAPER releases it.
my %lock;

# Start of execution (seconds since epoch) by pid.  REAPER uses it to
# compute the wall clock runtime of the child.
my %start;

# Exit statuses by plugin/hostname: undef while the job is running, the
# wait status ($?) once it has been reaped.
my %exitstatus;

# The output of running plugins.
# NOTE(review): not referenced by the code visible in this file.
my %outputs;

# The log files of running plugins.
# NOTE(review): not referenced by the code visible in this file.
my %plugins;

# plugin/hostname => 1 while a job that was started from the slowboat
# is running.  exec_slowboat only starts another slow job when this
# hash is empty, so unused elements MUST be deleted (REAPER does this).
my %isslow;

# Execution queues, filled by munin_run.
my @fastlane;	# Jobs listed here (interval below 300 seconds) are just
                # executed at once and damn the consequences.
my @slowboat;   # Jobs listed here are executed one at a time.



sub REAPER {
    # SIGCHLD handler.  Collect our children in an orderly manner and do
    # the book-keeping for those that are plugin runs: release the job
    # lock, record the exit status and the wall clock runtime, and clear
    # the 'slow' marker so the next slowboat job may start.

    # See perlipc:
    #
    # If a second child dies while in the signal handler caused by the
    # first death, we won't get another signal. So must loop here
    # else we will leave the unreaped child as a zombie. And the next
    # time two children die we get another zombie. And so on.

    # waitpid() overwrites $? and may change $!.  The handler can run in
    # the middle of whatever the main program was doing, so keep both
    # local to avoid corrupting values the interrupted code relies on.
    local ($!, $?);

    while ((my $childpid = waitpid(-1,WNOHANG)) > 0) {
        # Grab the status right away, before anything else can touch $?.
        my $status = $?;

        warn "<--Reaping child: $childpid\n";

        # Some other non-interesting child
        next unless exists($child{$childpid});

        # Remove the per-pid records while reading them.  Pids are
        # recycled by the system, and stale entries would otherwise pile
        # up for the life time of the scheduler.
        my $childjob = delete $child{$childpid};   # plugin/hostname
        my $started  = delete $start{$childpid};   # seconds since epoch

        warn "Child is: $childjob\n";
        $lock{$childjob} -= 1;
        $exitstatus{$childjob} = $status;
        warn "Child terminated: $childjob, pid $childpid, $status\n";

        # Collect statistics
        # NOTE(review): 'runtimes' grows by one element per run and is
        # never trimmed anywhere in this file.
        my $job = $alljobs[$bypluginhost{$childjob}];
        push(@{$job->{'runtimes'}},(time - $started));

        # A finished job no longer blocks the slowboat (no-op for jobs
        # that were not started as 'slow').
        delete $isslow{$childjob};
    }

    # Re-install the handler; needed on systems that reset the signal
    # disposition when the handler is invoked.
    $SIG{CHLD} = \&REAPER;
}

$SIG{CHLD} = \&REAPER;

sub initexectimes {
    # Seed %nextexec with the first execution time of every distinct
    # job interval: the first multiple of the interval that is not
    # earlier than $thistime.  Takes no arguments; reads @alljobs and
    # $thistime.
    for my $job (@alljobs) {
        my $interval = $job->{'interval'};

        # Several jobs may share an interval; work it out only once.
        unless (defined($nextexec{$interval})) {
            my $due = int($thistime / $interval) * $interval;

            # Rounding down lands in the past unless $thistime happens
            # to be an exact multiple; in that case move on one interval.
            $due += $interval if $due < $thistime;
            $nextexec{$interval} = $due;

            print "Now is $thistime, interval is $interval, ne: ",$nextexec{$interval},"\n";
        }
    }
}

sub execute_plugin ($$) {
    # Fork and exec one plugin.  Do the associated book-keeping and
    # return the pid.
    #
    # Arguments:
    #   $job   - job hash; 'pluginname' and 'hostname' are used
    #   $class - job class ('slow', 'fast'); 'slow' jobs are marked in
    #            %isslow so that exec_slowboat runs only one at a time
    #
    # Returns the pid of the child (in the parent), or nothing when the
    # plugin is already running and the execution is skipped.
    # Croaks on a job without plugin name, dies if the fork fails.

    my ($job,           # Job hash
        $class)=@_;     # job class ('slow', 'fast')

    # Debug support
    if (!defined($job->{'pluginname'})) {
        print STDERR Dumper $job;
        croak("Undefined job");
    }

    my $jobname = ($job->{'pluginname'}).'/'.($job->{'hostname'});

    if ($lock{$jobname}) {
        warn "Plugin $jobname already running. Skipping.\n";
        return;
    }

    # Hold back SIGCHLD until the book-keeping below is complete.  REAPER
    # looks the pid up in %child; a child that dies before the parent
    # has recorded it there would be treated as an unknown child, and
    # the lock set afterwards would never be released.
    my $chldset = POSIX::SigSet->new(POSIX::SIGCHLD());
    my $oldset  = POSIX::SigSet->new();
    POSIX::sigprocmask(POSIX::SIG_BLOCK(), $chldset, $oldset)
        or die "Could not block SIGCHLD: $!";

    my $childpid=fork;

    if (!defined($childpid)) {
        # fork reports its error in $!, not in $?.  Save it before the
        # signal mask is restored.
        my $forkerror = "$!";
        POSIX::sigprocmask(POSIX::SIG_SETMASK(), $oldset);
        die "Fork failed: $forkerror";
    }

    if ($childpid == 0) {
        # Child
        # The signal mask survives exec, so hand the plugin the mask we
        # started with.
        POSIX::sigprocmask(POSIX::SIG_SETMASK(), $oldset);
        warn "-->exec(/etc/munin/plugins/".$job->{'pluginname'}.")\n";
        exec("/etc/munin/plugins/".$job->{'pluginname'});
        # HUH?  exec only returns on failure, with the reason in $!.
        die "Yikes! Returned from execing ".$job->{'pluginname'}.": $!";
    }

    # Parent
    warn "Pid of $jobname is $childpid\n";
    $child{$childpid}=$jobname;
    $start{$childpid}=time;
    $lock{$jobname} = 1;
    $exitstatus{$jobname} = undef;
    if ($class eq 'slow') {
        $isslow{$jobname}=1;
    }

    # Everything REAPER needs is in place; let a pending SIGCHLD through.
    POSIX::sigprocmask(POSIX::SIG_SETMASK(), $oldset);

    return $childpid;
}


sub exec_fastlane {
    # Drain @fastlane: start every queued job right away as a 'fast'
    # job, taking them from the end of the queue.
    for (;;) {
        my $job = pop(@fastlane);

        # Queue exhausted (or a false entry): nothing more to start.
        last unless $job;

        execute_plugin($job,'fast');
    }
}


sub exec_slowboat {
    # Execute the jobs of the 'slow' queue one at a time: start the next
    # queued job, but only if no job started as 'slow' is still running
    # (%isslow is empty).  At most one job is started per call.

    # Nothing queued.
    return unless scalar(@slowboat) > 0;

    # A slow job is still running; try again on a later call.
    return unless scalar(keys %isslow) == 0;

    # FIXME: Very pseudo code:
    # if ($plugin_is_veryslow($plugin)) {
    #    execute_plugin(shift(slowboat),'fast');
    # } else {

    # Jobs are pushed onto the end of the queue, so take the oldest one
    # from the front.  Taking from the end would let newly queued jobs
    # overtake older ones, which may then never run while there is a
    # backlog.
    execute_plugin(shift(@slowboat),'slow');
    # }
}


sub munin_run ($) {
    # Well, actually, we don't run jobs here, we just queue them.
    #
    # Argument:
    #   $job - job hash; its 'pluginname' and 'hostname' identify the
    #          registered job (the entry in @alljobs) that gets queued.
    #
    # Jobs with an interval below 5 minutes go to @fastlane, all others
    # to @slowboat.  A job that is not registered in %bypluginhost is
    # reported and not queued.
    my ($job)=@_;
    my $jobname = $job->{'pluginname'}.'/'.$job->{'hostname'};

    # An unknown plugin/hostname has no index.  Using the undefined
    # value as array index would silently select the first job instead.
    my $index = $bypluginhost{$jobname};
    if (!defined($index)) {
        warn "Job $jobname is not registered. Not queueing it.\n";
        return;
    }

    my $thejob = $alljobs[$index];

    # If the job execution interval is less than 5 minutes put it in the
    # fast lane.
    if ($thejob->{'interval'} < 300) {
        push(@fastlane,$thejob);
    } else {
        push(@slowboat,$thejob);
    }

    return;
}


sub find_interval_plugins ($) {
    # Hand every job whose execution interval equals the given one to
    # munin_run, in @alljobs order.
    #
    # Argument:
    #   $interval - execution interval in seconds

    my ($interval) = @_;

    my @matching = grep { $_->{'interval'} == $interval } @alljobs;

    munin_run($_) foreach @matching;
}


sub find_due_plugins {
    # Queue the plugins that got due since the last time.  Walks the
    # intervals in %nextexec from shortest to longest, passes every
    # interval whose next execution time has been reached to
    # find_interval_plugins, and schedules its next execution.

    # One snapshot of the clock for the whole scan, so that all
    # intervals are judged against the same moment.
    my $now = time;

    foreach my $interval (sort {$a <=> $b} keys %nextexec) {
        next if $nextexec{$interval} > $now;

        warn "Time to execute $interval second jobs\n";
        find_interval_plugins($interval);

        # Next execution is the first multiple of the interval after
        # $now.  Normally that is one interval past the old value.  If we
        # were held up for longer than an interval (loaded host, stopped
        # process, clock step) the missed slots are skipped; stepping
        # one interval at a time would queue the same jobs on every
        # pass until the schedule had caught up.
        $nextexec{$interval} = (int($now / $interval) + 1) * $interval;
    }
}

# Main program: work out the first execution times, then loop forever,
# waking up about once a second.
$thistime = time;

initexectimes();

for (;;) {
    # Look for due plugins only once per distinct second; a wake-up
    # within the same second goes straight on to the slowboat.
    unless ($thistime == $lasttime) {
        warn "Looking for due plugins\n";
        find_due_plugins();
        exec_fastlane();
    }
    exec_slowboat();

    $lasttime = $thistime;

    # Note, this sleep may be interrupted by children exiting. In that
    # case it may be time for another slowboat execution, but not much
    # more.
    sleep 1;

    $thistime = time;
    warn "Awake! Time now is ".time."\n";
}
