<?php

/* ************************************************************************
 amavis-stats.php - build rrd graphs from amavis-stats collected data.

 Copyright (C) 2003,2004 Mark Lawrence (nomad@null.net)

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License with
 the Debian GNU/Linux distribution in file /usr/share/common-licenses/GPL;
 if not, write to the Free Software Foundation, Inc., 59 Temple Place,
 Suite 330, Boston, MA  02111-1307  USA

 On Debian systems, the complete text of the GNU General Public
 License, version 2, can be found in /usr/share/common-licenses/GPL-2.

************************************************************************ */

/* 
*
* This script depends on rrdtool|php4-rrdtool. There are three ways to use it:
*
* 1. Directly from the command line as "php4 amavis-stats.php".
*    This allows you to generate graphs without having to have a full-blown
*    apache or other webserver environment running on your virus/smtp host.
*
*    If you would like regular graphs generated you could put something
*    like this in /etc/cron.d/virus-graphs:
*
*    #
*    # cron job to generate and move graphs every hour
*    #
*    0 * * * * amavis cd /var/tmp && php4 /usr/share/.../amavis-stats.php \
*    && rsync -avz /var/tmp/*.png remotehost:
*
* 2. Copy the script to somewhere in your web root (eg /var/www) and
*    call it directly as a url (eg http://server/amavis-stats.php).
*
* 3. Include it from another php script. In this case no html will be
*    generated directly by amavis-stats.php. However you then have the
*    ability to call the as[V|P]Graph() functions (after a call to
*    asLoadStats()) directly from within your script with your desired
*    parameters. Then build your own html to display the graphs.
*
*    Hopefully the function names are unique enough to not clash
*    with your own namespace.
*
*/

// Both values are imported with "global" by the graph functions below, so
// both must live in the global symbol table even when this file is included
// from inside a function of another script.
global $asVersion, $rrdstep;
$asVersion = "0.1.12";

// The rrdstep value MUST match that which is used by the perl script.
// don't change one without the other.
$rrdstep = 300;

// ////////////////////////////////////////////////////////////////////////
//
// ////////////////////////////////////////////////////////////////////////

/*
* php version of the RRDTool::rrd_graph extension using shell executable.
* This needs to come first in the file otherwise the php pre-compiler
* thinks we don't exist...
*
* $rrd records which implementation is in use ("shared-library" or
* "command-line"); asHtmlEnd() prints it in the page footer.
*/

global $rrd;
$rrd = "shared-library";

// dl() is not available in every PHP build/SAPI, so check for it before
// calling: "@" cannot suppress a call to an undefined function.
if (!function_exists('rrd_graph') &&
    !(function_exists('dl') && @dl('rrdtool.so'))) {
    $rrd = "command-line";

    /*
    * rrd_graph (img,   - file name of the image rrdtool should write
    *            opts,  - array of "rrdtool graph" arguments
    *            count, - number of entries in opts (accepted but unused here)
    *          )
    *
    * Runs "rrdtool graph" through the shell. Returns an array with the keys
    * "xsize", "ysize" and "calcpr" (the remaining output lines, i.e. the
    * results of PRINT statements), or false on failure. After a failure
    * rrd_error() returns the first line rrdtool printed.
    */
    function rrd_graph ($img, $opts, $count) {
        global $rrd_error_val;

        // Quote every argument for the shell; option strings contain
        // file names and legend text that are not under our control.
        $args = array();
        foreach ($opts as $opt) {
            $args[] = escapeshellarg($opt);
        }

        $cmd = "rrdtool graph " . escapeshellarg($img) . " " .
               implode(" \\\n", $args) . " 2>&1";
        asDbg($cmd, 1);

        $output = array();
        $status = 0;
        exec($cmd, $output, $status);

        if ($status != 0) {
            // stderr is merged into stdout, so the first output line is
            // rrdtool's own error message.
            $rrd_error_val = isset($output[0])
                ? $output[0]
                : "rrdtool exited with status $status";
            return false;
        }

        asDbg($output);

        // The first output line is expected to be the image size "WxH".
        if (!isset($output[0]) ||
            !preg_match("/(\d+)x(\d+)/", $output[0], $matches)) {
            $rrd_error_val = isset($output[0])
                ? $output[0]
                : "rrdtool produced no output";
            return false;
        }

        $retval = array(
            "xsize"  => $matches[1],
            "ysize"  => $matches[2],
            "calcpr" => array(),
        );

        // Everything after the size line is kept, re-indexed from 0.
        array_shift($output);
        foreach ($output as $index => $value) {
            $retval["calcpr"][$index] = $value;
        }
        return $retval;
    }

    /*
    * Returns the message stored by the last failed rrd_graph() call.
    */
    function rrd_error() {
        global $rrd_error_val;
        return $rrd_error_val;
    }
}


/*
* Timing helper: returns the number of seconds (with fraction) that have
* passed since $start, where $start is a string in the "usec sec" format
* returned by microtime().
*/
function elapsed($start)
{
    $now  = explode(" ", microtime());
    $then = explode(" ", $start);

    // Element 0 is the fractional part, element 1 the whole seconds.
    $seconds  = $now[1] - $then[1];
    $fraction = $now[0] - $then[0];

    // Borrow one second when the fractional difference went negative.
    if ($fraction < 0) {
        $seconds  -= 1;
        $fraction += 1.0;
    }

    return $fraction + $seconds;
}



/*
* Work out how we are being run and where the images should be written.
*/
if (isset($_SERVER["REQUEST_URI"])) {   // web environment
    $fullpage = true;
    $outdir = "img";

} else { // Stand-alone command line usage
    $cmd = true;
    // PWD is only present when the invoking shell exported it.
    $outdir = isset($_SERVER["PWD"]) ? $_SERVER["PWD"] : getcwd();

}

/*
* "rate" and "debug" used to arrive through register_globals. Read them
* from the query string instead, still honouring a global set beforehand by
* an including script. The rate ends up inside rrdtool options, so only the
* two supported values are accepted.
*/
if (isset($_GET["rate"])) {
    $asRateRequest = $_GET["rate"];
} elseif (isset($GLOBALS["rate"])) {
    $asRateRequest = $GLOBALS["rate"];
} else {
    $asRateRequest = 60;
}
if (is_scalar($asRateRequest) && (string) $asRateRequest === "3600") {
    $rate = 3600;
} else {
    $rate = 60;
}
$GLOBALS["rate"] = $rate;

if (isset($_GET["debug"])) {
    $GLOBALS["debug"] = $_GET["debug"] ? 1 : 0;
} elseif (!isset($GLOBALS["debug"])) {
    $GLOBALS["debug"] = 0;
}


if (isset($fullpage) || isset($cmd)) {
    asHtmlStart();

    if (asLoadStats()) {

        $daysec = 24 * 60 * 60;
        $now = time();

        // heading, file name suffix / legend word, length in days
        $asPeriods = array(
            array("Daily",   "day",     1),
            array("Weekly",  "week",    7),
            array("Monthly", "month",  31),
            array("Yearly",  "year",  365),
        );

        foreach ($asPeriods as $asPeriod) {
            list($asHeading, $asUnit, $asDays) = $asPeriod;

            print "<h2>$asHeading Graphs</h2>\n";
            print asPGraph("$outdir/passed-$asUnit.png", $now,
                           $asDays * $daysec, "by $asUnit");
            print asVGraph("$outdir/virus-$asUnit.png", $now,
                           $asDays * $daysec, "by $asUnit");
        }

    } else {
        asMsg("No statistics available.");
    }

    asHtmlEnd();
}

/*
* asDbg (txt, - string or array to show
*        pre, - when true, wrap the whole message in <pre> tags
*      )
*
* Prints a debug message, but only while the global "debug" flag is set.
* Arrays are dumped with print_r() inside their own <pre> block. The text is
* HTML-escaped because the output is part of the generated page.
*/
function asDbg($txt = "", $pre = 0) {
    if (empty($GLOBALS["debug"])) {
        return;
    }

    if ($pre) {
        print "<pre>\n";
    }

    if (is_array($txt)) {
        // Interpolating an array only yields the word "Array" (plus a
        // warning); print that label explicitly and dump the contents.
        print "amavis-stats::debug: Array\n";
        print "<pre>\n";
        print htmlspecialchars(print_r($txt, true));
        print "</pre>\n";
    } else {
        print "amavis-stats::debug: " . htmlspecialchars((string) $txt) . "\n";
    }

    if ($pre) {
        print "</pre>\n";
    }
    print "<br>\n";
}

/*
* Prints an informational line, prefixed with the program name and
* terminated with an html line break.
*/
function asMsg($txt = "") {
    echo "amavis-stats: ", $txt, "<br>\n";
}

/*
* Prints an error line, prefixed with "amavis-stats::error:" and
* terminated with an html line break.
*/
function asErr($txt = "") {
    echo "amavis-stats::error: ", $txt, "<br>\n";
}


/*
* Prints the opening part of the html page: doctype, head (with a small
* inline stylesheet) and the page heading.
*/
function asHtmlStart() {
    // The string is single-quoted, so the double quotes of the public
    // identifier are written as-is (a backslash would be printed literally).
    print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Amavis Detection Statistics</title>
<style>
    body {
        font-size: x-small;
        font-family: sans-serif; 
        margin: 1em;
    }
    img {
        margin-bottom: 1em;
        text-align: center;
        clear: both;
    }
</style>
</head>
<body>

<h1>Amavis Detection Statistics</h1>';


}


/*
* Returns $uri with the query parameter $name removed and, when $value is
* not null, re-added as "$name=$value". All other query parameters are
* kept in their original order.
*/
function asHtmlToggleUri($uri, $name, $value) {
    $qpos = strpos($uri, "?");
    if ($qpos === false) {
        $path  = $uri;
        $query = "";
    } else {
        $path  = substr($uri, 0, $qpos);
        $query = (string) substr($uri, $qpos + 1);
    }

    $pairs = array();
    foreach (explode("&", $query) as $pair) {
        if ($pair === "" || $pair === $name ||
            strpos($pair, $name . "=") === 0) {
            continue;
        }
        $pairs[] = $pair;
    }
    if ($value !== null) {
        $pairs[] = $name . "=" . $value;
    }

    return count($pairs) ? $path . "?" . implode("&", $pairs) : $path;
}


/*
* Prints the page footer: version and rrdtool flavour, a link that toggles
* debug output, a link that toggles the rate between per-minute and
* per-hour, and the closing html tags.
*
* The current state is read from the globals "debug" and "rate"; the links
* are built from the request URI, which is escaped before being printed.
*/
function asHtmlEnd() {
    global $asVersion, $rrd;

    $debugOn = !empty($GLOBALS["debug"]);
    $hourly  = isset($GLOBALS["rate"]) && is_scalar($GLOBALS["rate"]) &&
               (string) $GLOBALS["rate"] === "3600";

    // Not set when run from the command line.
    $uri = isset($_SERVER["REQUEST_URI"]) ? $_SERVER["REQUEST_URI"] : "";

    print '<br>
<span style="font-size:x-small">[Generated by <a
href="http://rekudos.net/amavis-stats/">amavis-stats</a>
 version ' . $asVersion . ' using ' . $rrd . ' rrdtool [';

    if (!$debugOn) {
        print '<a href="' .
              htmlspecialchars(asHtmlToggleUri($uri, "debug", "1")) .
              '">debug</a>';
    } else {
        print '<a href="' .
              htmlspecialchars(asHtmlToggleUri($uri, "debug", null)) .
              '">nodebug</a>';
    }
    print "].\n";

    if (!$hourly) {
        print 'Rate per <a href="' .
              htmlspecialchars(asHtmlToggleUri($uri, "rate", "3600")) .
              '">hour</a>';
    } else {
        print 'Rate per <a href="' .
              htmlspecialchars(asHtmlToggleUri($uri, "rate", null)) .
              '">minute</a>';
    }


    print ']</span>
</body>
</html>
    ';
}



/*
* Reads one of the amavis-stats data files into an array of lines.
* Prints $errmsg through asErr() and returns an empty array when the file
* cannot be read, so callers can always iterate over the result.
*/
function asReadStatFile($path, $errmsg) {
    $lines = @file($path);
    if (!is_array($lines)) {
        asErr($errmsg);
        $lines = array();
    }
    asDbg("$path");
    asDbg($lines);
    return $lines;
}


/*
* asLoadStats ()
*
* Loads the data written to /var/lib/amavis-stats into globals:
*
*   amavis-stats.names - "id name" lines; fills $virus[id]["id"|"name"] and
*                        remembers the ids of the special counters in $pid
*                        (Passed), $psid (Passed(SPAM)), $iid (Infected),
*                        $bid (Banned), $nid (Not-Delivered) and $nsid
*                        (Not-Delivered(SPAM)). Ids not found stay at -1.
*   amavis-stats.seen  - "id first last" lines; fills
*                        $virus[id]["firstseen"|"lastseen"].
*   amavis-stats.state - "lastupdate: N" sets $lastupdate, "LC_TIME: X" is
*                        passed to setlocale().
*
* Also initialises the colour table via as_col().
* Returns true on success, false when no usable "lastupdate" was found.
*/
function asLoadStats () {
    global $as_libdir, $virus, $pid, $psid, $iid, $bid,$nid,$nsid, $lastupdate, $maxi;

    $as_libdir    = "/var/lib/amavis-stats";
    $as_statefile = $as_libdir . "/amavis-stats.state";
    $as_namefile  = $as_libdir . "/amavis-stats.names";
    $as_seenfile  = $as_libdir . "/amavis-stats.seen";

    $virus = array();
    $pid  = -1;
    $psid = -1;
    $iid  = -1;
    $bid  = -1;
    $nid  = -1;
    $nsid = -1;
    // Forget the value of an earlier call so a failed reload is noticed.
    $lastupdate = null;

    /*
    * id => name mappings
    */
    $lines = asReadStatFile($as_namefile,
                            "Couldn't open id => name mappings file.");
    foreach ($lines as $line) {
        // Limit of 2: the name is everything after the id.
        $fields = preg_split("/\s+/", $line, 2, PREG_SPLIT_NO_EMPTY);
        if (!is_array($fields) || count($fields) < 2) {
            continue;   // blank or malformed line
        }

        $id   = trim($fields[0]);
        $name = trim($fields[1]);

        $virus[$id]["id"]   = $id;
        $virus[$id]["name"] = $name;

        switch ($name) {
            case "Passed":
                $pid = $id;
                break;
            case "Passed(SPAM)":
                $psid = $id;
                break;
            case "Infected":
                $iid = $id;
                break;
            case "Banned":
                $bid = $id;
                break;
            case "Not-Delivered":
                $nid = $id;
                break;
            case "Not-Delivered(SPAM)":
                $nsid = $id;
                break;
        }
    }

    /*
    * first/last seen times
    */
    $lines = asReadStatFile($as_seenfile,
                            "Couldn't open first/last seen file.");
    foreach ($lines as $line) {
        $fields = preg_split("/\s+/", $line, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($fields) || count($fields) < 3) {
            continue;
        }
        $virus[$fields[0]]["firstseen"] = $fields[1];
        $virus[$fields[0]]["lastseen"]  = $fields[2];
    }

    /*
    * state: time of last update and locale
    */
    $lines = asReadStatFile($as_statefile, "Couldn't open state file.");
    foreach ($lines as $line) {
        $fields = preg_split("/\s+/", $line, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($fields) || count($fields) < 2) {
            continue;
        }
        if ($fields[0] == "lastupdate:" && is_numeric($fields[1])) {
            $lastupdate = $fields[1];
        } elseif ($fields[0] == "LC_TIME:") {
            setlocale(LC_TIME, $fields[1]);
        }
    }

    if (!isset($lastupdate)) {
        asErr("lastupdate not defined.");
        return false;
    }
    elseif ($lastupdate == 0) {
        asErr("last update was at 0 seconds.");
        return false;
    }

    as_col();
    return true;
}



/*
* Fills the global $as_colors with the list of "#RRGGBB" colour codes,
* indexed from 0. The keys of the table below are only there to name the
* colours; they are dropped before the list is stored.
*/
function as_col() {
    global $as_colors;

    $palette = array(
        "DeepSkyBlue"    => "#00BFFF",
        "gold"           => "#FFD700",
        "salmon"         => "#FA8072",
        "DarkGreen"      => "#006400",
        "DeepPink"       => "#FF1493",
        "DarkTurquoise"  => "#00CED1",
        "magenta"        => "#FF00FF",
        "SpringGreen"    => "#00FF7F",
        "red"            => "#FF0000",
        "ForestGreen"    => "#228B22",
        "khaki"          => "#F0E68C",
        "yellow"         => "#FFFF00",
        "blue"           => "#0000FF",
        "IndianRed"      => "#CD5C5C",
        "SlateBlue"      => "#6A5ACD",
        "SandyBrown"     => "#F4A460",
        "orange"         => "#FFA500",
        "DarkOrange"     => "#FF8C00",
        "NavyBlue"       => "#000080",
        "HotPink"        => "#FF69B4",
        "SeaGreen"       => "#2E8B57",
        "purple"         => "#A020F0",
        "LightPink"      => "#FFB6C1",
        "MediumBlue"     => "#0000CD",
        "firebrick"      => "#B22222",
        "LawnGreen"      => "#7CFC00",
        "VioletRed"      => "#D02090",
        "CornflowerBlue" => "#6495ED"
    );

    $as_colors = array_values($palette);
}




/*
* addopts (opts,   - array of rrdtool graph options, appended to in place
*          type,   - rrdtool element to draw, e.g. "AREA" or "STACK"
*          id,     - id of the virus (also the base name of its .rrd file)
*          vcount, - array of id => count for the legend
*          virus,  - array of id => ("name" => ...) as built by asLoadStats()
*          length, - accepted for the callers' benefit, not used here
*        )
*
* Appends the DEF, CDEF and drawing options for one virus. The legend shows
* the name padded to the global $maxi width followed by the count; the
* colour is derived from the md5 of the padded name. Returns $opts.
*/
function addopts(&$opts, $type, $id, $vcount, $virus, $length) {
    global $as_libdir, $rate, $maxi;

    $name  = sprintf("%-".$maxi."s", $virus[$id]["name"]);
    $count = sprintf("%8d", isset($vcount[$id]) ? $vcount[$id] : 0);
    $col   = substr(md5($name),7,6);

    // rrdtool uses ":" as its field separator, so a colon inside the
    // legend text has to be written as "\:". Escape after padding and
    // hashing so alignment and colours are unaffected.
    $legend = str_replace(":", "\\:", $name);

    $opts[] = "DEF:v$id=$as_libdir/$id.rrd:hits:AVERAGE";
    $opts[] = "CDEF:gv$id=v$id,$rate,*";
    $opts[] = "$type:gv$id#$col:$legend $count";

    return $opts;
}


/*
* asVGraph (file,    - name of the png to generate
*           endtime, - end time in seconds (defaults to 'now')
*           length,  - length of time in seconds (defaults to one day)
*           timetext,- human-readable description of length (eg: 'by day')
*           hostname,- hostname of amavis server (defaults to the output
*                      of the "hostname" command)
*         )
*
* Build a graph of Virus infected emails.
* Returns either a html-valid <img> tag which can be printed, or the
* boolean "false" if something went wrong.
*
* asLoadStats() must have been called first; it fills the globals used here.
*/
function asVGraph($img, $end = 0, $length = 86400, $timetext = "", $host = "") {
    global $as_libdir, $virus, $pid, $psid, $iid, $bid, $nid, $nsid, $lastupdate, $maxi, $rate, $asVersion, $rrdstep;

    /*
    * Options... when do we start, end, graph title text etc.
    */
    if ($end == 0) {
        $end = time();
    }

    // make the end time an even multiple of the rrdstep
    // (300 is the step set at the top of this file, used if the global
    // is not visible)
    $step = ($rrdstep > 0) ? $rrdstep : 300;
    $end = (int) (floor($end / $step) * $step);

    $start = $end - $length;

    $enddate = strftime("%c", $end);

    if ($timetext == "") {
        $timetext = "$length seconds";
    }

    if ($host == "") {
        $host = str_replace("\n", "", (string) shell_exec("hostname"));
    }

    $viruses = is_array($virus) ? $virus : array();

    /*
    * It is a two-step process to build the final graph. The average over
    * a specific time period seems to be impossible to get without actually
    * building a graph. Ie, rrd fetch will not calculate the values we
    * need - we would have to sum and average manually.
    *
    * However the PRINT function of a graph will return what we want
    * in an array. So first of all build a graph that PRINTs the average
    * of every virus over the selected time period.
    */

    $opts = array();
    $opts[] = "--start=$start";
    $opts[] = "--end=$end";

    foreach ($viruses as $id => $rest) {
        $opts[] = "DEF:v$id=$as_libdir/$id.rrd:hits:AVERAGE";
        $opts[] = "CDEF:gv$id=v$id,UN,0,v$id,IF";
        $opts[] = "CDEF:gvt$id=gv$id,$length,*";
        $opts[] = "PRINT:gvt$id:AVERAGE:%.0lf";
    }

    $ret = rrd_graph($img, $opts, count($opts));

    /*
    * debugging - graph definitions
    */
    asDbg($ret);

    if (!is_array($ret)) {
        $msg = rrd_error();
        asErr("rrd_graph(): $msg");
        return false;
    }

    /*
    * All results from PRINT commands are in the array $ret["calcpr"][..],
    * in the same order as the PRINT options above.
    */
    $calcpr = (isset($ret["calcpr"]) && is_array($ret["calcpr"]))
        ? $ret["calcpr"]
        : array();

    $vcount = array();
    $maxi = 0;
    $i = 0;
    foreach ($viruses as $id => $rest) {
        /*
        * We don't have enough resolution in the rrds
        * to calculate the correct counts at low averages,
        * so we just don't display them. Non-numeric results
        * (eg "nan") are skipped as well.
        */
        $value = isset($calcpr[$i]) ? $calcpr[$i] : 0;
        if (is_numeric($value) && $value != 0) {
            $vcount[$id] = $value;
            $maxi = max($maxi, strlen($viruses[$id]["name"]));
        }
        $i++;
    }
    $maxi++;

    /*
    * We usually always have the infected.rrd and passed.rrd 'viruses'.
    * Take them out of the array (saving the infected total) because we
    * don't really want to graph them.
    */
    $infected = 0;
    if (count($vcount) >= 1) {
        arsort($vcount);
        if ($iid !== null && isset($vcount[$iid])) {
            $infected = $vcount[$iid];
        }
        foreach (array($iid, $pid, $psid, $bid, $nid, $nsid) as $special) {
            if ($special !== null) {
                unset($vcount[$special]);
            }
        }
    }
    else {
        asDbg("vcount is an empty array");
    }

    // Only the two supported rates are let through; the value is
    // written into the CDEF options built by addopts().
    if (is_scalar($rate) && (string) $rate === "3600") {
        $rate = 3600;
        $ratemsg = "hour";
    } else {
        $rate = 60;
        $ratemsg = "min";
    }


    /*
    * Now that we have the counts of each virus over the time period
    * we can build the actual graph
    */
    $opts = array();
    $opts[] = "--start=$start";
    $opts[] = "--end=$end";
    $opts[] = "--title=Virus Detection on $host ($timetext)";
    $opts[] = "--width=520";
    $opts[] = "--vertical-label=viruses/$ratemsg";


    /*
    * The tricky part, building rrd rows but ordering the elements by
    * columns... The legend is laid out row by row by rrdtool, but we want
    * the (sorted) viruses to read top-to-bottom, column after column.
    * When the total is not a multiple of the width, the first $mod
    * columns hold $depth entries and the remaining ones $depth - 1.
    */

    if ($maxi > 20) {
        $width = 2;
    } else {
        $width = 3;
    }
    $total = count($vcount);
    $depth = ceil($total / $width);

    $mod = $total % $width;

    if ($infected > 0) {

        $keyarray = array_keys ($vcount);

        for ($d = 1; $d <= $depth; $d++) {
            for ($col = 1; $col <= $width; $col++) {

                if ($col == 1) {
                    $index = $d;
                }
                elseif ($d != $depth || $mod == 0 || $mod >= $col) {
                    if (($mod == 0) || ($col - $mod) < 2) {
                        $index = ($col - 1) * $depth + $d;
                    } else {
                        $index = $mod * $depth + 
                                 ($col - $mod - 1)*($depth - 1) + $d ;
                    }
                }
                else {
                    // short column: nothing in the last row
                    continue;
                }

                $id = $keyarray[$index - 1];

                // the first element opens the area, the rest stack on it
                if ($d == 1 && $col == 1) {
                    addopts($opts, "AREA", $id, $vcount, $viruses, $length);
                }
                else {
                    addopts($opts, "STACK", $id, $vcount, $viruses, $length);
                }
            }
            $opts[] = "COMMENT:\\n"; 
        }
    }

    $opts[] = "COMMENT:\\n"; 
    $opts[] = "COMMENT:amavis-stats v$asVersion ";
    $opts[] = "COMMENT:$enddate \\r";

    asDbg($opts);
    $ret = rrd_graph($img, $opts, count($opts));

    if (!is_array($ret)) {
        $err = rrd_error();
        asErr("rrd_graph(): $err");
        return false;
    }

    return "<img src=\"" . htmlspecialchars($img) . "\" alt=\"[image: " .
           htmlspecialchars($timetext) . "]\">\n\n";
}





/*
* asPGraph ( file,    - name of the png to generate
*           endtime, - end time in seconds (defaults to 'now')
*           length,  - length of time in seconds (defaults to one day)
*           timetext,- human-readable description of length (eg: 'by day')
*           hostname,- hostname of amavis server (defaults to the output
*                      of the "hostname" command)
*         )
*
* Build a graph of clean or "Passed" emails, stacked with the other
* message counters (spam, banned, infected, not delivered) that
* asLoadStats() found.
* Returns either a html-valid <img> tag which can be printed, or the
* boolean "false" if something went wrong.
*/
function asPGraph($img, $end = 0, $length = 86400, $timetext = "", $host = "") {
    global $as_libdir, $virus, $pid, $psid, $iid, $bid, $nid, $nsid, $lastupdate, $maxi, $rate, $asVersion, $rrdstep;

    /*
    * Options... when do we start, end, graph title text etc.
    */
    if ($end == 0) {
        $end = time();
    }

    // make the end time an even multiple of the rrdstep
    // (300 is the step set at the top of this file, used if the global
    // is not visible)
    $step = ($rrdstep > 0) ? $rrdstep : 300;
    $end = (int) (floor($end / $step) * $step);
    $start = $end - $length;

    $enddate = strftime("%c", $end);

    if ($timetext == "") {
        $timetext = "$length seconds";
    }

    if ($host == "") {
        $host = str_replace("\n", "", (string) shell_exec("hostname"));
    }

    // Only the two supported rates are let through; the value is
    // written into the CDEF options below.
    if (is_scalar($rate) && (string) $rate === "3600") {
        $rate = 3600;
        $ratemsg = "hour";
    } else {
        $rate = 60;
        $ratemsg = "min";
    }

    $opts = array();
    $opts[] = "--start=$start";
    $opts[] = "--end=$end";
    $opts[] = "--title=Messages Scanned on $host ($timetext)";
    $opts[] = "--width=520";
    $opts[] = "--vertical-label=msgs/$ratemsg";

    // Counters to draw, bottom of the stack first, and their colours.
    $arr = array($pid, $psid, $bid, $iid, $nid, $nsid);
    $arrcol = array("6A5ACD", "fA5ACD", "cc9900", "ffdd00", "9900aa", "ff3a3d");

    // The first counter drawn opens the area, the rest stack on top of it.
    $type = "AREA";
    foreach ($arr as $idx => $id) {
        // ids that were not found in the names file are not drawn
        if ($id > 0) {
            $name = sprintf("%-21s", $virus[$id]["name"]);
            $col  = $arrcol[$idx];
            $opts[] = "DEF:v$id=$as_libdir/$id.rrd:hits:AVERAGE";
            // treat unknown samples as zero
            $opts[] = "CDEF:gv$id=v$id,UN,0,v$id,IF";
            // scaled by the period length: printed in the legend
            $opts[] = "CDEF:gvt$id=gv$id,$length,*";
            // scaled by the rate: the plotted value
            $opts[] = "CDEF:gtvt$id=gv$id,$rate,*";
            $opts[] = "$type:gtvt$id#$col:$name ";
            $opts[] = "GPRINT:gvt$id:AVERAGE:%.0lf";
            $opts[] = "COMMENT:\\n"; 
            $type = "STACK";
        }
    }

    $opts[] = "COMMENT:amavis-stats v$asVersion ";
    $opts[] = "COMMENT:$enddate \\r";

    /*
    * debugging - graph definitions
    */
    asDbg($opts);

    $ret = rrd_graph($img, $opts, count($opts));
    if (!is_array($ret)) {
        $err = rrd_error();
        asErr("rrd_graph(): $err");
        return false;
    }

    return "<img src=\"" . htmlspecialchars($img) . "\" alt=\"[image: " .
           htmlspecialchars($timetext) . "]\">\n\n";
}


?>
