#!/usr/bin/perl -wT

# fexsrv : web server for F*EX service
#
# Author: Ulli Horlacher <framstag@rus.uni-stuttgart.de>
#
# Copyright: GNU General Public License

use Socket;
use IO::Handle;
use Fcntl qw(:flock :seek);

# use BSD::Resource;
# setrlimit(RLIMIT_CPU,999,999) or die "$0: $!\n";

# SSL remote address provided by stunnel
if (@ARGV and $ARGV[0] eq 'stunnel' and $ENV{REMOTE_HOST} =~ /(.+)/) {
  $ssl_ra = $1;
}

# KEEP_ALIVE <== callback from CGI
if ($ENV{KEEP_ALIVE}) { 
  $keep_alive = $ENV{KEEP_ALIVE};
} else {
  %ENV = ();   # clean environment
}

$ENV{HOME} = (getpwuid($<))[7];

# add fex lib - fexsrv MUST be run with full path!
if ($0 =~ m:(/.+)/.+/:) {
  $ENV{FEXHOME} = $FEXHOME = $1;
  $FEXLIB = "$FEXHOME/lib"; 
}

unless ($FEXLIB and -d $FEXLIB) {
  die "$0: found no FEXLIB - fexsrv needs full path\n"
}

# import from fex.pp
our ($hostname,$fakehost,$debug,$timeout,$max_error_handler,$logdir);

# load common code (local config: $FEXHOME/lib/fex.ph)
require "$FEXLIB/fex.pp" or die "$0: cannot load $FEXLIB/fex.pp - $!\n";

our $log = "$logdir/fexsrv.log";

$0 = untaint($0);
  
$ENV{GATEWAY_INTERFACE} = 'CGI/1.1';
$ENV{SERVER_NAME} = $hostname;
$ENV{QUERY_STRING} = '';
$ENV{PATH_INFO} = '';
$ENV{SID} = randstring(8) unless $ENV{SID};

my $features = 'SID,CHECKRECIPIENT';

# ignore bullshit requests
my $ignore = join('|',qw(
  ^GET.*ISC\.SANS\.
));
  
my $nolog = join('|',qw(
  ^Accept
));


# continue session?
if ($keep_alive) {
  if ($ENV{HTTP_HOST} =~ /(.+):(.+)/) {
    $hostname = $1;
    $port = $2;
  } else {
    $hostname = $ENV{HTTP_HOST};
    if ($ENV{PROTO} eq 'https') { $port = 443 }
    else                        { $port = 80 }
  }
  $ra = $ENV{REMOTE_ADDR};
  $connect = $keep_alive;
} 

# new session
else {
    
  # HTTPS connect
  if ($ssl_ra) {
    $ENV{PROTO} = 'https';
    $ENV{REMOTE_ADDR} = $ra = $ssl_ra;
    $rh = gethostbyaddr(inet_aton($ra),AF_INET);
    $port = 443;
    # print {$log} "X-SSL-Remote-Host: $ssl_ra\n";
  } 

  # HTTP connect
  else {
    $ENV{PROTO} = 'http';
    my $sa = getpeername(STDIN) or die "$0: no network stream on STDIN\n";
    ($ENV{REMOTE_PORT},$iaddr) = sockaddr_in($sa);
    $ENV{REMOTE_ADDR} = $ra = inet_ntoa($iaddr);
    $rh = gethostbyaddr($iaddr,AF_INET);
    ($port) = sockaddr_in(getsockname(STDIN));
    $port = 80 unless $port;
  }
  
  $ENV{REMOTE_HOST} = $rh || '';

  if ($fakehost) {
    $ENV{HTTP_HOST} = $fakehost;
  } else {
    if ($port == 80 or $port == 443) { $ENV{HTTP_HOST} = $hostname }
    else                             { $ENV{HTTP_HOST} = "$hostname:$port" }
  }
  
  $connect = sprintf "CONNECT:%s %s %s %s %s:%s",
                     $port,isodate(time),$rh||'-',$ra,$$,$ENV{SID};
}

$| = 1;

$SIG{ALRM} = sub { 
  # printf {$log} "\nTIMEOUT %s %s\n",isodate(time),$connect; 
  debuglog('TIMEOUT',isodate(time));
  exit; 
};
alarm($timeout);

if (open $log,">>$log") {
  flock $log,LOCK_SH|LOCK_NB; # for logwatch
  seek $log,0,SEEK_END;
  autoflush $log 1;
} else {
  die "$0: cannot write to $log - $!\n"
}

# loop label for continuing HTTP requests within this tcp session
REQUEST: 

seek $log,0,SEEK_END;

$ENV{REQUEST_URI} = '';
$http_req = $cgi = '';
$hl = 0;

# get first line: is it a HTTP-request at all?
$REQUEST = $_ = nvt_read();
&schrub;

if ($REQUEST) {
  if ($keep_alive) {
    printf {$log} "\nCONTINUE %s %s\n",isodate(time),$connect;
    debuglog("CONTINUE $connect");
  } else {
    print {$log} "\n$connect\n";
    debuglog($connect);
  }
}

unless (/^(GET|HEAD|POST|OPTIONS).*HTTP\/\d\.\d$/i) {
  print {$log} "$_\n";
  print {$log} "DISCONNECT: no HTTP request\n";
  &$header_hook($_,$ra) if $header_hook;
  exit;
}

while (defined($_ = nvt_read())) {
  $hl += length;
  &schrub;
  $http_req = $_ unless $http_req;
  last if /^\s*$/;
  if (/^(GET|HEAD|POST|OPTIONS)/i) {
    unless ($keep_alive) {
      if (m:^GET /?SID HTTP:i or m:(HTTP/1.(\d+)): and $2) {
        $ENV{KEEP_ALIVE} = $keep_alive = $connect;
      } else {
        $ENV{KEEP_ALIVE} = $keep_alive = '';
      }
    }
  }
  print {$log} "$_\n" unless /$nolog/i;
  s{^(GET|HEAD|POST) http://$hostname(:\d+)?}{$1 };
  &$header_hook($_,$ra) if $header_hook;
  
  # mega stupid "Download Manager" FlashGet
  if (m{^Referer: https?://[^/]+/fop/\w+$}) {
    print {$log} "DISCONNECT: FlashGet\n";
    debuglog("DISCONNECT: FlashGet");
    sleep 30;
    exit;
  }
 
  if (/^Range:/ and $protocol =~ /1\.0/) {
    print {$log} "DISCONNECT: Range + HTTP/1.0\n";
    debuglog("DISCONNECT: Range + HTTP/1.0");
    http_error(416);
    exit;
  }
  
  if (/$ignore/) {
    http_error(404);
    exit;
  }
  
  if (/^OPTIONS FEX HTTP\/[\d\.]+$/i) {
    nvt_print(
	"HTTP/1.1 201 OK",
        "X-Features: $features",
        ''
    );
    goto REQUEST; # uh-uhhhh! ugly! ;-)
  } elsif (m:^GET /?SID HTTP/[\d\.]+$:i) {
    while ($_ = nvt_read()) {
      &schrub;
      # $fix = $_ if /^User-Agent: F\*IX/;
      last if /^\s*$/;
      print {$log} "$_\n";
    }
    nvt_print(
      "HTTP/1.1 201 ".$ENV{SID},
      "X-Features: $features",
      "X-SID: ".$ENV{SID},
      'Content-Length: 0',
      ''
    );
    # http://en.wikipedia.org/wiki/HTTP#Persistent_connections
    goto REQUEST; # uh-uhhhh! ugly! ;-)
  } elsif (/^(GET|HEAD|POST)\s+(.+)\s(HTTP\/[\d\.]+$)/i) {
    $ENV{REQUEST_METHOD} = uc($1);
    $ENV{REQUEST_URI}    = $cgi = $2;
    $ENV{HTTP_VERSION}   = $protocol = $3;
    $ENV{QUERY_STRING}   = $1               if $cgi =~ s/\?(.*)//;
    $ENV{PATH_INFO}      = $1               if $cgi =~ m:/.+?(/.+?)(\?|$):;
    $ENV{KEEP_ALIVE}     = $keep_alive = '' if $protocol =~ /1\.0/;
  } elsif (/^([\w\-_]+):\s*(.+)/) {
    $http_var = $1;
    $http_val = $2;
    $http_val =~ s/\s+$//;
    $http_var =~ s/-/_/g;
    $http_var = uc($http_var);
    $http_var = 'HTTP_'.$http_var if $http_var !~ /^CONTENT_/;
    $http_var = 'X-'.$http_var    if $http_var =~ /^HTTP_(HOST|VERSION)$/;
    $ENV{$http_var} = $http_val;
    if ($http_var eq 'HTTP_CONNECTION' and $http_val =~ /close/i) {
      $ENV{KEEP_ALIVE} = $keep_alive = '' 
    }
  }
}
$ENV{'HTTP_HEADER_LENGTH'} = $hl;

# # HTTP_HOST with IP is illegal (hackers!)
# if ($ENV{HTTP_HOST} =~ /^\d+\.\d+\.\d+\.\d+(:|$)/) {
#   http_error(400);
# }

alarm(0);

if ($debug) {
  debuglog("ENV:\n");
  foreach $var (sort keys %ENV) {
    debuglog(sprintf "  %s = >%s<\n",$var,$ENV{$var});
  }
  debuglog("\n");
}

# die "$0: no CGI request\n" unless $cgi;
exit unless $cgi;
die "$0: $tmpdir - $!\n" unless chdir $tmpdir;

# prepare document file name
if ($ENV{REQUEST_METHOD} =~ /^GET|HEAD$/) {
  $doc = untaint($ENV{REQUEST_URI} || '');
  $doc =~ s/%([\dA-F]{2})/unpack("a",pack("H2",$1))/ge;
  http_error(403) if $doc =~ m:/\.\./:;
  $doc =~ s:^/+::;
  $doc = "$FEXHOME/htdocs/$doc";
}

# CGI or document request?
if ($cgi =~ s:^/+::) {
  $cgi =~ s:/.*::;
  $ENV{SCRIPT_NAME} = $cgi;
  $ENV{SCRIPT_FILENAME} = $cgi = "$FEXHOME/cgi-bin/$cgi";
  $status = '';
  if (-x $cgi and -f $cgi) {
    unlink "$logdir/.error/$ra";
    exec $cgi or $status = $!;
  } else {
    if (-f "$doc/index.html") {
      # force redirect if trailing / is missing
      # this is mandatory for processing further HTTP request!
      if ($doc !~ m{/$}) {
        nvt_print(
          "HTTP/1.1 301 Moved Permanently",
          "Location: $ENV{PROTO}://$ENV{HTTP_HOST}$ENV{REQUEST_URI}/",
          ""
        );
        exit;
      }
      $doc .= '/index.html';
      $doc =~ s:/+:/:g;
    }
    $doc =~ s/#.*//; # ignore HTML anchors (stupid msnbot)

    # special request for F*EX UNIX clients
    if ($ENV{SCRIPT_NAME} eq 'xx.tar' and chdir "$FEXHOME/bin") {
      my $tar = `tar cf - fexget fexsend xx zz`;
      nvt_print(
        'HTTP/1.1 200 OK',
        'Server: fexsrv',
        "Content-Length: ".length($tar),
        "Content-Type: application/octet-stream",
        '',
      );
      print $tar;
      exit;
    }
    
    if (-f $doc) {
      unlink "$logdir/.error/$ra";
      require "$FEXLIB/dop";
      alarm(0);
      dop($doc);
      STDOUT->flush;
      alarm($timeout*3);
      if ($keep_alive and $REQUEST = nvt_read()) {
        goto REQUEST; # uh-uhhhh! ugly! ;-)
      }
      exit;
    } elsif (-e $cgi) { 
      $status = 'not executable';
    } else { 
      $status = 'no such file';
    }
  }

  print {$log} "FAILED to exec $cgi : $status\n";
  close $log;
}
  
# neither document nor CGI ==> error
if (open E,"+>>$logdir/.error/$ra") {
  flock(E,LOCK_EX);
  seek E,0,SEEK_SET;
  $n = 1;
  $n++ while <E>;
  printf E "%s %s\n",isodate(time),$http_req;
  if ($max_error and $n > $max_error) {
    &$max_error_handler($ra);
  }
  close E;
}

http_error(404);
exit;


# read one text line
sub nvt_read {
  my $line = '';
  my $c;
  
  # continue HTTP request, line saved before?
  if ($REQUEST) {
    $line = $REQUEST;
    $REQUEST = '';
  } else {
    # must use sysread to avoid perl line buffering
    while (sysread STDIN,$c,1) {
      $line .= $c;
      last if $c eq "\n";
    }
    debuglog($line) if $line;
  }
  return $line;
}


# mask problematic illegal binary characters in $_
sub schrub {
  s/[\r\n]+$//;
  s/([\x00-\x08\x0E-\x1F\x7F-\x9F])/sprintf "%%%02X",ord($1)/ge;
}


sub http_error {
  my $error = shift;

  if ($error eq 400) {
    nvt_print(
      "HTTP/1.1 400 Bad Request",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>400 Bad Request</title>",
      "</head><body>",
      "<h1>Bad Request</h1>",
      "<p>Your request $ENV{PROTO}://$ENV{HTTP_HOST}$ENV{REQUEST_URI} is not acceptable</p>",
      "<hr>",
      "<address>fexsrv at $hostname</address>",
      "</body></html>",
    );
  } elsif ($error eq 403) {
    nvt_print(
      "HTTP/1.1 403 Forbidden",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>403 Forbidden</title>",
      "</head><body>",
      "<h1>Forbidden</h1>",
      "<p>You have no permission to request the URI $ENV{REQUEST_URI}</p>",
      "<hr>",
      "<address>fexsrv at $hostname</address>",
      "</body></html>",
    );
  } elsif ($error eq 404) {
    nvt_print(
      "HTTP/1.1 404 Not Found",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>404 Not Found</title>",
      "</head><body>",
      "<h1>Not Found</h1>",
      "<p>The requested URL $ENV{REQUEST_URI} was not found on this server.</p>",
    );
  } elsif ($error eq 416) {
    nvt_print(
      "HTTP/1.1 416 Requested Range Not Satisfiable",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>416 Requested Range Not Satisfiable</title>",
      "</head><body>",
      "<h1>Requested Range Not Satisfiable</h1>",
      "<p>",
      "Range header is not supported with HTTP/1.0<br>",
      "Your client is not RFC conform, use a compatible web browser!",
    );
  } elsif ($error eq 500) {
    nvt_print(
      "HTTP/1.1 500 Internal Error",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>500 Internal Error</title>",
      "</head><body>",
      "<h1>Internal Error</h1>",
      "<p>The requested URL $ENV{REQUEST_URI} produced an internal error:</p>",
      "<p>@_</p>",
    );
  } else {
    nvt_print(
      "HTTP/1.1 555 Unknown Error",
      "Connection: close",
      "Content-Type: text/html; charset=iso-8859-1",
      "",
      '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">',
      "<html><head>",
      "<title>555 Unknown Internal Error</title>",
      "</head><body>",
      "<h1>Unknown Internal Error</h1>",
    );
  }
  nvt_print(
    "<hr>",
    "<address>fexsrv at $hostname</address>",
    "</body></html>",
  );
  exit;
}    
