#!/usr/bin/perl -w

# Copyright (C) 2007-2009 Bart Martens <bartm@knars.be>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# On Debian systems, the complete text of the GNU General Public
# License version 2 can be found in `/usr/share/common-licenses/GPL-2'.

use strict;
use LWP::Simple;

my $deb_version = "DEB_VERSION";

# read command line options
use Getopt::Long;
my $packre = '';
my $maintre = '';
my $uploadre = '';
my $maintuploadre = '';
my $installed = '';
my $all = '';
my $nocache = '';
my $noextract = '';
my $cacheonly = '';
my $forget = '';
my $input = '';
my $export = '';
my $try = '';
my $debug = '';
my $sid = 'sid';
GetOptions
(
	"packre=s"  => \$packre,
	"maintre=s"  => \$maintre,
	"uploadre=s"  => \$uploadre,
	"maintuploadre=s"  => \$maintuploadre,
	"installed" => \$installed,
	"all" => \$all,
	"nocache"  => \$nocache,
	"noextract"  => \$noextract,
	"cacheonly"  => \$cacheonly,
	"forget" => \$forget,
	"input" => \$input,
	"export" => \$export,
	"try=s"  => \$try,
	"debug" => \$debug,
	"sid=s" => \$sid,
) or die;
$export = 1 if( $try );
$nocache = 1 if( $try );
$nocache = 1 if( $export );

# create user configuration directory
my $usrcfgdir = glob( "~/.watchupstream" );
mkdir $usrcfgdir if( ! -d $usrcfgdir );

# lock the lockfile
use Fcntl qw(:flock);
open LOCK, ">$usrcfgdir/lockfile" or die;
flock LOCK, LOCK_EX or die;

# tie hash for state variables
use Fcntl;
use NDBM_File;
my %statevar;
tie( %statevar, 'NDBM_File', "$usrcfgdir/statevar", O_RDWR|O_CREAT, 0644 ) or die;

# tie hashes for debian repository information
my %version;
my %maintainer;
my %uploaders;
my %difffile;
my %bin2src;
tie( %version, 'NDBM_File', "$usrcfgdir/version", O_RDWR|O_CREAT, 0644 ) or die;
tie( %maintainer, 'NDBM_File', "$usrcfgdir/maintainer", O_RDWR|O_CREAT, 0644 ) or die;
tie( %uploaders, 'NDBM_File', "$usrcfgdir/uploaders", O_RDWR|O_CREAT, 0644 ) or die;
tie( %difffile, 'NDBM_File', "$usrcfgdir/difffile", O_RDWR|O_CREAT, 0644 ) or die;
tie( %bin2src, 'NDBM_File', "$usrcfgdir/bin2src", O_RDWR|O_CREAT, 0644 ) or die;

# subroutine to make time limits a bit less strict
sub more_or_less
{
	my $duration = shift;
	return $duration + int(rand($duration/5)) - $duration/10;
}

# load debian repository information
my $localrepo = '';
#$localrepo = '/home/org/ftp.root/debian'; # uncomment on gluck
#$localrepo = '/org/ftp.root/debian'; # uncomment on merkel
#$localrepo = '/org/ftp.debian.org/ftp/'; # uncomment on ravel
my $oneday = 60 * 60 * 24;
my $oneweek = $oneday * 7;
$statevar{'lastrepoload'} = 0 if( ! defined( $statevar{'lastrepoload'} ) );
my $timelimitfile = "/var/lib/apt/lists";
$timelimitfile = "$localrepo/dists/sid/main/source/Sources.gz" if( $localrepo );
my $timelimit = 0;
(undef, undef, undef, undef, undef, undef, undef, undef, $timelimit )
	= stat( "$timelimitfile" );
if( $statevar{'lastrepoload'} < $timelimit )
{
	if( $localrepo )
	{
		open INPUT, "zcat $localrepo/dists/$sid/*/source/Sources.gz|" or die;
	}
	else
	{
		open INPUT, "cat /var/lib/apt/lists/*_debian_dists_${sid}_*_source_Sources|" or die;
	}

	%version = ();
	%maintainer = ();
	%uploaders = ();
	%difffile = ();
	%bin2src = ();

	my %field;

	while(<INPUT>)
	{
		chomp;

		%field = () if( /^Package:/ );
		$field{$1} = $2 if( /^(\S+): (.*)$/ );
		$field{$2} = $1 if( /^ \S{32} \d+ (\S+\.(dsc|orig\.tar\.gz|diff\.gz))$/ );

		if( /^$/ )
		{
			$version{$field{'Package'}} = $field{'Version'};
			$maintainer{$field{'Package'}} = $field{'Maintainer'};
			$uploaders{$field{'Package'}} = $field{'Uploaders'}
				if( defined $field{'Uploaders'} );
			$difffile{$field{'Package'}} = "$field{'Directory'}/$field{'diff.gz'}"
				if( defined( $field{'diff.gz'} ) );

			my $binary = ", " . $field{'Binary'} . ", ";
			while( $binary =~ s/, (\S+), /, / )
			{
				$bin2src{$1} = $field{'Package'};
			}

			%field = ();
		}
	}

	close INPUT;

	$statevar{'lastrepoload'} = time;
}

# literals
my $seppat = '[\-_]';
my $versionpat     = 'v?[\d\.]+(?:rc|rc\d+|pre-rc\d+|-unix|-source|-Source|-src|\.src|\.orig|[a-z]|b\d+|beta\d+-src|beta\d+)?';
my $cpanversionpat = 'v?[\d\._]+(?:a|b|b\d+|RC\d+)?';
my $fileexts = 'tar\.gz|tgz|tar\.bz2|zip|pm\.gz|jar|shar\.gz|shar\.Z';
my $watchfiletmp = "$usrcfgdir/watchfile.watch";
my $uversionmangle =
	  's/(\d)[\-_]?(rc\d+|pre-rc\d+|pre\d+a?)$/$1~$2/;'
	. 's/[\-\.](source|Source|src|orig|unix)$//;'
	. 's/-(bin|osx)$/~$1/;'
	. 's/^v(\d)/$1/';
my $dversionmangle = 's/(\d)(rc\d+)$/$1~$2/';
my $opts = 'opts="uversionmangle='.$uversionmangle.'"'; # no dversionmangle
#my $opts = 'opts="uversionmangle='.$uversionmangle.',dversionmangle='.$dversionmangle.'"';
my $cpanopts = 'opts="uversionmangle=' . 's/^v(\d)/$1/;s/^\.(\d)/0.$1/' . '"';

# subroutine to create a watch file for a known downloadpage
sub create_downloadpage_watchfile
{
	my $package = shift;
	my $downloadpage = shift;
	my $filename = shift;
	my $separator = shift;

	print STDERR "create_downloadpage_watchfile: "
		."$package $downloadpage $filename $separator\n" if( $debug );

	$filename =~ s/\+/\(?:\\+|%2B\)/g;

	open OUTPUT, ">$watchfiletmp" or die;
	print OUTPUT "version=3\n";
	print OUTPUT "$opts \\\n";
	print OUTPUT "$downloadpage .*$filename$separator($versionpat)\\.(?:$fileexts)\n";
	print OUTPUT "# generated by watchupstream $deb_version\n";
	close OUTPUT;
}

# subroutine to create a watch file for a known downloaddir
sub create_downloaddir_watchfile
{
	my $package = shift;
	my $downloaddir = shift;
	my $filename = shift;

	print STDERR "create_downloaddir_watchfile: $package $downloaddir $filename\n" if( $debug );

	$filename =~ s/\+/\(?:\\+|%2B\)/g;

	open OUTPUT, ">$watchfiletmp" or die;
	print OUTPUT "version=3\n";
	print OUTPUT "$opts \\\n";
	print OUTPUT "$downloaddir/$filename$seppat($versionpat)\\.(?:$fileexts)\n";
	print OUTPUT "# generated by watchupstream $deb_version\n";
	close OUTPUT;
}

# subroutine to create watch files for cpan
sub create_cpan_watchfile_permalink
{
	my $cpandir = shift;
	my $cpanname = $cpandir;

	open OUTPUT, ">$watchfiletmp" or die;
	print OUTPUT "version=3\n";
	print OUTPUT "$cpanopts \\\n";
	print OUTPUT "http://search.cpan.org/dist/$cpandir/ \\\n";
	print OUTPUT ".*/$cpanname-($cpanversionpat)\.(?:$fileexts)\n";
	print OUTPUT "# generated by watchupstream $deb_version\n";
	close OUTPUT;
}

# subroutine to create a watch file for sourcefourge (sf)
sub create_sf_watchfile
{
	my $package = shift;
	my $sfproject = shift;
	my $filename = shift;

	print STDERR "create_sf_watchfile: $package $sfproject $filename\n" if( $debug );

	open OUTPUT, ">$watchfiletmp" or die;
	print OUTPUT "version=3\n";
	print OUTPUT "$opts \\\n";
	print OUTPUT "http://sf.net/$sfproject/$filename$seppat($versionpat)\\.(?:$fileexts)\n";
	print OUTPUT "# generated by watchupstream $deb_version\n";
	close OUTPUT;
}

# tie hashes for the results
my %resulttime;
my %resultlocal;
my %resultnewest;
my %resultuscan;
tie( %resulttime, 'NDBM_File', "$usrcfgdir/resulttime", O_RDWR|O_CREAT, 0644 ) or die;
tie( %resultlocal, 'NDBM_File', "$usrcfgdir/resultlocal", O_RDWR|O_CREAT, 0644 ) or die;
tie( %resultnewest, 'NDBM_File', "$usrcfgdir/resultnewest", O_RDWR|O_CREAT, 0644 ) or die;
tie( %resultuscan, 'NDBM_File', "$usrcfgdir/resultuscan", O_RDWR|O_CREAT, 0644 ) or die;

# subroutine to derive the source package name from the binary-or-source package name
sub derive_source_package_name
{
	my $pkg = shift;
	my $package;

	$package = $bin2src{$pkg} if( defined( $bin2src{$pkg} ) );
	$package = $pkg if( defined( $version{$pkg} ) );

	return $package;
}

# subroutine to show the result
sub show_result
{
	my $package = shift;
	print "$resulttime{$package} $package"
		. " $resultlocal{$package} $resultnewest{$package} $resultuscan{$package}\n";
}

# subroutine to evaluate a watch file
my $resultuscan;
my $resultnewest;
sub evaluate_watchfile
{
	my $package = shift;
	my $version = shift;

	print STDERR "evaluate_watchfile: $package $version\n" if( $debug );

	$resultuscan = "unknown";
	$resultnewest = 0;

	open INPUT, "uscan"
		. " --timeout 60"
		. " --report-status"
		. " --package $package"
		. " --upstream-version $version"
		. " --watchfile $watchfiletmp"
		. " 2>&1 |" or die;

	while(<INPUT>)
	{
		chomp;
		print STDERR "evaluate_watchfile uscan: $_\n" if( $debug );
		$resultuscan = 'ahead' if( /remote site does not even have current version/ );
		$resultuscan = 'OK' if( /Package is up to date/ );
		$resultuscan = 'outdated' if( /Newer version .*available on remote site/ );
		$resultnewest = $1 if( /Newest version on remote site is (\S+), local version is/ );
	}
	close INPUT;
}

# extract the watch file from the diff file
my $difffiletmp = "$usrcfgdir/difffile.diff.gz";
my $filterdifftmp = "$usrcfgdir/filterdiff.diff";
sub extract_watchfile_from_difffile
{
	my $package = shift;

	unlink "$difffiletmp";
	if( $localrepo )
	{
		`cp $localrepo/$difffile{$package} $difffiletmp`;
	}
	else
	{
		`wget -q -O $difffiletmp 'http://ftp.debian.org/debian/$difffile{$package}'`;
	}
	unlink $watchfiletmp;
	`touch $watchfiletmp`;
	if( ! -z $difffiletmp )
	{
		`filterdiff -z -p1 -i 'debian/watch' $difffiletmp > $filterdifftmp`;
		`filterdiff -z -p1 -i 'debian/$package.watch' $difffiletmp > $filterdifftmp`
			if( -z $filterdifftmp );
		`filterdiff -z -p1 -i 'debian/*watch' -x 'debian/*-watch' $difffiletmp > $filterdifftmp`
			if( -z $filterdifftmp );
		`cat $filterdifftmp | patch $watchfiletmp`;
		unlink "$filterdifftmp";
	}
	unlink "$watchfiletmp.orig";
	unlink "$watchfiletmp.rej";
	unlink "$difffiletmp";
}

# subroutine to process one package name
sub process_package
{
	my $package = shift;

	# replace binary package by source package
	$package = derive_source_package_name( $package );
	return if( ! defined( $package ) );

	# derive upstream version
	my $version = $version{$package};
	$version =~ s/-\d+(\.\d+|\+nmu\d+)?$//; # cut off debian revision (also for NMU)
	$version =~ s/^\d+://; # cut off epoch
	$version =~ s/(\.|\+|-|~|)dfsg(\.\d+|\d*)$//;
	$version =~ s/(\d)(\.|)ds\d+$/$1/;
	$version =~ s/(\d)(pre\d+)$/$1~$2/;
	$version =~ s/(\d)-(pre\d+)$/$1~$2/;
	$version =~ s/(\d)-(rc\d+)$/$1~$2/;
	$version =~ s/(\d)(cdbs)$/$1/;
	$version =~ s/(\+deb|\+deb\d+|[\.\+]debian\d*|\.0debian\d+|~debian|\.alan\d+|\.free|\+pristine|-\d+lenny\d+)$//;

	# forget cached result
	if( $forget )
	{
		delete $resulttime{$package};
		delete $resultlocal{$package};
		delete $resultnewest{$package};
		delete $resultuscan{$package};
		unlink "$usrcfgdir/watchfiles/$package.watch";
		return;
	}

	# return cached result
	if( defined( $resulttime{$package} ) and defined( $resultlocal{$package} )
	and $resultlocal{$package} eq $version
	and $resulttime{$package} + more_or_less($oneweek) > time
	and ! $nocache )
	{
		show_result "$package";
		return;
	}
	return if( $cacheonly );

	# initialize result
	$resulttime{$package} = time;
	$resultlocal{$package} = $version;
	$resultnewest{$package} = "0";
	$resultuscan{$package} = "unknown";

	# get the watch file and evaluate it
	unlink $watchfiletmp;
	$resultuscan = "unknown";
	$resultnewest = 0;
	if( $try )
	{
		if( $resultuscan eq "unknown"
		and $try =~ ( '^https?://sourceforge\.net/projects/([^/]+)/files/(?:[^/]+)/(?:[^/]+)/([^/]+?)'
			.$seppat.$versionpat.'\.(?:'.$fileexts.')/download$' ) )
		{
			create_sf_watchfile $package, $1, $2;
			evaluate_watchfile $package, $version;
		}
		
		if( $resultuscan eq "unknown"
		and ( $try =~ ( '^http://(?:sf|sourceforge)\.net/([^/]+)/([^/]+)'
			.$seppat.$versionpat.'\.(?:'.$fileexts.')' )
		or $try =~ ( '^http://qa\.debian\.org/watch/sf\.php/([^/]+)/([^/]+)'
			.$seppat.$versionpat.'\.(?:'.$fileexts.')' )
		or $try =~ ( '^http://qa\.debian\.org/~bartm/watch/sf\.php/([^/]+)/([^/]+)'
			.$seppat.$versionpat.'\.(?:'.$fileexts.')' )
		or $try =~ ( '^http://downloads.sourceforge.net/([^/]+)/([^/]+)'
			.$seppat.$versionpat.'\.(?:'.$fileexts.')' ) ) )
		{
			create_sf_watchfile $package, $1, $2;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown"
		and ( $try =~ '^http://(?:sf|sourceforge)\.net/([^/]+)/?$'
		or $try =~ '^http://qa\.debian\.org/watch/sf\.php/([^/]+)/?$'
		or $try =~ '^http://qa\.debian\.org/~bartm/watch/sf\.php/([^/]+)/?$'
		or $try =~ '^http://([^\.]+)\.(?:sourceforge|sf)\.net/$'
		or $try =~ '^http://sourceforge.net/projects/([^/]+)/?$' ) )
		{
			create_sf_watchfile $package, $1, $package;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown"
		and ( $try =~ '^http://(?:sf|sourceforge)\.net/?$'
		or $try =~ '^http://qa\.debian\.org/watch/sf\.php/?$'
		or $try =~ '^http://qa\.debian\.org/~bartm/watch/sf\.php/?$' ) )
		{
			create_sf_watchfile $package, $package, $package;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown"
		and $try =~ '^http://search.cpan.org/dist/([^/]+)/$' )
		{
			create_cpan_watchfile_permalink $1;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown"
		and $try =~ ( '^((?:http|ftp)://.*/)([^/]+)'.$seppat.$versionpat.'\.(?:'.$fileexts.')' ) )
		{
			create_downloaddir_watchfile $package, $1, $2;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown" )
		{
			create_downloadpage_watchfile $package, $try, $package, '-';
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown" )
		{
			create_downloaddir_watchfile $package, $try, $package;
			evaluate_watchfile $package, $version;
		}
	}
	else
	{
		if( $resultuscan eq "unknown"
		and -f "$usrcfgdir/watchfiles/$package.watch" )
		{
			`cp "$usrcfgdir/watchfiles/$package.watch" $watchfiletmp`;
			evaluate_watchfile $package, $version;
		}
		if( $resultuscan eq "unknown"
		and -f "$usrcfgdir/watchfiles/$package.watch" )
		{
			unlink "$usrcfgdir/watchfiles/$package.watch";
		}
		if( $resultuscan eq "unknown"
		and defined( $difffile{$package} )
		and ! $noextract )
		{
			extract_watchfile_from_difffile $package;
			evaluate_watchfile $package, $version;
		}
	}

	# mark native packages
	$resultuscan = 'native'
		if( $resultuscan eq "unknown" and ! defined( $difffile{$package} ) );

	# cache the result
	$resultuscan{$package} = $resultuscan;
	$resultnewest{$package} = $resultnewest;

	# show the result
	show_result $package;

	# save working watch files for later reuse
	if( $export and $resultuscan{$package} =~ /^(OK|ahead|outdated)$/ )
	{
		mkdir "$usrcfgdir/watchfiles" if( ! -d "$usrcfgdir/watchfiles" );
		`cp "$watchfiletmp" "$usrcfgdir/watchfiles/$package.watch"`;
	}
	unlink $watchfiletmp;
}

# subroutine to evaluate a source package with a maintainer regular expression
sub evaluate_package_with_maintre
{
	my $package = shift;
	my $re = shift;

	my $maintainer = ", " . $maintainer{$package} . ", ";

	while( $maintainer =~ s/, ([^<>]*?<[^<>]*?>), /, / )
	{
		my $maint = $1;
		return 1 if( $maint =~ /$re/i );
	}

	#die "$package: $maintainer" if( $maintainer ne ", " );

	return 0;
}

# subroutine to evaluate a source package with a uploaders regular expression
sub evaluate_package_with_uploadre
{
	my $package = shift;
	my $re = shift;

	return 0 if( ! defined $uploaders{$package} );

	my $uploaders = ", " . $uploaders{$package} . ", ";

	$uploaders =~ s/\s/ /g;
	$uploaders =~ s/(debian.org>,)([A-Z])/$1 $2/g;
	$uploaders =~ s/(debian.org>)( [A-Z])/$1,$2/g;

	while( $uploaders =~ s/, ([^<>]*?<[^<>]*?>), /, / )
	{
		my $upload = $1;
		return 1 if( $upload =~ /$re/i );
	}

	#die "$package: $uploaders" if( $uploaders ne ", " );

	return 0;
}

# subroutine to evaluate a binary or source package with regular expressions
sub evaluate_package_with_regexps
{
	my $pkg = shift;
	my $package = derive_source_package_name( $pkg );

	return 0 if( ! defined( $package ) );
	return 0 if( $packre and $pkg !~ /$packre/i and $package !~ /$packre/i );
	return 0 if( $maintuploadre
		and ! evaluate_package_with_maintre( $package, $maintuploadre )
		and ! evaluate_package_with_uploadre( $package, $maintuploadre ) );
	return 0 if( $maintre and ! evaluate_package_with_maintre( $package, $maintre ) );
	return 0 if( $uploadre and ! evaluate_package_with_uploadre( $package, $uploadre ) );

	return 1;
}

# which packages to process
if( $#ARGV >= 0 )
{
	# package names on command line
	my %selected = ();
	my $package;
	foreach my $pkg ( @ARGV )
	{
		$package = derive_source_package_name( $pkg );
		next if( ! defined( $package ) );
		next if( ! evaluate_package_with_regexps( $pkg ) );
		$selected{$package} = 1;
	}
	foreach my $package ( sort keys %selected )
	{
		process_package "$package";
	}
}
elsif( $packre or $maintre or $uploadre or $maintuploadre )
{
	# regular expression(s)
	my %selected = ();
	my $package;
	foreach my $pkg ( ( keys %bin2src, keys %version ) )
	{
		$package = derive_source_package_name( $pkg );
		next if( ! defined( $package ) );
		next if( ! evaluate_package_with_regexps( $pkg ) );
		$selected{$package} = 1;
	}
	foreach my $package ( sort keys %selected )
	{
		process_package "$package";
	}
}
elsif( $installed )
{
	# process all installed packages
	open INPUT, "dpkg-query -W --showformat='\${Package}\n'|" or die;
	my %selected = ();
	my $package;
	while(<INPUT>)
	{
		chomp;
		$package = derive_source_package_name( $_ );
		next if( ! defined( $package ) );
		$selected{$package} = 1;
	}
	close INPUT;
	foreach my $package ( sort keys %selected )
	{
		process_package "$package";
	}
}
elsif( $all )
{
	# process all packages
	foreach my $package ( sort keys %version )
	{
		process_package "$package";
	}
}
else
{
	# process package names given at stdin
	while(<>)
	{
		chomp;
		next if( /^\s*$/ ); # blank lines

		if( $input and /^(\d+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)$/ ) # output used as input
		{
			$resulttime{$2} = $1;
			$resultlocal{$2} = $3;
			$resultnewest{$2} = $4;
			$resultuscan{$2} = $5;
			$version{$2} = $3;
			next;
		}

		s/^\d+\s(\S+)\s\S+\s\S+\s\S+$/$1/; # output used as input
		s/(?:.*\/)?(.*)\.watch$/$1/; # list of watch files used as input
		my $package = $_;
		process_package "$package";
	}
}

# untie hashes
untie %resulttime;
untie %resultlocal;
untie %resultnewest;
untie %resultuscan;
untie %version;
untie %maintainer;
untie %uploaders;
untie %difffile;
untie %bin2src;
untie %statevar;

# unlock the lockfile
flock LOCK, LOCK_UN or die;
close LOCK or die;

