#! /usr/bin/env perl

#
#   Copyright (C) Dr. Heinz-Josef Claes (2005)
#                 hjclaes@web.de
#   
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#   
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#   
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#


# 1.
# Lesen sourceDir, dabei erzeugen: dbm-files:
#                                             inode -> alle Files
#                                             file -> ''
# + parallel einfache Datei mit allen Dateien im targetDir
#   erzeugen + einfache Datei mit allen Directories
# 2.
# Links ermitteln, kopieren, etc. mit direktem Lesen der inodes
# im targetDir
# 3.
# Löschen im targetDir: über Datei mit allen Dateien des targetDir
# mit sourceDir dbm-file "file -> ''"
# 4.
# Löschen überflüssige Directories im targetDir wie gehabt.

my $VERSION = '$Id$ ';
push @VERSION, $VERSION;

use strict;

use DB_File;           # Berkeley DB
use File::Copy;
use POSIX;

sub libPath
{
    my $file = shift;

    my $dir;

    # Falls Datei selbst ein symlink ist, solange folgen, bis aufgelöst
    if (-f $file)
    {
	while (-l $file)
	{
	    my $link = readlink($file);

	    if (substr($link, 0, 1) ne "/")
	    {
		$file =~ s/[^\/]+$/$link/;
	    }
    else
	    {
		$file = $link;
	    }
	}

	($dir, $file) = &splitFileDir($file);
	$file = "/$file";
    }
    else
    {
	print STDERR "<$file> does not exist!\n";
	exit 1;
    }

    $dir .= "/../lib";           # Pfad zu den Bibliotheken
    my $oldDir = `/bin/pwd`;
    chomp $oldDir;
    if (chdir $dir)
    {
	my $absDir = `/bin/pwd`;
	chop $absDir;
	chdir $oldDir;

	return (&splitFileDir("$absDir$file"));
    }
    else
    {
	print STDERR "<$dir> does not exist, exiting\n";
    }
}
sub splitFileDir
{
    my $name = shift;

    return ('.', $name) unless ($name =~/\//);    # nur einfacher Dateiname

    my ($dir, $file) = $name =~ /^(.*)\/(.*)$/s;
    $dir = '/' if ($dir eq '');                   # gilt, falls z.B. /filename
    return ($dir, $file);
}
my ($req, $prog) = &libPath($0);
(@INC) = ($req, @INC);

require 'checkParam.pl';
require 'checkObjPar.pl';
require 'prLog.pl';
require 'fileDir.pl';
require 'humanRead.pl';
require 'forkProc.pl';
require 'version.pl';
require 'storeBackupLib.pl';


my $tmpdir = '/tmp';              # default value
$tmpdir = $ENV{'TMPDIR'} if defined $ENV{'TMPDIR'};


my $Help = <<EOH;
This program synchronises trees to another location.
It is useful to synchronise backups made with storeBackup to
other disks.

usage:
    $prog -s sourceDir -t targetDir [-T tmpdir] [-L lockFile]
		[--followLinks depth]
		[-l logFile
		 [--plusLogStdout] [--withTime yes|no] [-m maxFilelen]
		 [[[-n noOfOldFiles] | [--saveLogs yes|no]]
		 [--compressWith compressprog]]

--sourceDir	-s  source directory (must exist)
--targetDir	-t  target directory (must exist)
--tmpdir	-T  directory for temporary file, default is <$tmpdir>
--lockFile      -L  lock file, if exists, new instances will finish if
		    an old is allready running
--followLinks	    follow symbolic links like directories up to depth
		    default = 0 -> do not follow links
--logFile	-l  log file (default is STDOUT)
--plusLogStdout	    if you specify a log file with --logFile you can
		    additionally print the output to STDOUT with this flag
--withTime	-w  output in logfile with time: 'yes' or 'no'
		    default = 'yes'
--maxFilelen	-m  maximal length of file, default = 1e6
--noOfOldFiles	-n  number of old log files, default = 5
--saveLogs	    save log files with date and time instead of deleting the
		    old (with [-noOldFiles]): 'yes' or 'no', default = 'no'
--compressWith	    compress saved log files (e.g. with 'gzip -9')
		    default is 'bzip2'

Copyright (c) 2005 by Heinz-Josef Claes
Published under the GNU General Public License
EOH
    ;
# '

&printVersions(\@ARGV, '-V');

my $CheckPar =
    CheckParam->new('-allowLists' => 'no',
		    '-list' => [Option->new('-option' => '-s',
					    '-alias' => '--sourceDir',
					    '-param' => 'yes',
					    '-must_be' => 'yes'),
				Option->new('-option' => '-t',
					    '-alias' => '--targetDir',
					    '-param' => 'yes',
					    '-must_be' => 'yes'),
				Option->new('-option' => '-T',
					    '-alias' => '--tmpdir',
					    '-default' => $tmpdir),
				Option->new('-option' => '-L',
					    '-alias' => '--lockFile',
					    '-param' => 'yes'),
				Option->new('-option' => '--followLinks',
					    '-default' => 0),
				Option->new('-option' => '-l',
					    '-alias' => '--logFile',
					    '-default' => ''),
				Option->new('-option' => '--plusLogStdout'),
				Option->new('-option' => '-w',
					    '-alias' => '--withTime',
					    '-default' => 'yes',
                                            '-only_if' => '[-l]',
                                            '-pattern' => '\Ayes\Z|\Ano\Z'),
				Option->new('-option' => '-m',
					    '-alias' => '--maxFilelen',
					    '-default' => 1e6,
					    '-pattern' => '\A[e\d]+\Z',
                                            '-only_if' => '[-l]'),
				Option->new('-option' => '-n',
					    '-alias' => '--noOfOldFiles',
					    '-default' => '5',
					    '-pattern' => '\A\d+\Z',
                                            '-only_if' => '[-l]'),
                                Option->new('-option' => '--saveLogs',
                                            '-default' => 'no',
                                            '-only_if' => '[-l]',
                                            '-pattern' => '\Ayes\Z|\Ano\Z'),
                                Option->new('-option' => '--compressWith',
                                            '-default' => 'bzip2',
                                            '-only_if' => '[-l]')
				]
		    );

$CheckPar->check('-argv' => \@ARGV,
                 '-help' => $Help
                 );

# Auswertung der Parameter
my $sourceDir = $CheckPar->getOptWithPar('-s');
my $targetDir = $CheckPar->getOptWithPar('-t');
$tmpdir = $CheckPar->getOptWithPar('-T');
my $lockFile = $CheckPar->getOptWithPar('-L');
my $followLinks = $CheckPar->getOptWithPar('--followLinks');
$followLinks = 0 unless $followLinks;
my $logFile = $CheckPar->getOptWithPar('-l');
my $plusLogStdout = $CheckPar->getOptWithoutPar('--plusLogStdout');
my $withTime = $CheckPar->getOptWithPar('-w');
my $maxFilelen = $CheckPar->getOptWithPar('-m');
my $noOfOldFiles = $CheckPar->getOptWithPar('-n');
my $saveLogs = $CheckPar->getOptWithPar('--saveLogs');
my $compressWith = $CheckPar->getOptWithPar('--compressWith');

my $prLogKind = ['A:BEGIN',
		 'Z:END',
		 'I:INFO',
		 'W:WARNING',
		 'E:ERROR',
		 'P:PROGRESS',
		 'S:STATISTIC',
		 'D:DEBUG'];
my (@par);
if ($logFile eq '')
{
    push @par, ('-filedescriptor', *STDOUT);
}
else
{
    push @par, ('-file' => $logFile);
}
my $prLog = printLog->new('-kind' => $prLogKind,
			  @par,
			  '-withTime' => $withTime,
			  '-maxFilelen' => $maxFilelen,
			  '-noOfOldFiles' => $noOfOldFiles,
			  '-saveLogs' => $saveLogs,
			  '-compressWith' => $compressWith);

#
# check, if source and target dir exist
#
$prLog->print('-kind' => 'E',
	      '-str' => ["source directory <$sourceDir> does not exist"],
	      '-exit' => 1)
    unless (-d $sourceDir);
$prLog->print('-kind' => 'E',
	      '-str' => ["target directory <$targetDir> does not exist"],
	      '-exit' => 1)
    unless (-d $targetDir);

#
# normalise sourceDir and targetDir
#
$targetDir = &::absolutePath($targetDir);
$sourceDir = &::absolutePath($sourceDir);

#
# check, if target dir is part of source dir (or vice versa)
#

$prLog->print('-kind' => 'E',
	      '-str' => ['<$sourceDir> is part of <$targetDir>, exiting'],
	      '-exit' => 1)
		 if (&::isSubDir($sourceDir, $targetDir));
$prLog->print('-kind' => 'E',
	      '-str' => ['<$targetDir> is part of <$sourceDir>, exiting'],
	      '-exit' => 1)
		 if (&::isSubDir($targetDir, $sourceDir));

#
# check lock file
#
if ($lockFile)
{
    if (-f $lockFile)
    {
	open(FILE, "< $lockFile") or
	    $prLog->print('-kind' => 'E',
			  '-str' => ["cannot read lock file <$lockFile>"],
			  '-exit' => 1);
	my $pid = <FILE>;
	chop $pid;
	close(FILE);
	$prLog->print('-kind' => 'E',
		      '-str' => ["strange format in lock file <$lockFile>, " .
				 "line is <$pid>\n"],
		      '-exit' => 1)
	    unless ($pid =~ /\A\d+\Z/o);
	if (kill(0, $pid) == 1)   # alte Instanz läuft noch
	{
	    $prLog->print('-kind' => 'E',
			  '-str' => ["cannot start, old instance with pid " .
				     "<$pid> is allready running"],
			  '-exit' => 1);
	}
	else
	{
	    $prLog->print('-kind' => 'I',
			  '-str' =>
			  ["removing old lock file of process <$pid>"]);
	}
    }

    $prLog->print('-kind' => 'I',
		  '-str' => ["creating lock file <$lockFile>"]);

    open(FILE, "> $lockFile") or
	$prLog->print('-kind' => 'E',
		      '-str' => ["cannot create lock file <$lockFile>"],
		      '-exit' => 1);
    print FILE "$$\n";
    close(FILE);
}

# Read sourceDir and generate:
# - dbm-file %srcInodeKey: inode -> all files
# - dbm-file %srcFiles: file -> ''
# - dbm-file %srcDirs: dir -> ''

my $rd = recursiveReadDir->new('-dirs' => [$sourceDir],
			       '-followLinks' => $followLinks,
			       '-printDepth' => 'yes',
			       '-prLog' => $prLog);
$prLog->print('-kind' => 'I',
	      '-str' => ["building databases for source directory <$sourceDir>"]);

#my $indexDir = indexDir->new();  # for compressing the dir names
                                 # to numbers

my (%srcInodeKey, %srcDirs, %srcFiles);
my $count = 0;
my $mkdir = 0;
dbmopen(%srcInodeKey, "$tmpdir/sync-srcInode-$$.dbm", 0600);
dbmopen(%srcDirs, "$tmpdir/sync-srcDirs-$$.dbm", 0600);
dbmopen(%srcFiles, "$tmpdir/sync-srcFiles-$$.dbm", 0600);
while (1)
{
    my ($file, $type);
    last unless (($file, $type) = $rd->next());

    my ($inode, $mode, $uid, $gid, $size, $atime, $mtime);
    if ($type eq 'l')        # symbolic link
    {
	($inode, $mode, $uid, $gid, $size, $atime, $mtime) =
	    (lstat($file))[1,2,4,5,7,8,9];
    }
    else
    {
	($inode, $mode, $uid, $gid, $size, $atime, $mtime) =
	    (stat($file))[1,2,4,5,7,8,9];
    }
    $mode &= 07777;
    $file = &::substractPath($file, $sourceDir);  # make a relative path

    if ($type eq 'd')           # generate all directories in target
    {
	unless (-d "$targetDir/$file")
	{
	    mkdir "$targetDir/$file";
	    ++$mkdir;
	}
	chown $uid, $gid, "$targetDir/$file";
	chmod $mode, "$targetDir/$file";
	utime $atime, $mtime, "$targetDir/$file";

	$srcDirs{$file} = '';
	next;
    }

#    my ($fbase, $fname, $index) = $indexDir->newFile($file);

    unless (exists $srcInodeKey{$inode})
    {
	$srcInodeKey{$inode} =
	    &packwithLen('SIIIIIa', $mode, $uid, $gid, $size,
			 $atime, $mtime, $type);
	$count++;
    }

#    $srcInodeKey{$inode} .= &packwithLen('IZ*', $index, $fname);
    $srcInodeKey{$inode} .= &packwithLen('Z*', $file);

    $srcFiles{$file} = '';
}
$prLog->print('-kind' => 'S',
	      '-str' => ["$count different files"]);

# set links and copy files
# now, preparation is done, the main algorithm
$prLog->print('-kind' => 'I', '-str' => ["copying files ..."]);

my ($copyF, $linkF, $delF, $delL, $delD) = (0, 0, 0, 0, 0);
my $step = int($count / 10);
$step = 1 if $step < 1;
$count = 0;
my ($inodeS, $val);
while (($inodeS, $val) = (each %srcInodeKey))
{
    $count++;
    $prLog->print('-kind' => 'S',
		  '-str' => ["synced $count differnt files"])
	unless $count % $step;

    # get information of the first inode group
    my ($modeS, $uidS, $gidS, $sizeS, $atimeS, $mtimeS, $typeS);
    ($val, $modeS, $uidS, $gidS, $sizeS, $atimeS, $mtimeS, $typeS) =
	&unpackwithLen('SIIIIIa', $val);
    my (@filesS);
    while ($val)
    {
	my ($index, $fname);
#	($val, $index, $fname)  = &unpackwithLen('IZ*', $val);
#	my $dir = $indexDir->index2dir($index);
#	push @filesS, "$dir/$fname";
	($val, $fname) = &unpackwithLen('Z*', $val);
	push @filesS, $fname;
    }

    # find the files in the target dir with same size and mtime
    my (@same, @other, $filesS, @deleteTargetFiles);
    my ($inodeT, $modeT, $uidT, $gidT, $mtimeT, $sizeT, $typeT);
    foreach $filesS (@filesS)
    {
#	if (exists $targetFileKey{$filesS})
	if (-e "$targetDir/$filesS")
	{
	    if (-l "$targetDir/$filesS")
	    {
		($inodeT, $modeT, $uidT, $gidT, $mtimeT, $sizeT) =
		    (lstat("$targetDir/$filesS"))[1,2,4,5,9,7];
		$typeT = 'l';
	    }
	    else
	    {
		($inodeT, $modeT, $uidT, $gidT, $mtimeT, $sizeT) =
		    (stat("$targetDir/$filesS"))[1,2,4,5,9,7];
		$modeT &= 07777;
		if  (-f "$targetDir/$filesS")
		{
		    $typeT = 'f';
		}
		elsif (-p "$targetDir/$filesS")
		{
		    $typeT = 'p';
		}
		else
		{
		    $prLog->print('-kind' => 'E',
				  '-str' => ["unsupportet file type for file" .
					     "<$filesS>"]);
		    next;
		}
	    }

#	    ($inodeT, $modeT, $uidT, $gidT, $mtimeT, $sizeT, $typeT) =
#		unpack('IIIIIIa', $targetFileKey{$filesS});
	    if ($sizeS == $sizeT and $mtimeS == $mtimeT and $typeS eq $typeT)
	    {
		push @same, $filesS;
#		delete $targetFileKey{$filesS};
#print "\t$filesS matches $inodeT (target)\n";
	    }
	    else
	    {
		push @other, $filesS;
		push @deleteTargetFiles, $filesS;
#print "\t$filesS does not match (target delete)\n";
	    }
	}
	else
	{
	    push @other, $filesS;
#print "\t$filesS does not match (target not there)\n";
	}
    }

    my $f;             # delete not matching files
    foreach $f (@deleteTargetFiles)
    {
	&delFile($prLog, \$delF, \$delL, "$targetDir/$f");
#	unlink "$targetDir/$f";
#	delete $targetFileKey{$f};
#print "unlink $targetDir/$f\n";
    }
#    $delF += @deleteTargetFiles;

    # first, ignore the matching ones and link the not matching from
    # this inode group the one matching
    my $first;
    if (@same == 0)       # found nothing matching in the target tree
    {
	$first = shift @other;
#print "copy $sourceDir/$first -> $targetDir/$first\n";
	&doCopy("$sourceDir/$first", "$targetDir/$first",
		$typeS, $prLog, $tmpdir);
	$copyF++;
    }
    else
    {
	$first = $same[0];
    }
    # link the non existing files of that inode group
    foreach $f (@other)
    {
#print "link $targetDir/$first $targetDir/$f\n";
	link "$targetDir/$first", "$targetDir/$f";
    }
    $linkF += @other;

    next if ($modeS == $modeT and $uidS == $uidT and $gidS == $gidT);
    # set the permissions
    if ($typeS eq 'l')     # symbolic link
    {
	my $chown = forkProc->new('-exec' => 'chown',
				  '-param' => [$uidS, $gidS,
					       "$targetDir/$first"],
				  '-outRandom' => "$tmpdir/chown-",
				  '-prLog' => $prLog);
	$chown->wait();
    }
    else
    {
	chown $uidS, $gidS, "$targetDir/$first";
	chmod $modeS, "$targetDir/$first";
    }
    utime $atimeS, $mtimeS, "$targetDir/$first";
}

#
# delete unnecessary dirs and files in targetDir
#
$rd = recursiveReadDir->new('-dirs' => [$targetDir],
			    '-ignoreReadError' => 'yes',
			    '-printDepth' => 'yes',
			    '-prLog' => $prLog);
$prLog->print('-kind' => 'I',
	      '-str' => ["deleting files in <$targetDir>"]);
while (1)
{
    my ($file, $type);
    last unless (($file, $type) = $rd->next());

    my ($inode, $mode, $uid, $gid, $size, $atime, $mtime);
    if ($type eq 'l')        # symbolic link
    {
	($inode, $mode, $uid, $gid, $size, $atime, $mtime) =
	    (lstat($file))[1,2,4,5,7,8,9];
    }
    else
    {
	($inode, $mode, $uid, $gid, $size, $atime, $mtime) =
	    (stat($file))[1,2,4,5,7,8,9];
    }
    $mode &= 07777;
    $file = &::substractPath($file, $sourceDir);  # make a relative path

    next unless -e "$targetDir/$file";

    if ($type eq 'd' and not exists $srcDirs{$file})
    {
#print "deleting directory <$targetDir/$file>\n";
	my $rdd = recursiveDelDir->new('-dir' => "$targetDir/$file",
				       '-prLog' => $prLog);
	my ($d, $f, $b, $l) = $rdd->getStatistics();
	$delF += $f;
	$delD += $d;
	$delL += $l;
	next;
    }

    if ($type ne 'd' and not exists $srcFiles{$file})
    {
	&delFile($prLog, \$delF, \$delL, "$targetDir/$file");
    }
}

$prLog->print('-kind' => 'S',
	      '-str' => ["copied files  = $copyF",
			 "linked files  = $linkF",
			 "created dirs  = $mkdir",
			 "deleted files = $delF",
			 "deleted links = $delL",
			 "deleted dirs  = $delD"]);

unlink "$tmpdir/sync-srcInode-$$.dbm",
    "$tmpdir/sync-srcDirs-$$.dbm",
    "$tmpdir/sync-srcFiles-$$.dbm";

exit 0;


######################################################################
sub doCopy
{
    my ($source, $target, $type, $prLog, $tmpdir) = @_;

    if ($type eq 'f')    # plain file
    {
	::copy($source, $target);
    }
    elsif ($type eq 'l') # named symbolic link
    {
	my $l = readlink $source;
	symlink $l, $target;
    }
    elsif ($type eq 'p') # named pipe
    {
	my $mknod = forkProc->new('-exec' => 'mknod',
				  '-param' => [$target, 'p'],
				  '-outRandom' => "$tmpdir/mknod-",
				  '-prLog' => $prLog);
	$mknod->wait();
	my $out = $mknod->getSTDOUT();
	$prLog->print('-kind' => 'E',
		      '-str' =>
		      ["STDOUT of <mknod $target p>:", @$out])
	    if (@$out > 0);
	$out = $mknod->getSTDERR();
	$prLog->print('-kind' => 'E',
		      '-str' =>
		      ["STDERR of <mknod $target p>:", @$out])
	    if (@$out > 0);
    }
    else
    {

    }
}


######################################################################
sub delFile
{
    my $prLog = shift;
    my $delF = shift;
    my $delL = shift;
    my $f = shift;

    my $noLinks = (stat($f))[3];
    unless (unlink $f)
    {
	$prLog->print('-kind' => 'E',
		      '-str' => ["cannot delete <$f>"]);
	next;
    }
    if ($noLinks > 1)   # more than one hard link
    {
	++$$delL;
    }
    else
    {
	++$$delF;
    }
}
