#!/usr/bin/env perl
#
# Traverse the directory tree and extract author information from .m
# files.  Put author information in the file 'AUTHORS'.
# Understands variations of these entries:
#
#   ## Copyright (C)  year(s)  name1   <email>
#   ## Copyright (C)  year(s)  name2   <email>
#   ## Author:  name3  <email>
#   ## Author:  name4  <email>
#
# Albert Danial Dec 15 2001
#               Nov 30 2002 - Attribute to 'Anonymous' any file which has 
#                             licensing terms (ie, granted to public domain)
#                             but no author.  Show code and comment line
#                             counts.
# Dirk Eddelbuettel 07 Jul 2004  also look at .cc files and // comments
#                             
use strict;
use File::Find;

my $location = `pwd`;
my $PATH     = "";
if ($location =~ m{^(.*)/admin$}) {
    chdir "..";
    $PATH = "$1/";
}

my @files = ();       # to contain relative path names of each .m file
find(\&wanted, ".");  # start here & descend recursively; populate @files

my %file_data = ();
classify_files(\@files,       # in
               \%file_data);  # out, a_data{file}{'name'} = [ names ]
                              #                  {'year'} = [ years ]
                              #                  {'mail'} = [ email addrs ]
                              #                  {'cpyr'} = [ 'C' or ' '  ]
                              #                  {'n_code'}    = # lines code
                              #                  {'n_comment'} = # lines of
                              #                                  comments

# traverse file_data and extract
#   - files without copyrights
#   - files without authors
#   - files grouped by author
#   - author to email map
#   - number of lines of code, number of lines of comments
my @unattributed_files  = ();
my @uncopyrighted_files = ();
my %email               = ();   # email{ author name } = email address
my %files               = ();   # files{ author name } = [ list of files ]
foreach my $f (sort @files) {
    # each file can have multiple authors, loop over each author
    if (!defined @{$file_data{$f}{name}}) {
        push @unattributed_files, $f;
        next;
    }
    my $copyrighted = 0;
    for (my $i = 0; $i < scalar @{$file_data{$f}{name}}; $i++) {
        if (defined $file_data{$f}{mail}[$i]) {
            $email{ $file_data{$f}{name}[$i] } = $file_data{$f}{mail}[$i];
        }
        $copyrighted = 1 if $file_data{$f}{cpyr}[$i] eq "C";
        $files{ $file_data{$f}{name}[$i] }{$f} = 1;
    }
    push @uncopyrighted_files, $f unless $copyrighted;
}

printf "%3d uncopyrighted files:%s lines of code/lines of comments\n", 
       scalar @uncopyrighted_files, ' ' x 22;
foreach my $f (sort @uncopyrighted_files) {
    printf "      %-50s %3d/%3d\n", 
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d unattributed files:%s lines of code/lines of comments\n", 
       scalar @unattributed_files, ' ' x 23;
foreach my $f (sort @unattributed_files) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

my $Auth_file = "AUTHORS";
open(OUT, ">$Auth_file") or die "Cannot write $Auth_file:  $!\n";
printf "%3d authors:\n", scalar keys %files;
foreach my $n (sort by_last_name keys %files) {
    printf     "%-28s", $n;
    printf OUT "%-28s", $n;
    print     $email{$n} if defined $email{$n};
    print OUT $email{$n} if defined $email{$n};
    print     "\n";
    print OUT "\n";
    my $i = 0;
    foreach my $f (sort keys %{$files{$n}}) {
        printf "%3d. %-50s  %3d/%3d\n",
               ++$i, $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
    }
}
close OUT;
warn "Wrote ${PATH}${Auth_file}\n";

# # # # # # # 

sub by_last_name { # {{{1 for sorting on names
    (my $A = $a) =~ s/.*?(\w+)$/$1/;
    (my $B = $b) =~ s/.*?(\w+)$/$1/;
    return lc($A) cmp lc($B);
} # 1}}}

# # # # # # # 

sub classify_files { # {{{1
    my ($ra_files,  # in, list of files
        $rhha_data, # out,  data{file}{ name | year | cpyr } = [entries]
       ) = @_;
    warn "Found ", scalar @{$ra_files}, " .m files\n";
    foreach my $f (@{$ra_files}) {
        open(IN, $f) or die "Cannot read $f: $!\n";
        my $found_copyright = 0;
        my $found_author    = 0;
        $rhha_data->{$f}{n_code}    = 0;
        $rhha_data->{$f}{n_comment} = 0;
        while (<IN>) {
            # find the copyright line and extract author info from it
            if (/^[#%\/]+/) {           # a comment
                ++$rhha_data->{$f}{n_comment};
            } elsif (!/^\s*$/) {        # not a blank line
                ++$rhha_data->{$f}{n_code};
            }
            s/all\s+rights\s+reserved\.?//i;
            s/\bby\s+//i;

            if (/^[#%\/]+               #  one or more leading comment markers
                  \s*copyright          #  Copyright
                  \s*(\(c\))?           #  (c)    - optional           $1
                  \s*(\d[,\- 0-9]+)     #  Year (or years)             $2
                  \s+(.*?)              #  name                        $3
                  \s*(<.*>)?            #  email  - optional           $4
                  \s*$/ix) {
                $found_copyright = 1;
                $found_author    = 1;
                my $year  = $2;
                my $name  = $3;
                   $name  = "John W. Eaton" if $name eq "jwe";
                   $name  =~ s/\.\s*$//; # strip trailing period
                my $email = "" || $4;
                $name =~ s/^\s+//;  # strip leading  whitespace
                $name =~ s/\s+$//;  # strip trailing whitespace
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, $year;
                push @{$rhha_data->{$f}{mail}}, $email;
                push @{$rhha_data->{$f}{cpyr}}, 'C';
                # don't exit w/last because there could be multiple copyrights
            } elsif (
                /^\s*[#%\/]+              # one or more leading comment markers
                    \s*author\s*:?        #  Copyright
                    \s+(.*?)              #  name                        $1
                    \s*(<.*>)?            #  email  - optional           $2
                    \s*$/ix) {
                my $name  = $1;
                   $name  =~ s/\.\s*$//; # strip trailing period
                   $name  = "John W. Eaton" if $name eq "jwe";
                my $email = "" || $2;
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, "";
                push @{$rhha_data->{$f}{mail}}, $email;
                if ($found_copyright) {
                    push @{$rhha_data->{$f}{cpyr}}, 'C';
                } else {
                    push @{$rhha_data->{$f}{cpyr}}, "";
                }
                $found_author = 1;
            } elsif (
                !$found_author and
                /^\s*[#%\/]+              # one or more leading comment markers
                  .*?                     #  some leading text
                  (granted|placed|given|is)\s+(in|to)
                  (\s+the)?\s+public\s+domain
                /ix) {
                $found_copyright = 1;
            }
        }
        if ($found_copyright and !$found_author) {
            push @{$rhha_data->{$f}{name}}, "Anonymous";
            push @{$rhha_data->{$f}{year}}, "";
            push @{$rhha_data->{$f}{mail}}, "";
            push @{$rhha_data->{$f}{cpyr}}, 'C';
        }
        close IN;
    }
} # 1}}}

# # # # # # # 

sub wanted { # {{{1 populates global array @files
    return unless -f and /\.(m|cc)$/;  # only want .m files (for now)
    push @files, "$File::Find::name";
} # 1}}}
