#!/usr/bin/perl -w
# gtkeyboard-create-cache-file
# Written by David Allen <s2mdalle@titan.vcu.edu>
# http://opop.nols.com/
# 
# This file is released under the terms of the GNU General Public License
# for use with GTKeyboard.  Please see http://www.gnu.org/ or the file
# COPYING that you should have received with GTKeyboard for more information.
#
# Usage: call this with two filenames - the file to convert, and what to
# save it as.
# 
# By default, this script strips punctuation and caches everything as lower
# case.
#
# ******NOTE******
# This script was written to work with Perl 5.005 and similar versions.  I
# have not tested this using perl 5.6, so you're on your own, but it should
# work just fine.  :)
###############################################################################

use strict;
use warnings;

# Reduce one whitespace-delimited token to the form stored in the cache:
# delete ASCII digits and punctuation, then lower-case whatever is left.
# Returns the empty string when nothing but stripped characters was given.
sub _normalize_word {
    my ($word) = @_;

    # tr/// does not interpolate, so '$' and '@' are literal here.  The hyphen
    # is escaped on purpose: an unescaped ")-_" is a RANGE that also contains
    # A-Z, which would delete every capital letter before lc() could see it.
    $word =~ tr{!"#$%&'()*+,\-./0-9:;<=>?@[\\]^_}{}d;

    return lc $word;
}

# Count how often each normalized word occurs in the file $from and write the
# result to the file $to, one "<count> <word>" line per distinct word, sorted
# by word.  Prints "Finished." to STDOUT on success.
#
# Dies with the usage message if either file name is missing or empty, and
# with a descriptive message if a file cannot be opened or written.
sub create_cache_file {
    my ($from, $to) = @_;

    # Test for "defined and non-empty" rather than truth so that a file
    # literally named "0" is still accepted.
    if (!(defined($from) && length($from) && defined($to) && length($to))) {
        die "Usage:  $0 file-to-convert file-to-save-as\n";
    }

    # Three-argument open with lexical handles (needs Perl 5.6 or later), so
    # characters such as '>' or '|' or surrounding whitespace in a file name
    # are never interpreted as part of the open mode.
    open(my $in,  '<', $from) or die "Couldn't open $from for reading: $!\n";
    open(my $out, '>', $to)   or die "Couldn't write to file $to: $!\n";

    my %count_of;

    while (my $line = <$in>) {
        # split ' ' discards leading whitespace, so no empty first field.
        foreach my $token (split ' ', $line) {
            my $word = _normalize_word($token);
            next if $word eq '';    # token was nothing but digits/punctuation
            $count_of{$word}++;
        }
    }

    close $in;

    foreach my $word (sort keys %count_of) {
        print {$out} "$count_of{$word} $word\n"
            or die "Couldn't write to file $to: $!\n";
    }

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($out) or die "Couldn't write to file $to: $!\n";

    print "Finished.\n";
    return;
}

# Run only when executed as a script; loading the file with require/do just
# defines the subroutines above.
create_cache_file(@ARGV) unless caller;

1;
