#!/usr/bin/perl
# Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# filter_objdump:
#  Perform an objdump, but only output:
#    - Function names
#    - Unique opcodes
#  This removes memory addresses and static offsets.
#  This also removes any NOP codes (they can make small functions seem bigger)

###############################################################
# SAM2bSAM(): Convert a string-based function to binary.
#  Input: one or more function records as text.  Records are
#    separated by blank lines.  The first line of a record is the
#    function name; every following line is one token.
#  Returns: a binary string (or "" if there are no records).  For
#    each record it contains, as 16-bit big-endian values:
#      0x0101, name length+1, name, NUL (+ 0xff pad to even length)
#      0x0108, token byte count, one 16-bit checksum per token
###############################################################
sub SAM2bSAM
{
  my $Function=shift;
  my $bFunction=""; # binary output for every record seen so far

  if (!defined($Function)) { return(""); }

  $Function =~ s/\n\n\n*/\n\n/g; # remove duplicate newlines
  $Function =~ s/^\n*//; # remove initial newlines
  $Function =~ s/\n*$//; # remove final newlines
  if ($Function eq "") { return(""); }

  # After the cleanup above every blank line stands alone, so "\n\n"
  # is exactly the boundary between two records.
  foreach my $Record (split(/\n\n/,$Function))
    {
    my ($Name,@Tokens) = split(/\n/,$Record);

    $bFunction .= pack("n",0x0101); # tag function name
    $bFunction .= pack("n",length($Name)+1);
    $bFunction .= $Name . chr(0);
    # pack function names to 2-byte boundary
    if ((length($Name)+1) % 2 == 1) { $bFunction .= chr(255); }

    my $bData=""; # binary data: one 16-bit value per token
    foreach my $Token (@Tokens)
      {
      # Add up the token as a series of 16-bit big-endian words; a
      # trailing odd character counts as the high byte of a last word.
      my $Sum=0;
      for(my $j=0; $j < length($Token); $j+=2)
        {
        $Sum += ord(substr($Token,$j,1)) * 256;
        if ($j+1 < length($Token)) { $Sum += ord(substr($Token,$j+1,1)); }
        }
      # Only the low 16 bits are stored.  Masking here makes the
      # truncation explicit instead of relying on pack() to wrap.
      $bData .= pack("n",$Sum & 0xFFFF);
      }

    # Every record gets the token tag, not only the last one, so all
    # records in the output share the same tag/length/data layout.
    $bFunction .= pack("n",0x0108); # tag function tokens
    $bFunction .= pack("n",length($bData)) . $bData;
    }
  return($bFunction);
} # SAM2bSAM()

###############################################################
# main()
#  For each file named on the command line:
#    - print an "Obj" file-type header,
#    - run "objdump -d" on the file,
#    - reduce every output line to a function name or a normalized
#      assembly command (no addresses, opcode bytes or constants),
#    - print each collected function through SAM2bSAM().
#  Processing stops at the first empty argument.
###############################################################

use strict;
use warnings;

# For generating functions
my $Filename;
my $HaveNonPush=0;	# check for a command that is not a push
my $Fcount=0;		# number of unnamed functions found in this file
my $AsmData="";		# text of the function currently being collected

# Everything printed below is packed binary data.
binmode(STDOUT);

while(defined($ARGV[0]) && ($ARGV[0] ne ""))
{
  # Step 1: Load source and strip out single-line items.
  $Filename = shift @ARGV;

  # initialize
  $Fcount=0;

  # Start the data file
  print pack("n",0x0004); # file type
  print pack("n",length("Obj")+1);
  print "Obj" . chr(0);
  # pack function names to 2-byte boundary
  if ((length("Obj")+1) % 2 == 1) { print chr(255); }

  # List-form pipe open: the filename is handed to objdump as a single
  # argument and never passes through a shell.  "--" stops a filename
  # that starts with "-" from being read as an option.
  my $Fin;
  if (!open($Fin,"-|","objdump","-d","--",$Filename))
    {
    warn "filter_objdump: unable to run objdump on '$Filename': $!\n";
    next;
    }

  while(my $Line = <$Fin>)
    {
    chomp($Line);

    # reduce line to the assembly command
    $Line =~ s@^  *[^:]*: *@.@;	# "  addr:" becomes "."
    # Symbol header "addr <name>:".  Addresses are 8 hex digits for
    # 32-bit objects and 16 for 64-bit objects.
    $Line =~ s@^[0-9A-Fa-f]{8,16} <@Function @;
    $Line =~ s@>:$@@;
    # drop the raw opcode bytes that follow the address
    $Line =~ s@^\.[[:space:]]*([0-9A-Fa-f][0-9A-Fa-f][[:space:]])*[[:space:]]*@. @;
    $Line =~ s@<[^>]*>@@;			# drop a <symbol+offset> reference
    $Line =~ s@0x([0-9a-fA-F]*)@0x@g;	# drop the digits of hex constants
    $Line =~ s@[[:space:]][[:space:]]*@ @g;
    $Line =~ s@[[:space:]]*$@@;

    # filter out all non-assembly commands
    $Line =~ s@.*:.*@NULL@;
    $Line =~ s@^\. nop.*@NULL@;
    $Line =~ s@^\. *$@NULL@;

    # normalize/replace variables
    $Line =~ s/^\. call .*/. call/;
    $Line =~ s/^\. (j[a-z]*) .*/. $1/;
    $Line =~ s/^Function //;

    # check for unnamed functions
    if (($Line =~ m/^\. /) && ($Line !~ m/push/))
      {
      $HaveNonPush=1;
      }

    # A "push %ebp" that follows at least one non-push command is
    # treated as the start of a new, unnamed function: flush what has
    # been collected and begin a record with a generated name.
    if (($Line eq ". push %ebp") && ($HaveNonPush == 1))
      {
      print SAM2bSAM($AsmData);
      $Fcount++;
      $AsmData = "Function_$Fcount\n";
      $HaveNonPush = 0;
      }
    if ($Line ne "NULL") { $AsmData .= "$Line\n"; }
    # a blank line ends the current function
    if ($Line eq "")
      {
      print SAM2bSAM($AsmData);
      $AsmData = "";
      $HaveNonPush = 0;
      }
    }

  # close() on a pipe also reports a non-zero exit status from objdump
  if (!close($Fin))
    {
    if ($!) { warn "filter_objdump: error closing objdump pipe for '$Filename': $!\n"; }
    else { warn "filter_objdump: objdump on '$Filename' exited with status $?\n"; }
    }

  # flush whatever is left after the last line
  print SAM2bSAM($AsmData);
  $AsmData="";
  $HaveNonPush=0;

  # next file!
} # while files to process


