#!/bin/bash
# /***********************************************************
# Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# version 2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# ***********************************************************/
#
# Script to get latest FreshMeat RDF file and drive updating the DB
#
# Author: mark.donohoe@hp.com
#
# Version: $Id: GetFM 534 2008-04-28 22:16:26Z rrando $

# Today's date (YYYY-MM-DD); it is embedded in the names of the files
# and logs this run produces.
Date=$(date +"%Y-%m-%d")

# Constants
#
# Where the compressed FreshMeat project RDF is fetched from.
URL="http://freshmeat.net/backend/fm-projects.rdf.bz2"
#
# The line below assumes that fossology has been installed.  Edit it
# if you need to.  FMDir is created by the makefile and should set the
# variable $FMDIR to the path of the storage area this process will use
# (should be at least 66GB, better is 100GB).

. /usr/local/include/FMDir

# Every path below is built from $FMDIR.  If it is empty they would all be
# rooted at "/", so stop here instead of creating directories there.
if [[ -z "${FMDIR:-}" ]]
then
    echo "ERROR! FMDIR is not set, check /usr/local/include/FMDir"
    exit 1
fi

DIR=$FMDIR
RDIR="${DIR}/Rdfs/"
Wget_Log="${RDIR}wget.log"
INPUTFILES="${DIR}/Input-Files/"
RLOGS="${DIR}/Run-logs/"
GOLDEN="${DIR}/golden.${Date}/Input-files/"
# make the other directories needed.
# mkdir -p is a no-op for directories that already exist, so no -d test
# is required first.
if ! mkdir -p -- "$RLOGS" "$INPUTFILES" "$RDIR"
then
    echo "ERROR! could not create the working directories under $DIR"
    exit 1
fi


# Names of today's RDF file: the plain name first, then the same name
# with the bzip2 suffix for the download.
Uncompressed_file="fm-projects.rdf-${Date}"
Compressed_file="${Uncompressed_file}.bz2"

# The freshmeat programs are normally installed under /usr/local/bin,
# so append that location to the search path.

PATH="${PATH}:/usr/local/bin:/usr/local/lib"
export PATH

########################################################################
# If you need to export a proxy, this is a good place to do it.
# Edit the line below and uncomment it (remove the '#')
########################################################################
# Explicitly export the proxy since crontab does not include
# the shell environment

#export http_proxy="http://some-proxy.com:8080"

# get the daily rdf file
echo "will do:"
echo "wget -o $Wget_Log -O $RDIR$Compressed_file $URL"
echo ""

# -o sends wget's own messages to $Wget_Log; -O names the file the download
# is written to.  Stop on a non-zero status so a failed download is reported
# here rather than surfacing later as an uncompress error.
wget -o "$Wget_Log" -O "${RDIR}${Compressed_file}" "$URL"
wrtn=$?
if [[ $wrtn -ne 0 ]]
then
    echo "ERROR! wget returned error status, $wrtn, see $Wget_Log"
    exit 1
fi


# Set Current-top1k to todays run
# Current-top1k is a one-line pointer file holding the path of today's
# Top1k file.  The path is quoted so it is written exactly as built.
top1k="${RDIR}Top1k-${Date}"
printf '%s\n' "$top1k" > "${RDIR}Current-top1k"
#
# uncompress todays rdf file and call mktop1k to create the top 1000 from this file
#
echo "uncompressing $RDIR$Compressed_file"
echo "bunzip2 -qc $RDIR$Compressed_file > $RDIR$Uncompressed_file"
echo
echo "mktop1k -i $RDIR$Uncompressed_file -o $top1k"
echo
#
# Note: with -c bunzip2 writes the uncompressed data to stdout and leaves
# the compressed file in place, so the .bz2 is still in $RDIR afterwards
# (-q only silences non-essential warnings).
# TODO: if no errors, then remove the compressed file by hand.
#
bunzip2 -qc "${RDIR}${Compressed_file}" > "${RDIR}${Uncompressed_file}"
brtn=$?
if [[ $brtn -ne 0 ]]
then
  echo "ERROR! could not uncompress $Compressed_file"
  exit 1
fi
# Build today's top 1000 file from the uncompressed RDF.
mktop1k -i "${RDIR}${Uncompressed_file}" -o "$top1k"
mrtn=$?
if [[ $mrtn -ne 0 ]]
then
    echo "ERROR! mktop1k returned error status, $mrtn"
    exit 1
fi
# Diff the current and previous top 1000 xml files

current=$(cat "${RDIR}Current-top1k")
# Within this script Previous-top1k is only written at the end of this step,
# so it may not exist yet (e.g. on a first run).  Report that plainly instead
# of letting cat fail; diffm is still given an empty previous path, as before.
previous=""
if [[ -r "${RDIR}Previous-top1k" ]]
then
    previous=$(cat "${RDIR}Previous-top1k")
else
    echo "WARNING! ${RDIR}Previous-top1k not found, no previous run to diff against"
fi
echo  "Doing..."
echo "diffm -f $current $previous -o $INPUTFILES > ${RLOGS}log.diffm.${Date}"
echo
diffm -f "$current" "$previous" -o "$INPUTFILES" > "${RLOGS}log.diffm.${Date}"
# Save diffm's status now; it is examined after the bookkeeping below.
drtn=$?
#
# replace previous rdf with current for the next run.
# May want to make a function cause you might want to clean up old rdf's and top1k's.....

# $current still holds the path read from Current-top1k above.
printf '%s\n' "$current" > "${RDIR}Previous-top1k"

# if diffm returns 0 there is no projects to update, so clean up and stop
if [[ $drtn -eq 0 ]]
then
    echo "exiting, nothing to do"
    exit 0
fi

# we have something to process, get it and upload what we can.
echo "Doing..."
echo "get-projects -f ${INPUTFILES}Update.fm.rdf.${Date} > ${RLOGS}log.get-projects.${Date} 2>&1"
echo
get-projects -f "${INPUTFILES}Update.fm.rdf.${Date}" > "${RLOGS}log.get-projects.${Date}" 2>&1
grtn=$?
if [[ $grtn -ne 0 ]]
then
    echo "ERROR! get-projects returned error status, $grtn"
    exit 1
fi

# upload the files with cp2foss
# The logged command is the one actually run below.
echo "Doing..."
echo "cp2foss -f ${GOLDEN}Freshmeat_to_Upload > ${RLOGS}log.cp2foss.${Date} 2>&1 &"
echo
cp2foss -f "${GOLDEN}Freshmeat_to_Upload" > "${RLOGS}log.cp2foss.${Date}" 2>&1 &
# cp2foss runs in the background, so $? here would always be 0 and cannot
# tell us whether the upload worked; its result only shows up in the log.
echo "cp2foss started in the background (pid $!), check ${RLOGS}log.cp2foss.${Date} for its result"
exit 0
