/* 
   sitecopy, for managing remote web sites. Stored state handling routines.
   Copyright (C) 1999-2004, Joe Orton <joe@manyfish.co.uk>
                                                                     
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
  
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
  
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include "config.h"

#include <sys/stat.h>

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif

#include <libxml/parser.h>

#include <glib.h>
#include <glib/gi18n.h>

#include <ctype.h>
#include <errno.h>
#include <stdio.h>

#include "common.h"
#include "sitesi.h"

/* Version string written into the 'version' attribute of the
 * <sitestate> root element.
 * Bump the major number if a backwardly-incompatible change is made.
 */
#define SITE_STATE_FILE_VERSION "1.0"

/* Line terminator appended to the lines emitted by
 * site_write_stored_state(); #undef'd again right after that function. */
#define EOL "\r\n"

/* Opens the storage file for writing */
/* Return the stream used to write the site's state file, opening
 * site->infofile for writing if no stream is cached in
 * site->storage_file yet.  Returns NULL if the file cannot be opened. */
FILE *site_open_storage_file(struct site *site) 
{
    /* An already-open stream is handed back untouched. */
    if (site->storage_file != NULL) {
	return site->storage_file;
    }

    site->storage_file = fopen(site->infofile, "w" FOPEN_BINARY_FLAGS);
    return site->storage_file;
}

/* Close the site's storage stream and forget the cached handle.
 * Returns whatever fclose() returned. */
int site_close_storage_file(struct site *site)
{
    FILE *stream = site->storage_file;

    /* Drop the cached handle so the next open starts afresh. */
    site->storage_file = NULL;
    return fclose(stream);
}

/* Return escaped form of 'filename'; any XML-unsafe characters are
 * escaped. */
/* Return escaped form of 'filename': every byte that is not an ASCII
 * alphanumeric, '/', '.' or '-' is replaced by "%xx" (two lower-case
 * hex digits).  An empty name yields an empty string.
 * The result is allocated with g_malloc(); the caller must g_free() it. */
static char *fn_escape(const char *filename)
{
    const unsigned char *pnt = (const unsigned char *)filename;
    /* Worst case: every byte expands to three characters. */
    char *ret = g_malloc(strlen(filename) * 3 + 1), *p = ret;

    /* Test for the terminator BEFORE handling a byte, so that an empty
     * name neither escapes its NUL nor walks off the end of the input. */
    for (; *pnt != '\0'; pnt++) {
        if (*pnt <= 0x7f 
            && (isalnum(*pnt) || *pnt == '/' || *pnt == '.' || *pnt == '-')) {
            *p++ = (char)*pnt;
        } else {
            sprintf(p, "%%%02x", *pnt);
            p += 3;
        }
    }

    *p = '\0';
    
    return ret;
}

/* Return unescaped filename; reverse of fn_escape. */
/* Return unescaped filename; reverse of fn_escape.
 * Each "%xx" sequence (two hex digits) is decoded to one byte.  A '%'
 * that is not followed by two hex digits is copied through literally
 * rather than being decoded from whatever happens to follow it.
 * A NULL 'filename' (no text collected) is treated as an empty name.
 * The result is allocated with g_malloc(); the caller must g_free() it. */
static char *fn_unescape(const char *filename)
{
    const unsigned char *pnt;
    char *ret, *p;

    if (filename == NULL) {
        filename = "";
    }

    pnt = (const unsigned char *)filename;
    /* The decoded form is never longer than the escaped form. */
    ret = p = g_malloc(strlen(filename) + 1);

    while (*pnt != '\0') {
        /* isxdigit() short-circuits, so pnt[2] is only read when
         * pnt[1] is not the terminator. */
        if (*pnt == '%' && isxdigit(pnt[1]) && isxdigit(pnt[2])) {
            *p = (ASC2HEX(pnt[1]) << 4) & 0xf0;
            *p++ |= (ASC2HEX(pnt[2]) & 0x0f);
            pnt += 3;
        } else {
            *p++ = (char)*pnt++;
        }
    }

    *p = '\0';

    return ret;
}

/* Write out the stored state for the site. 
 * Returns 0 on success, non-zero on error. */
/* Write out the stored state for the site as an XML document:
 * an <options> section describing how the state was saved, followed by
 * one <item> per file whose stored state exists.
 * Returns 0 on success, non-zero on error (the file could not be
 * opened, a write failed, or closing the file failed). */
int site_write_stored_state(struct site *site) 
{
    struct site_file *current;
    FILE *fp = site_open_storage_file(site);
    int write_failed, close_ret;

    if (fp == NULL) {
	return -1;
    }

    fprintf(fp, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" EOL);
    fprintf(fp, "<sitestate version='" SITE_STATE_FILE_VERSION "'>" EOL);
    fprintf(fp, "<options>" EOL);
    fprintf(fp, " <saved-by package='" PACKAGE_NAME "'"
	    " version='" PACKAGE_VERSION "'/>" EOL);
    if (site->state_method == state_checksum) {
	/* For forwards-compatibility */
	fprintf(fp, " <checksum-algorithm><checksum-MD5/></checksum-algorithm>" EOL);
    }
    fprintf(fp, " <state-method><state-%s/></state-method>" EOL,
	     (site->state_method==state_checksum)?"checksum":"timesize");
    if (site->safemode) {
	fprintf(fp, " <safemode/>" EOL);
    }
    fprintf(fp, " <escaped-filenames/>" EOL);
    fprintf(fp, "</options>" EOL);
    fprintf(fp, "<items>" EOL);
    /* Now write out the items */
    for (current = site->files; current!=NULL; current = current->next) {
	char *fname;
	if (!current->stored.exists) continue;
	fprintf(fp, "<item>");
	fprintf(fp, "<type><type-%s/></type>",
		 (current->type==file_file)?"file":(
		     (current->type==file_dir)?"directory":"link"));
        /* escape filenames correctly for XML. */
        fname = fn_escape(current->stored.filename);
	fprintf(fp, "<filename>%s</filename>\n", fname);
        g_free(fname);
        fprintf(fp, "<protection>%03o</protection>", 
                current->stored.mode); /* three-digit octal */
	switch (current->type) {
	case file_link: {
	    /* The target is free text, so XML-escape it; a raw '&' or '<'
	     * would otherwise make the whole state file unparseable.
	     * NOTE(review): the reader in this file only accepts
	     * <linktarget> nested inside <type-link>, not as a child of
	     * <item> as written here - confirm which layout is intended. */
	    char *target = g_markup_escape_text(
		current->stored.linktarget ? current->stored.linktarget : "",
		-1);
	    fprintf(fp, "<linktarget>%s</linktarget>", target);
	    g_free(target);
	    break;
	}
	case file_file:
	    fprintf(fp, "<size>%" NE_FMT_OFF_T "</size>", 
		    current->stored.size);
	    switch (site->state_method) {
	    case state_timesize:
		/* Cast: the width of the time field need not match %ld. */
		fprintf(fp, "<modtime>%ld</modtime>", 
			(long)current->stored.time);
		break;
	    case state_checksum:
		break;
	    }
	    fprintf(fp, "<ascii>%s</ascii>",
		     current->stored.ascii?"<true/>":"<false/>");
	    if (current->server.exists) {
		fprintf(fp, "<server-modtime>%ld</server-modtime>", 
			 (long)current->server.time);
	    }
	    break;
	case file_dir:
	    /* nothing to do */
	    break;
	}
	fprintf(fp, "</item>" EOL);
    }
    fprintf(fp, "</items>" EOL);
    fprintf(fp, "</sitestate>" EOL);
    site->stored_state_method = site->state_method;

    /* A failed write leaves the stream's error indicator set; sample it
     * before the stream is closed so a truncated file is reported. */
    write_failed = ferror(fp);
    close_ret = site_close_storage_file(site);
    return write_failed ? -1 : close_ret;
}

#undef EOL

/* Parser states for reading the site state file.  Each state names the
 * element currently open; the comment on each line gives the state
 * that element is accepted in by start_element(). */
typedef enum {
	SC_STATE_NONE = 0,     /* outside the document element */
	SC_SITE_STATE,         /* <sitestate>         in SC_STATE_NONE */
	SC_OPTIONS,            /* <options>           in SC_SITE_STATE */
	SC_SAVED_BY,           /* <saved-by>          in SC_OPTIONS */
	SC_STATE_METHOD,       /* <state-method>      in SC_OPTIONS */
	SC_STATE_TIMESIZE,     /* <state-timesize>    in SC_STATE_METHOD */
	SC_ESCAPED_FILENAMES,  /* <escaped-filenames> in SC_OPTIONS */
	SC_ITEMS,              /* <items>             in SC_SITE_STATE */
	SC_ITEM,               /* <item>              in SC_ITEMS */
	SC_TYPE,               /* <type>              in SC_ITEM */
	SC_TYPE_FILE,          /* <type-file>         in SC_TYPE */
	SC_TYPE_DIRECTORY,     /* <type-directory>    in SC_TYPE */
	SC_TYPE_LINK,          /* <type-link>         in SC_TYPE */
	SC_LINKTARGET,         /* <linktarget>        in SC_TYPE_LINK */
	SC_FILENAME,           /* <filename>          in SC_ITEM */
	SC_PROTECTION,         /* <protection>        in SC_ITEM */
	SC_SIZE,               /* <size>              in SC_ITEM */
	SC_MODTIME,            /* <modtime>           in SC_ITEM */
	SC_ASCII,              /* <ascii>             in SC_ITEM */
	SC_TRUE,               /* <true>              in SC_ASCII */
	SC_FALSE,              /* <false>             in SC_ASCII */
	SC_SERVER_MODTIME      /* <server-modtime>    in SC_ITEM */
} StoreState;

/* Per-parse context handed to every SAX callback; defined below. */
typedef struct SiteStateParseData SiteStateParseData;

/* SAX callbacks, declared ahead of the handler table that refers to
 * them.  Each receives the SiteStateParseData as its first argument. */
static void start_element( SiteStateParseData *data, 
		const xmlChar *fullname, const xmlChar **atts );
static void end_element( SiteStateParseData *data, const xmlChar *name );
static void char_data( SiteStateParseData *data, const xmlChar *ch, int len );
static void parse_error( SiteStateParseData *data, const char *msg, ... );

/* SAX handler table used to read the state file.  The initializer is
 * positional, so the order of the entries must follow the member order
 * of libxml2's xmlSAXHandler; only the element, character-data and
 * error callbacks are installed.
 * Note that the callbacks clear entries of this table (through
 * SiteStateParseData.ctx) when they hit an error. */
static xmlSAXHandler sax_handler = {
	NULL, /* internalSubset */
	NULL, /* isStandalone */
	NULL, /* hasInternalSubset */
	NULL, /* hasExternalSubset */
	NULL, /* resolveEntity */
	NULL, /* getEntity */
	NULL, /* entityDecl */
	NULL, /* notationDecl */
	NULL, /* attributeDecl */
	NULL, /* elementDecl */
	NULL, /* unparsedEntityDecl */
	NULL, /* setDocumentLocator */
	NULL, /* startDocument */
	NULL, /* endDocument */
	(startElementSAXFunc)start_element,
	(endElementSAXFunc)end_element,
	NULL, /* reference */
	(charactersSAXFunc)char_data,
	NULL, /* ignorableWhitespace */
	NULL, /* processingInstruction */
	NULL, /* comment */
	NULL, /* warning */
	(errorSAXFunc)parse_error,
	(fatalErrorSAXFunc)parse_error,
	NULL, /* getParameterEntity */
	(cdataBlockSAXFunc)char_data
};


/* State carried through one parse of the site state file. */
struct SiteStateParseData{
	/* Handler table in use; callbacks clear its entries on error */
	xmlSAXHandlerPtr ctx;
	/* Element the parser is currently inside */
	StoreState state;
	/* Site whose stored state is being loaded */
	struct site *site;
	/* Error message once parsing has failed, NULL otherwise */
	gchar *error;
	/* What we've collected so far */
	enum file_type type;
	struct file_state stored;
	struct file_state server;
	/* Text collected for the element currently open, or NULL */
	gchar *cdata;
	unsigned int truth:2; /* 0 invalid, 1: true, 2: false */
};

/* SAX start-element callback.
 * Moves the state machine into the state for element 'xmlname' if that
 * element is allowed inside the element currently open (data->state);
 * anything else is a parse error.  Opening an <item> also resets
 * everything collected for the previous item.
 * On error the handler table's callbacks are cleared and data->error
 * is set.  Attributes are never inspected. */
static void start_element( SiteStateParseData *data, 
		const xmlChar *xmlname, const xmlChar **xmlatts )
{
	gboolean error;
	const gchar *name;

	(void)xmlatts;

	/* Once parsing has failed, ignore whatever else is delivered. */
	if( data->error ) {
		return;
	}
	
	error = FALSE;

	name = (const gchar*)xmlname;
	
	switch( data->state ) {
		case SC_STATE_NONE:
			/* allow SC_SITE_STATE */
			if( ! strcmp( name, "sitestate" ) ) {
				data->state = SC_SITE_STATE;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_SITE_STATE:
			/* allow SC_OPTIONS, SC_ITEMS */
			if( ! strcmp( name, "options" ) ) {
				data->state = SC_OPTIONS;
			} else if( ! strcmp( name, "items" ) ) {
				data->state = SC_ITEMS;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_OPTIONS:
			/* allow SC_SAVED_BY, SC_STATE_METHOD,
			 * SC_ESCAPED_FILENAMES */
			if( ! strcmp( name, "saved-by" ) ) {
				data->state = SC_SAVED_BY;
			} else if( ! strcmp( name, "state-method" ) ) {
				data->state = SC_STATE_METHOD;
			} else if( ! strcmp( name, "escaped-filenames" ) ) {
				data->state = SC_ESCAPED_FILENAMES;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_STATE_METHOD:
			/* allow SC_STATE_TIMESIZE */
			if( ! strcmp( name, "state-timesize" ) ) {
				data->state = SC_STATE_TIMESIZE;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ITEMS:
			/* allow SC_ITEM */
			if( ! strcmp( name, "item" ) ) {
				data->state = SC_ITEM;
				g_free( data->cdata );
				data->cdata = NULL;
				memset( &data->stored, 0, 
					sizeof( data->stored ) );
				data->stored.mode = -1;
				/* The server state and the <ascii> flag
				 * belong to one item only; without this
				 * they would leak into every later item. */
				memset( &data->server, 0,
					sizeof( data->server ) );
				data->truth = 0;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ITEM:
			/* allow SC_TYPE, SC_FILENAME,
			 * SC_PROTECTION, SC_SIZE, SC_MODTIME,
			 * SC_ASCII, SC_SERVER_MODTIME */
			if( ! strcmp( name, "type" ) ) {
				data->state = SC_TYPE;
			} else if( ! strcmp( name, "filename" ) ) {
				data->state = SC_FILENAME;
			} else if( ! strcmp( name, "protection" ) ) {
				data->state = SC_PROTECTION;
			} else if( ! strcmp( name, "size" ) ) {
				data->state = SC_SIZE;
			} else if( ! strcmp( name, "modtime" ) ) {
				data->state = SC_MODTIME;
			} else if( ! strcmp( name, "ascii" ) ) {
				data->state = SC_ASCII;
			} else if( ! strcmp( name, "server-modtime" ) ) {
				data->state = SC_SERVER_MODTIME;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_TYPE:
			/* allow SC_TYPE_FILE, SC_TYPE_DIRECTORY,
			 * SC_TYPE_LINK */
			if( ! strcmp( name, "type-file" ) ) {
				data->state = SC_TYPE_FILE;
			} else if( ! strcmp( name, "type-directory" ) ){
				data->state = SC_TYPE_DIRECTORY;
			} else if( ! strcmp( name, "type-link" ) ) {
				data->state = SC_TYPE_LINK;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_TYPE_LINK:
			/* allow SC_LINKTARGET */
			if( ! strcmp( name, "linktarget" ) ) {
				data->state = SC_LINKTARGET;
			} else {
				/* error */
				error = TRUE;
			}
			/* must not fall through into SC_ASCII, which
			 * would reject "linktarget" */
			break;
		case SC_ASCII:
			/* allow SC_TRUE, SC_FALSE */
			if( ! strcmp( name, "true" ) ) {
				data->state = SC_TRUE;
			} else if( ! strcmp( name, "false" ) ) {
				data->state = SC_FALSE;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_SAVED_BY:
		case SC_ESCAPED_FILENAMES:
		case SC_TYPE_FILE:
		case SC_TYPE_DIRECTORY:
		case SC_FILENAME:
		case SC_PROTECTION:
		case SC_SIZE:
		case SC_MODTIME:
		case SC_SERVER_MODTIME:
		default:
			/* leaf elements: no children allowed */
			error = TRUE;
			break;
	}

	if( error ) {
		data->ctx->startElement = NULL;
		data->ctx->endElement = NULL;
		data->ctx->characters = NULL;
		data->ctx->error = NULL;
		data->ctx->fatalError = NULL;
		data->ctx->cdataBlock = NULL;
		data->error = _( "Unable to parse site state file" );
	}
}

/* SAX end-element callback.
 * Checks that the element being closed ('xmlname') is the one the
 * state machine believes is open, moves back to the parent state, and
 * stores whatever the element carried: the text gathered by
 * char_data() for leaf elements, the file type, the <ascii> flag, or -
 * when an <item> closes - the whole collected item, which is handed to
 * file_set_stored().
 * On error the handler table's callbacks are cleared and data->error
 * is set. */
static void end_element( SiteStateParseData *data, const xmlChar *xmlname )
{
	gboolean error;
	struct site_file *file;
	const gchar *name;

	/* Once parsing has failed, ignore whatever else is delivered. */
	if( data->error ) {
		return;
	}

	error = FALSE;

	name = (const gchar*)xmlname;
	
	switch( data->state ) {
		case SC_STATE_NONE:
			/* error */
			error = TRUE;
			break;
		case SC_SITE_STATE:
			data->state = SC_STATE_NONE;
			if( strcmp( name, "sitestate" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_OPTIONS:
			data->state = SC_SITE_STATE;
			if( strcmp( name, "options" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_SAVED_BY:
			data->state = SC_OPTIONS;
			if( strcmp( name, "saved-by" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_STATE_METHOD:
			data->state = SC_OPTIONS;
			if( strcmp( name, "state-method" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_STATE_TIMESIZE:
			data->state = SC_STATE_METHOD;
			data->site->stored_state_method = state_timesize;
			if( strcmp( name, "state-timesize" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ESCAPED_FILENAMES:
			data->state = SC_OPTIONS;
			if( strcmp( name, "escaped-filenames" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ITEMS:
			data->state = SC_SITE_STATE;
			if( strcmp( name, "items" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ITEM:
			data->state = SC_ITEMS;
			if( strcmp( name, "item" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->stored.exists = TRUE;
				file = file_set_stored( data->type,
						&data->stored,
						data->site );
				if( data->server.exists ) {
					file_state_copy( &file->server,
							&data->server,
							data->site );
				}
			}
			break;
		case SC_TYPE:
			data->state = SC_ITEM;
			if( strcmp( name, "type" ) ) {
				/* error */
				error = TRUE;
			}
			break;
		case SC_TYPE_FILE:
			data->state = SC_TYPE;
			if( strcmp( name, "type-file" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->type = file_file;
			}
			break;
		case SC_TYPE_DIRECTORY:
			data->state = SC_TYPE;
			if( strcmp( name, "type-directory" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->type = file_dir;
			}
			break;
		case SC_TYPE_LINK:
			data->state = SC_TYPE;
			if( strcmp( name, "type-link" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->type = file_link;
			}
			break;
		case SC_LINKTARGET:
			data->state = SC_TYPE_LINK;
			if( strcmp( name, "linktarget" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->stored.linktarget = g_strdup( data->cdata );
			}
			break;
		case SC_FILENAME:
			data->state = SC_ITEM;
			if( strcmp( name, "filename" ) ) {
				/* error */
				error = TRUE;
			} else if( data->cdata ) {
				data->stored.filename = fn_unescape( data->cdata );
			} else {
				/* an item without a name is unusable */
				error = TRUE;
			}
			break;
		case SC_PROTECTION:
			data->state = SC_ITEM;
			if( strcmp( name, "protection" ) ) {
				/* error */
				error = TRUE;
			} else if( data->cdata ) {
				data->stored.mode = strtoul( data->cdata,
						NULL, 8 );
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_SIZE:
			data->state = SC_ITEM;
			if( strcmp( name, "size" ) ) {
				/* error */
				error = TRUE;
			} else if( data->cdata ) {
				data->stored.size = strtol( data->cdata,
						NULL, 10 );
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_MODTIME:
			data->state = SC_ITEM;
			if( strcmp( name, "modtime" ) ) {
				/* error */
				error = TRUE;
			} else if( data->cdata ) {
				data->stored.time = strtol( data->cdata,
						NULL, 10 );
				/* strtol() saturates on overflow */
				if( data->stored.time == LONG_MIN ||
				    data->stored.time == LONG_MAX ) {
					/* error */
					error = TRUE;
				} 
			} else {
				/* error */
				error = TRUE;
			}
			break;
		case SC_ASCII:
			data->state = SC_ITEM;
			if( strcmp( name, "ascii" ) ) {
				/* error */
				error = TRUE;
			} else if( data->truth ) {
				data->stored.ascii = ( data->truth == 1 );
			} else {
				/* error */
				error = TRUE;
			}
			/* consumed; an <ascii> without <true/> or <false/>
			 * must not pick up an earlier value */
			data->truth = 0;
			break;
		case SC_TRUE:
			data->state = SC_ASCII;
			if( strcmp( name, "true" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->truth = 1;
			}
			break;
		case SC_FALSE:
			data->state = SC_ASCII;
			if( strcmp( name, "false" ) ) {
				/* error */
				error = TRUE;
			} else {
				data->truth = 2;
			}
			break;
		case SC_SERVER_MODTIME:
			data->state = SC_ITEM;
			/* the element is <server-modtime>, see
			 * start_element() */
			if( strcmp( name, "server-modtime" ) ) {
				/* error */
				error = TRUE;
			} else if( data->cdata ) {
				data->server.time = strtol( data->cdata,
						NULL, 10 );
				/* strtol() saturates on overflow */
				if( data->server.time == LONG_MIN ||
				    data->server.time == LONG_MAX ) {
					/* error */
					error = TRUE;
				}
				data->server.exists = 1;
			} else {
				/* error */
				error = TRUE;
			}
			break;
		default:
			/* error */
			error = TRUE;
			break;
	}

	/* Collected text belongs to the element that just closed; drop it
	 * so that an empty element which follows cannot reuse it and so
	 * that char_data() starts the next element from scratch. */
	g_free( data->cdata );
	data->cdata = NULL;

	if( error ) {
		data->ctx->startElement = NULL;
		data->ctx->endElement = NULL;
		data->ctx->characters = NULL;
		data->ctx->error = NULL;
		data->ctx->fatalError = NULL;
		data->ctx->cdataBlock = NULL;
		data->error = _( "Unable to parse site state file" );
	}
}

/* SAX characters / CDATA callback.
 * Text is only kept while a text-carrying leaf element is open; it is
 * gathered in data->cdata and consumed by end_element().  The parser
 * may deliver the text of one element in several pieces, so each piece
 * is appended to what has been gathered so far (end_element() empties
 * data->cdata whenever an element closes).  'ch' is not
 * NUL-terminated; only 'len' bytes of it are used. */
static void char_data( SiteStateParseData *data, const xmlChar *ch, int len )
{
	gchar *chunk;
	gchar *joined;

	/* Once parsing has failed, ignore whatever else is delivered. */
	if( data->error ) {
		return;
	}
	
	switch( data->state ) {
		case SC_FILENAME:
		case SC_PROTECTION:
		case SC_SIZE:
		case SC_MODTIME:
		case SC_SERVER_MODTIME:
		case SC_LINKTARGET:
			chunk = g_strndup( (const gchar*)ch, len );
			if( data->cdata ) {
				joined = g_strconcat( data->cdata, chunk,
						NULL );
				g_free( chunk );
				g_free( data->cdata );
				data->cdata = joined;
			} else {
				data->cdata = chunk;
			}
			break;
		default:
			/* text (normally whitespace) between structural
			 * elements is of no interest */
			break;
	}
}

/* SAX error / fatal-error callback.
 * The parser's own message ('msg' and its arguments) is ignored: the
 * callbacks installed in the handler table are cleared and a fixed,
 * translated message is recorded in data->error. */
static void parse_error( SiteStateParseData *data, const char *msg, ... )
{
	xmlSAXHandlerPtr handler = data->ctx;

	/* element and text callbacks */
	handler->startElement = NULL;
	handler->endElement = NULL;
	handler->characters = NULL;
	/* error callbacks, this one included */
	handler->error = NULL;
	handler->fatalError = NULL;
	handler->cdataBlock = NULL;

	data->error = _( "Unable to parse site state file" );
}

int site_read_stored_state( struct site *site )
{
	int ret;
	SiteStateParseData user_data;
	struct stat st;
	
	memset( &user_data, 0, sizeof( SiteStateParseData ) );
	user_data.ctx = &sax_handler;
	user_data.state = SC_STATE_NONE;
	/* force state_timesize, we don't support checksum */
	user_data.site = site;
	user_data.site->stored_state_method = state_timesize;
	
	if( ! xmlSAXUserParseFile( &sax_handler, &user_data,
				site->infofile ) ) {
		/* success */
		if( ! user_data.error ) {
			ret = SITE_OK;
		} else {
			ret = SITE_ERRORS;
			site->last_error = g_strdup( user_data.error );
		}
	} else {
		/* error */
		ret = stat( site->infofile, &st );
		if( ( ret == 0 ) || ( errno != ENOENT ) ) {
			ret = SITE_ERRORS;
		} else {
			ret = SITE_FAILED;
		}
	}

	if( user_data.cdata ) {
		g_free( user_data.cdata );
	}

	return ret;
}

