/*  Screem:  screem-dtd.c
 *
 *  DOCTYPE parsing / validity checking functions
 *
 *  Copyright (C) 2003 David A Knight
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <libgnome/gnome-macros.h>

#include <glib.h>

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include "screem-dtd.h"
#include "screem-dtd-db.h"

#include "support.h"

#include "fileops.h"

/* GObject plumbing for ScreemDTD, all defined further down in this
 * file: class and instance initialisers, the finalizer, and the
 * property setter / getter installed on the class in
 * screem_dtd_class_init(). */
static void screem_dtd_class_init( ScreemDTDClass *klass );
static void screem_dtd_instance_init( ScreemDTD *dtd );
static void screem_dtd_finalize( GObject *object );
static void screem_dtd_set_prop( GObject *object, guint prop_id,
                                const GValue *value, GParamSpec *spec );
static void screem_dtd_get_prop( GObject *object, guint prop_id,
                                GValue *value, GParamSpec *spec );

enum {
	PROP_0,
	PROP_PUBLIC,
	PROP_SYSTEM,
	PROP_COMPARE
};

/* Private per-instance state, allocated zeroed in
 * screem_dtd_instance_init() and released in screem_dtd_finalize(). */
struct ScreemDTDPrivate {
	/* owned copies of the "public" / "system" property strings */
	gchar *public_id;
	gchar *system_id;

	/* freed in finalize; NOTE(review): presumably the name of the
	 * root element, it is not assigned in this part of the file */
	gchar *root;

	/* entity name -> ScreemDTDEntity, always case sensitive */
	GHashTable *entities;
	/* element name -> ScreemDTDElement, keyed with hfunc / efunc */
	GHashTable *elements;
	/* created with hfunc / efunc and destroyed in finalize; its
	 * contents are not populated in this part of the file */
	GHashTable *attlists;

	/* for quick access to all elements */
	GSList *elementlist;

	/* name comparison / hashing / equality, chosen when the
	 * "public" property is set: case insensitive variants when the
	 * public id looks like an HTML one, plain strcmp / g_str_hash /
	 * g_str_equal otherwise */
	GCompareFunc compare;
	GHashFunc hfunc;
	GEqualFunc efunc;

	/* rooturi is freed in finalize, toproot is not (it is const);
	 * NOTE(review): neither is assigned in this part of the file */
	gchar *rooturi;
	const gchar *toproot;
};

/* One <!ENTITY ...> declaration, stored in ScreemDTDPrivate::entities */
struct ScreemDTDEntity {
	gchar *name;		/* also used as the hash table key */
	gchar *value;		/* replacement text as read from the DTD */
	gchar *description;	/* optional, emitted as a comment when
				   dumping; left NULL by
				   screem_dtd_parse_entity() */

	gboolean general;	/* FALSE for parameter entities, i.e. those
				   declared with a leading '%' */
};

/* Kind of a ScreemDTDGroup node.  Only SCREEM_DTD_EXCLUDE is treated
 * specially in this part of the file: excluded names are removed from
 * an element's allowed table in screem_dtd_parse_element().
 * NOTE(review): SEQUENCE / OR presumably mirror the ',' and '|'
 * connectors of a content model, confirm against
 * screem_dtd_name_parse(). */
typedef enum {
	SCREEM_DTD_SEQUENCE,
	SCREEM_DTD_OR,
	SCREEM_DTD_EXCLUDE
} ScreemDTDGroupType;

typedef struct ScreemDTDGroup ScreemDTDGroup;

/* Node of the tree produced by screem_dtd_name_parse().  The callers
 * in this file flatten the tree with screem_dtd_group_flatten() and
 * then walk the result through the siblings pointer only. */
struct ScreemDTDGroup {
	ScreemDTDGroupType type;
	/* occurrence bounds, copied verbatim into ScreemDTDAllowed */
	gint min;
	gint max;

	/* NULL for nodes that carry no name; such nodes are skipped by
	 * the callers in this file */
	gchar *name;

	ScreemDTDGroup *siblings;
	ScreemDTDGroup *children;
};

/* One element declared by <!ELEMENT ...>, created in
 * screem_dtd_parse_element() and stored in ScreemDTDPrivate::elements
 * and ScreemDTDPrivate::elementlist. */
struct ScreemDTDElement {
	gchar *name;		/* also used as the hash table key */
	gchar *description;	/* copy of the trailing comment, if any */
	/* whether the open / close tag must, should or must not be
	 * present, derived from the "O O", "- O", "- -" and EMPTY
	 * markers of the declaration */
	ScreemDTDTagExistance ostate;
	ScreemDTDTagExistance cstate;
	
	/* child name -> ScreemDTDAllowed */
	GHashTable *allowed;
	/* attribute name -> ScreemDTDAttribute */
	GHashTable *attributes;

	/* list built from the allowed table through
	 * screem_dtd_copy_allowed_from_table() */
	GSList *allowedlist;
	/* NOTE(review): not assigned in this part of the file,
	 * presumably the list form of the attributes table */
	GSList *attrlist;

	/* NOTE(review): not assigned in this part of the file,
	 * presumably filled by screem_dtd_build_optional() */
	GSList *optional;
	ScreemDTD *dtd;		/* the DTD this element belongs to */
};

/* A child element permitted inside another element; the values of
 * ScreemDTDElement::allowed. */
struct ScreemDTDAllowed {
	gchar *name;	/* owned copy, also the hash table key */
	
	/* occurrence bounds taken from the ScreemDTDGroup node */
	gint min;
	gint max;

	guint ref;	/* set to 1 on creation */
};

/* user_data handed to screem_dtd_copy_allowed_from_table() when
 * walking an allowed table: list starts out NULL and is read back as
 * the element's allowedlist once the walk is done. */
typedef struct {
	ScreemDTD *dtd;
	GSList *list;
} ScreemDTDAllowedCopy;

/* Default declaration keyword of an attribute.  NO_TYPE is zero so it
 * is what a freshly g_new0()'d ScreemDTDAttribute starts with. */
typedef enum {
	SCREEM_DTD_NO_TYPE = 0,	/* no '#' keyword seen */
	SCREEM_DTD_IMPLIED,	/* #IMPLIED */
	SCREEM_DTD_REQUIRED,	/* #REQUIRED */
	SCREEM_DTD_FIXED	/* #FIXED, followed by a default value */
} ScreemDTDAttributeType;

/* One attribute from an <!ATTLIST ...> declaration, built by
 * screem_dtd_parse_attlist_attrs(). */
struct ScreemDTDAttribute {
	gchar *name;
	gchar *default_value;	/* NULL when none was declared */
	gchar *description;	/* text of the first comment seen while
				   parsing the attribute, if any */

	/* list of g_strdup()'d value / type names; falls back to a
	 * single "CDATA" entry when none could be parsed */
	GSList *values;

	/* TRUE when the declared type is one of the known URI entities
	 * or the attribute is literally named "url" */
	gboolean isuri;
	ScreemDTDAttributeType required;

	/* 1 on creation, incremented for each element table the
	 * attribute is inserted into */
	guint ref;
};

/* case insensitive equality / hash pair for GHashTable */
static gint g_strcase_equal (gconstpointer a, gconstpointer b);
static guint g_strcase_hash (gconstpointer v);

/* low level scanners: each takes a position in the DTD text and
 * returns the position at which parsing should continue */
static const gchar *screem_dtd_bypass_comment( const gchar *data,
						gchar **txt );
static const gchar *screem_dtd_read_entity( const gchar *data,
						gchar **name );
static const gchar *screem_dtd_parse_element( ScreemDTD *dtd,
					      const gchar *data );
static const gchar *screem_dtd_parse_attlist( ScreemDTD *dtd,
					      const gchar *data );

/* name / content model parsing into a ScreemDTDGroup tree */
static const gchar *screem_dtd_name_parse( ScreemDTD *dtd,
					const gchar *pattern, 
					ScreemDTDGroup **group );
static const gchar *screem_dtd_name_parse2( ScreemDTD *dtd,
					const gchar *pattern, 
					ScreemDTDGroup *group );

/* ScreemDTDGroup tree construction, flattening and disposal */
static ScreemDTDGroup *screem_dtd_group_new_child( ScreemDTDGroup *parent, ScreemDTDGroupType type );
static ScreemDTDGroup *screem_dtd_group_new_sibling( ScreemDTDGroup *group, ScreemDTDGroupType type );

static ScreemDTDGroup *screem_dtd_group_flatten( ScreemDTDGroup *group, ScreemDTDGroup *root );
static void screem_dtd_group_free( ScreemDTDGroup *group, ScreemDTDGroup *parent );

/* hash table walkers; they are cast to GHFunc at the call sites in
 * this file */
static gboolean screem_dtd_copy_allowed_from_table( gpointer key, 
					gpointer value,
					gpointer user_data  );
static gboolean screem_dtd_copy_allowed_table( gpointer key, 
					gpointer value,
					gpointer user_data  );
static gboolean screem_dtd_free_allowed_table( gpointer key, 
					gpointer value,
					gpointer user_data  );
static gboolean screem_dtd_free_entity_table( gpointer key,
					gpointer value,
					gpointer user_data );
static gboolean screem_dtd_free_element_table( gpointer key,
					gpointer value,
					gpointer user_data );
static gboolean screem_dtd_free_attribute_table( gpointer key,
					gpointer value,
					gpointer user_data );
static void screem_dtd_free_attribute( ScreemDTDAttribute *attr );

/* ordering callbacks for the element and attribute lists */
static gint screem_dtd_sort_element_list( gconstpointer a,
					gconstpointer b );
static gint screem_dtd_sort_attr_list( gconstpointer a,
					gconstpointer b,
					gpointer data );
static void screem_dtd_build_optional( gpointer data, 
					gpointer user_data );

/* Type registration for ScreemDTD as a direct GObject subclass, wired
 * to screem_dtd_class_init() / screem_dtd_instance_init().
 * NOTE(review): the macro comes from libgnome/gnome-macros.h and
 * presumably also provides the parent class pointer that
 * GNOME_CALL_PARENT() relies on in screem_dtd_finalize(). */
GNOME_CLASS_BOILERPLATE( ScreemDTD, screem_dtd, GObject, G_TYPE_OBJECT )

/* Class initialiser: installs the finalizer and property accessors
 * and registers the three properties of a ScreemDTD:
 *   "public"  - string, read/write, construct only
 *   "system"  - string, read/write, construct only
 *   "compare" - pointer, read only
 */
static void screem_dtd_class_init( ScreemDTDClass *klass )
{
	GObjectClass *gobject_class = G_OBJECT_CLASS( klass );
	const GParamFlags id_flags = G_PARAM_READWRITE |
				     G_PARAM_CONSTRUCT_ONLY;

	gobject_class->finalize = screem_dtd_finalize;
	gobject_class->get_property = screem_dtd_get_prop;
	gobject_class->set_property = screem_dtd_set_prop;

	g_object_class_install_property( gobject_class, PROP_PUBLIC,
		g_param_spec_string( "public", "public id",
				     "The PUBLIC identifier for the dtd",
				     "", id_flags ) );

	g_object_class_install_property( gobject_class, PROP_SYSTEM,
		g_param_spec_string( "system", "system id",
				     "The SYSTEM identifier for the dtd",
				     "", id_flags ) );

	g_object_class_install_property( gobject_class, PROP_COMPARE,
		g_param_spec_pointer( "compare", "compare",
			"pointer to the compare function to use on element names",
			G_PARAM_READABLE ) );
}

/* Instance initialiser: attaches a zero filled private structure.
 * The hash tables inside it are created later, when the "public"
 * property is set. */
static void screem_dtd_instance_init( ScreemDTD *dtd )
{
	ScreemDTDPrivate *fresh;

	fresh = g_new0( ScreemDTDPrivate, 1 );
	dtd->priv = fresh;
}

/* GObject finalizer: releases everything owned by the private
 * structure, then the structure itself, and finally chains up to the
 * parent class. */
static void screem_dtd_finalize( GObject *object )
{
	ScreemDTDPrivate *p = SCREEM_DTD( object )->priv;

	/* identifier strings */
	g_free( p->public_id );
	g_free( p->system_id );

	/* entities: release each value, then the table */
	g_hash_table_foreach( p->entities,
			(GHFunc)screem_dtd_free_entity_table, NULL );
	g_hash_table_destroy( p->entities );

	/* elements: same again, plus the quick access list, which
	 * only borrows the element pointers */
	g_hash_table_foreach( p->elements,
			(GHFunc)screem_dtd_free_element_table, NULL );
	g_hash_table_destroy( p->elements );
	g_slist_free( p->elementlist );

	/* attribute lists: only the table itself is destroyed here */
	g_hash_table_destroy( p->attlists );

	g_free( p->root );
	g_free( p->rooturi );

	g_free( p );

	GNOME_CALL_PARENT( G_OBJECT_CLASS, finalize, (object) );
}

/* GObject property setter.
 *
 * PROP_PUBLIC stores a copy of the public identifier, decides from it
 * whether element names are case sensitive (identifiers containing
 * " HTML ", " html ", " HTML/" or " html/" are not), and creates the
 * entity / element / attlist tables with matching hash and equality
 * functions.  Entities are always case sensitive.
 *
 * PROP_SYSTEM stores a copy of the system identifier.
 *
 * Any other property id is silently ignored.
 */
static void screem_dtd_set_prop( GObject *object, guint prop_id,
                                const GValue *value, GParamSpec *spec )
{
	ScreemDTDPrivate *priv = SCREEM_DTD( object )->priv;
	const gchar *id;
	gboolean nocase;
	GHashFunc hash;
	GEqualFunc equal;

	if( prop_id == PROP_PUBLIC ) {
		g_free( priv->public_id );
		id = g_value_get_string( value );
		/* g_strdup() hands back NULL for a NULL id */
		priv->public_id = g_strdup( id );

		nocase = ( id != NULL ) &&
			 ( strstr( id, " HTML " ) ||
			   strstr( id, " html " ) ||
			   strstr( id, " HTML/" ) ||
			   strstr( id, " html/" ) );

		if( nocase ) {
			priv->compare = (GCompareFunc)g_strcasecmp;
			hash = g_strcase_hash;
			equal = g_strcase_equal;
		} else {
			priv->compare = (GCompareFunc)strcmp;
			hash = g_str_hash;
			equal = g_str_equal;
		}
		priv->hfunc = hash;
		priv->efunc = equal;

		priv->entities = g_hash_table_new( g_str_hash,
						   g_str_equal );
		priv->elements = g_hash_table_new( hash, equal );
		priv->attlists = g_hash_table_new( hash, equal );
	} else if( prop_id == PROP_SYSTEM ) {
		g_free( priv->system_id );
		/* as above, a NULL id is stored as NULL */
		priv->system_id = g_strdup( g_value_get_string( value ) );
	}
}

/* GObject property getter: hands out the stored public / system
 * identifier or the element name compare function.  Unknown property
 * ids leave value untouched. */
static void screem_dtd_get_prop( GObject *object, guint prop_id,
                                GValue *value, GParamSpec *spec )
{
	const ScreemDTDPrivate *priv = SCREEM_DTD( object )->priv;

	if( prop_id == PROP_PUBLIC ) {
		g_value_set_string( value, priv->public_id );
	} else if( prop_id == PROP_SYSTEM ) {
		g_value_set_string( value, priv->system_id );
	} else if( prop_id == PROP_COMPARE ) {
		g_value_set_pointer( value, priv->compare );
	}
}

/* static stuff */


/***/
/* g_strcase_hash() and g_strcase_equal() are taken
 * from camel and are Copyright 1999,2000 Helix Code, Inc. */
/* use these two funcs for case insensitive hash table */

/* Equality callback for case insensitive hash tables: non-zero when
 * the two strings are equal according to g_strcasecmp(). */
static gint g_strcase_equal (gconstpointer a, gconstpointer b)
{
	const gchar *lhs = a;
	const gchar *rhs = b;

	return ! g_strcasecmp( lhs, rhs );
}

/* modified g_str_hash from glib/gstring.c
   because it would have been too slow to
   use g_strdown() on the string */
/* a char* hash function from ASU */
/* Hash callback for case insensitive hash tables.  Every byte is
 * folded to upper case before being mixed in, so strings that only
 * differ in case hash to the same value.
 *
 * v must point to a NUL terminated string.
 */
static guint g_strcase_hash (gconstpointer v)
{
	/* walk the string as unsigned bytes: toupper() is only defined
	 * for EOF and values representable as unsigned char, so feeding
	 * it a plain (possibly negative) char is undefined behaviour
	 * for any byte >= 0x80, e.g. in UTF-8 text */
	const unsigned char *p;
	guint h = 0;
	guint g;

	for( p = v; *p != '\0'; p ++ ) {
		h = ( h << 4 ) + (guint)toupper( *p );
		if( ( g = h & 0xf0000000 ) ) {
			h = h ^ ( g >> 24 );
		}
	}

	return h /* % M */;
}

/* Hash table walker: appends one entity as an <!ENTITY ...>
 * declaration to str.  Parameter entities get the "% " marker, and a
 * description, when present, is written as a trailing comment.
 * Always returns FALSE. */
static gboolean screem_dtd_dump_entities( gpointer key,
					  gpointer value,
					  GString *str )
{
	const ScreemDTDEntity *ent = value;
	const gchar *ent_name = key;

	g_string_append( str, "<!ENTITY " );
	if( ent->general == FALSE ) {
		g_string_append( str, "% " );
	}
	g_string_append( str, ent_name );
	g_string_append_printf( str, " \"%s\" ", ent->value );

	if( ent->description != NULL ) {
		g_string_append_printf( str, "-- %s -- ", 
				ent->description );
	}

	g_string_append( str, ">\n" );

	return FALSE;
}

/* Hash table walker: appends the name of an allowed child followed by
 * " | " to str.  Always returns FALSE. */
static gboolean screem_dtd_dump_allowed( gpointer key,
					 gpointer value,
					 GString *str )
{
	const ScreemDTDAllowed *child = value;

	g_string_append( str, child->name );
	g_string_append( str, " | " );

	return FALSE;
}

/* Hash table walker: appends an attribute name on a line of its own,
 * indented by two spaces and followed by a tab, to str.  Always
 * returns FALSE. */
static gboolean screem_dtd_dump_attributes( gpointer key,
					 gpointer value,
					 GString *str )
{
	const ScreemDTDAttribute *attribute = value;

	g_string_append_printf( str, "\n  %s\t", attribute->name );

	return FALSE;
}



static gboolean screem_dtd_dump_elements( gpointer key,
					  gpointer value,
					  GString *str )
{
	ScreemDTDElement *element;

	element = (ScreemDTDElement*)value;
	
	g_string_append_printf( str, "<!ELEMENT %s ", 
			(const gchar *)key );

	switch( element->ostate ) {
		case SCREEM_DTD_SHOULD:
			g_string_append( str, "O " );
			break;
		case SCREEM_DTD_MUST:
			g_string_append( str, "- " );
			break;
		case SCREEM_DTD_MUST_NOT:
			/* this shouldn't ever occur */
			g_string_append( str, "O " );
			break;
	}
	switch( element->cstate ) {
		case SCREEM_DTD_SHOULD:
			g_string_append( str, "O " );
			break;
		case SCREEM_DTD_MUST:
			g_string_append( str, "- " );
			break;
		case SCREEM_DTD_MUST_NOT:
			/* this shouldn't ever occur */
			g_string_append( str, "O " );
			break;
	}
	
	g_string_append( str, "(" );
	g_hash_table_foreach( element->allowed,
			(GHFunc)screem_dtd_dump_allowed,
			str );
	g_string_append( str, ")" );
	
	if( element->description ) {
		g_string_append_printf( str, "-- %s -- ",
				element->description );
	}
	g_string_append( str, ">\n" );

	if( g_hash_table_size( element->attributes ) != 0 ) {
		g_string_append_printf( str, "<!ATTLIST %s ",
				(const gchar *)key );
		g_hash_table_foreach( element->attributes,
				(GHFunc)screem_dtd_dump_attributes,
				str );
		g_string_append( str, ">\n" );
	}

	return FALSE;
}



/* Skips a "-- ... --" style comment.
 *
 * data: position of the opening "--"; a warning is logged when it is
 *       not, and scanning carries on from wherever it got to
 * txt:  may be NULL; otherwise receives a newly allocated copy of the
 *       comment text with surrounding white space removed (possibly
 *       an empty string), to be released with g_free()
 *
 * Returns the position just after the closing "--", or the
 * terminating NUL of the string when the comment is never closed.
 */
static const gchar *screem_dtd_bypass_comment( const gchar *data,
						gchar **txt )
{
	const gchar *start;
	const gchar *closing;
	const gchar *text_end;
	const gchar *ret;

	if( txt ) {
		*txt = NULL;
	}

	if( *data == '-' ) {
		data = g_utf8_next_char( data );
		if( *data != '-' ) {
			g_warning( "Non comment start passed to bypass comment\n" );
		} else {
			data = g_utf8_next_char( data );
		}
	} else {
		g_warning( "Non comment start passed to bypass comment\n" );
	}
	data = g_utf8_skip_space( data );
	start = data;

	/* '-' is plain ASCII, so a byte wise search for the closing
	 * marker is safe on UTF-8 text */
	closing = strstr( data, "--" );
	if( closing ) {
		text_end = closing;
		ret = closing + strlen( "--" );
	} else {
		/* unterminated comment: consume the rest of the input.
		 * Searching for '\0' with g_utf8_strchr() does not do
		 * this, it matches at its start position */
		text_end = ret = data + strlen( data );
	}

	if( txt ) {
		/* drop white space in front of the closing marker rather
		 * than assuming exactly one space, so the length can
		 * never go negative for short or empty comments */
		while( text_end > start &&
		       g_ascii_isspace( text_end[ -1 ] ) ) {
			text_end --;
		}
		*txt = g_strndup( start, text_end - start );
	}

	return ret;
}

/* Reads a parameter entity reference such as "%name;".
 *
 * data: position of the reference, leading white space is skipped; a
 *       warning is logged if no '%' follows
 * name: may be NULL; otherwise receives the newly allocated entity
 *       name (possibly empty), to be released with g_free()
 *
 * Name characters are alphanumerics, '.', '-', '_', ':', '/' and
 * combining marks.  Returns the position of the ';', '>' or NUL that
 * ended the name, or, when some other character ended it, the
 * position one character before that character.
 */
static const gchar *screem_dtd_read_entity( const gchar *data,
						gchar **name )
{
	GString *buf;
	gunichar ch;
	gboolean is_name_char;

	data = g_utf8_skip_space( data );

	if( *data == '%' ) {
		data = g_utf8_next_char( data );
	} else {
		g_warning( "read entity called on non entity" );
	}

	buf = g_string_new( NULL );
	for( ; *data != '\0' && *data != '>' && *data != ';';
	     data = g_utf8_next_char( data ) ) {
		ch = g_utf8_get_char( data );

		is_name_char = g_unichar_isalnum( ch ) ||
			ch == '.' || ch == '-' || ch == '_' ||
			ch == ':' || ch == '/' ||
			g_unichar_type( ch ) == G_UNICODE_COMBINING_MARK;

		if( ! is_name_char ) {
			/* step back so that a caller advancing by one
			 * character ends up on the offending one */
			data = g_utf8_prev_char( data );
			break;
		}
		g_string_append_unichar( buf, ch );
	}

	if( name ) {
		/* keep the character data, drop only the wrapper */
		*name = g_string_free( buf, FALSE );
	} else {
		g_string_free( buf, TRUE );
	}

	return data;
}

/* Parses one entity declaration and records it in the DTD.
 *
 * data: position of the "ENTITY " keyword; if the text does not start
 *       with it, data is returned unchanged and nothing is recorded
 *
 * The value is either a quoted literal or, unquoted, a single word.
 * For the unquoted words PUBLIC and SYSTEM the identifier(s) that
 * follow are appended to the value, quotes included.  Comments found
 * between the parts or after the value are skipped.
 *
 * The first declaration of a name wins: if the name is already in the
 * entity table the new declaration is discarded.
 *
 * Returns the position of the closing '>' or of the terminating NUL.
 */
static const gchar *screem_dtd_parse_entity( ScreemDTD *dtd,
					     const gchar *data )
{
	ScreemDTDPrivate *priv;
	ScreemDTDEntity *entity;
	gboolean general;
	GString *name;
	GString *value;
	gunichar c;
	gunichar eterm;
	gunichar term;
	gchar *desc;
	GUnicodeType type;

	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), data );
	
	priv = dtd->priv;
	
	/* check this is an entity */
	
	if( strncmp( "ENTITY ", data, strlen( "ENTITY " ) ) ) {
		return data;
	}

	data += strlen( "ENTITY " );
	data = g_utf8_skip_space( data );
	
	/* a leading '%' marks a parameter entity */
	general = (*data != '%');
	if( ! general ) {
		data = g_utf8_next_char( data );
		data = g_utf8_skip_space( data );
	}

	/* data should now point at the entity name */
	name = g_string_new( NULL );
	while( *data != '\0' ) {
		c = g_utf8_get_char( data );
		type = g_unichar_type( c );
		if( g_unichar_isalnum( c ) ||
		    c == '.' || c == '-' || c == '_' ||
		    type == G_UNICODE_COMBINING_MARK ) {
			g_string_append_unichar( name, c );
		} else {
			break;
		}
		data = g_utf8_next_char( data );
	}
	data = g_utf8_skip_space( data );

	/* check for a comment in the way */
	if( *data == '-' && data[ 1 ] == '-' ) {
		data = screem_dtd_bypass_comment( data, NULL );
		data = g_utf8_skip_space( data );
	}
	
	/* data should now point at the entity value */
	value = g_string_new( NULL );
	/* eterm is the character that ends the value: the opening
	 * quote for a literal, or ' ' (standing for any white space)
	 * for an unquoted word */
	eterm = g_utf8_get_char( data );
	if( eterm != '"' && eterm != '\'' ) {
		eterm = ' ';
	} else {
		data = g_utf8_next_char( data );
	}
	while( *data != '\0' ) {
		c = g_utf8_get_char( data );
		if( c == eterm || ( eterm == ' ' && g_unichar_isspace( c ) ) ) {
			break;
		}
		g_string_append_unichar( value, c );
		data = g_utf8_next_char( data );
	}

	if( eterm == ' ' &&
	    value->str && ! strcmp( "PUBLIC", value->str ) ) {
		/* read public id */
		g_string_append_c( value, ' ' );
		data = g_utf8_skip_space( data );
		term = g_utf8_get_char( data );
		if( term != '"' && term != '\'' ) {
			term = ' ';
		} else {
			g_string_append_unichar( value, term );
			data = g_utf8_next_char( data );
		}
		while( *data != '\0' ) {
			c = g_utf8_get_char( data );
			if( c == term || ( term == ' ' && g_unichar_isspace( c ) ) ) {
				break;
			}
			g_string_append_unichar( value, c );
			data = g_utf8_next_char( data );
		}
		if( *data != '\0' ) {
			g_string_append_unichar( value, term );
			data = g_utf8_next_char( data );
			data = g_utf8_skip_space( data );
		}
		c = g_utf8_get_char( data );
		if( c == '-' && *( data + 1 ) == '-' ) {
			/* local copy of html 4.0 trans, at least
			 * under debian comments out the system
			 * id for the entities, so we should
			 * handle comments here */
			data = screem_dtd_bypass_comment( data, 
							 NULL );	
			c = g_utf8_get_char( data );
		}
		/* prevent attempt to read non existant SYSTEM uri */
		if( c == '>' || c == '\0' ) {
			/* any value other than ' ' disables the
			 * system id block below */
			eterm = '\0';
		}
	}
	/* value now starts with "PUBLIC" followed by the public id, or
	 * is exactly "SYSTEM"; either way a system id may follow */
	if( eterm == ' ' && value->str && 
	    ( ! strncmp( "PUBLIC", value->str, strlen( "PUBLIC" ) ) ||
		! strcmp( "SYSTEM", value->str ) ) ) {
		/* read system id */
		g_string_append_c( value, ' ' );
		data = g_utf8_skip_space( data );
		term = g_utf8_get_char( data );
		if( term != '"' && term != '\'' ) {
			term = ' ';
		} else {
			g_string_append_unichar( value, term );
			data = g_utf8_next_char( data );
		}
		while( *data != '\0' ) {
			c = g_utf8_get_char( data );
			if( c == term || ( term == ' ' && g_unichar_isspace( c ) ) ) {
				break;
			}
			g_string_append_unichar( value, c );
			data = g_utf8_next_char( data );
		}
		g_string_append_unichar( value, term );
	}

	/* skip the remainder of the declaration up to, but not
	 * including, the closing '>'; desc stays NULL throughout */
	desc = NULL;
	while( *data != '\0' && *data != '>' ) {
		c = g_utf8_get_char( data );
		if( *data == '-' && *( data + 1 ) == '-' ) {
			/* don't bother getting the comment
			   text as we don't use it anywhere */
			data = screem_dtd_bypass_comment( data, 
							 NULL );	
		} else {
			data = g_utf8_next_char( data );
		}
	}

	entity = NULL;
	if( name->str && value->str ) {		
		/* only the first declaration of a name is kept */
		if( ! g_hash_table_lookup( priv->entities,
					   name->str ) ) {
			entity = g_new0( ScreemDTDEntity, 1 );
			entity->name = name->str;
			entity->value = value->str;
			entity->general = general;

			g_hash_table_insert( priv->entities, entity->name,
					     entity );
			/* the character data now belongs to the entity,
			 * release only the GString wrappers */
			g_string_free( name, FALSE );
			g_string_free( value, FALSE );
		}
	} 
	if( ! entity ) {
		g_string_free( name, TRUE );
		g_string_free( value, TRUE);
		g_free( desc );
	}

	return data;
}

/* Parses one element declaration and records every element it names.
 *
 * data: position of the "ELEMENT " keyword; if the text does not
 *       start with it, data is returned unchanged
 *
 * The declaration is read as: name (or name group), optional tag
 * omission flags ("O O", "- O", "- -", possibly supplied through a
 * parameter entity), content model, optional exclusions ("-(...)")
 * and inclusions ("+(...)"), and an optional trailing comment that
 * becomes the description.  From these a table of allowed children is
 * built and one ScreemDTDElement per declared name is inserted into
 * the DTD's element table and sorted element list.
 *
 * Returns the position at which parsing stopped.
 */
static const gchar *screem_dtd_parse_element( ScreemDTD *dtd,
					      const gchar *data )
{
	ScreemDTDPrivate *priv;
	ScreemDTDGroup *group;
	ScreemDTDGroup *name_group;
	ScreemDTDTagExistance ostate;
	ScreemDTDTagExistance cstate;
	guint offset;
	ScreemDTDGroup *exclude;
	ScreemDTDGroup *include;
	gchar *description;
	GHashFunc hfunc;
	GEqualFunc efunc;
	GHashTable *table;
	ScreemDTDAllowed *allowed;
	ScreemDTDGroup *tmp;
	ScreemDTDElement *element;

	ScreemDTDAllowedCopy copy;
	GSList *allowedlist;
	
	const gchar *ndata;
	const gchar *backtrack;
	gchar *name;
	gchar *val;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), data );
	
	copy.dtd = dtd;
	copy.list = NULL;
	
	priv = dtd->priv;
	
	/* check this is an element */
	
	if( strncmp( "ELEMENT ", data, strlen( "ELEMENT " ) ) ) {
		return data;
	}

	data += strlen( "ELEMENT " );
	data = g_utf8_skip_space( data );
	
	/* data should now be at the start of the element name */
	group = NULL;
	data = screem_dtd_name_parse( dtd, data, &group );

	if( ! group ) {
		g_warning( "Invalid Element declaration\n" );
		return data;
	}

	/* flatten out the tree of names so it is simple
	   to use */
	name_group = screem_dtd_group_flatten( group, NULL );
	screem_dtd_group_free( group, NULL );
	/* don't hand the freed tree to the next name parse */
	group = NULL;

	data = g_utf8_skip_space( data );

	/* check for open/close options,
	 *
	 * some DTDs, TEI 2 for example use an entity for this,
	 * grrrrrr, how annoying.
	 *
	 */
	offset = 0;
	ndata = backtrack = val = NULL;
	if( *data == '%' ) {
		backtrack = data;
		ndata = screem_dtd_read_entity( data, &name );
		ndata = g_utf8_next_char( ndata );
		val = screem_dtd_lookup_entity( dtd, name, FALSE );
		g_free( name );
		if( val ) {
			/* match the flags against the expansion */
			data = val;
		}
	}
	
	if( ! strncmp( "O O", data, strlen( "O O" ) ) ) {
		/* open / close optional */
		ostate = cstate = SCREEM_DTD_SHOULD;
		offset += strlen( "O O" );
	} else if( ! strncmp( "- O EMPTY", data, strlen( "- O EMPTY" ) ) ) {
		ostate = SCREEM_DTD_MUST;
		cstate = SCREEM_DTD_MUST_NOT;
		offset += strlen( "- O EMPTY" );
	} else if( ! strncmp( "- O", data, strlen( "- O" ) ) ) {
		/* optional close */
		ostate = SCREEM_DTD_MUST;
		cstate = SCREEM_DTD_SHOULD;
		offset += strlen( "- O" );
	} else if( ! strncmp( "- -", data, strlen( "- -" ) ) ) {
		/* must close */
		ostate = cstate = SCREEM_DTD_MUST;
		offset += strlen( "- -" );
	} else if( ! strncmp( "EMPTY", data, strlen( "EMPTY" ) ) ) {
		ostate = SCREEM_DTD_MUST;
		cstate = SCREEM_DTD_MUST_NOT;
	} else {
		/* default to must close */
		ostate = cstate = SCREEM_DTD_MUST;

		if( ndata ) {
			/* entity didn't expand to open/close
			 * options, backtrack */
			data = backtrack;	
			ndata = NULL;
		}
	}
	if( ndata ) {
		/* flags came from the entity, continue after the
		 * reference in the original text */
		data = ndata;
	} else {
		data += offset;
	}
	/* on every path data points into the original text again, so
	 * the expansion can always be released; doing this only on the
	 * ndata path leaked it whenever we had to backtrack */
	g_free( val );

	data = g_utf8_skip_space( data );

	/* data should now be at the start of the allowed children */
	data = screem_dtd_name_parse( dtd, data, &group );

	/* group will contain allowed elements, it possibly doesn't
	   contain all exclusions / inclusions however */

	data = g_utf8_skip_space( data );
	exclude = include = NULL;
	if( *data != '>' && ( *data =='-' && data[ 1 ] != '-' ) ) {
		/* exclusion */
		data = screem_dtd_name_parse( dtd, data, &exclude );
	}
	data = g_utf8_skip_space( data );
	if( *data != '>' && *data =='+' ) {
		/* inclusion */
		data = screem_dtd_name_parse( dtd, data, &include );
	}

	data = g_utf8_skip_space( data );

	/* check for a description of the element as a comment */
	description = NULL;
	if( *data == '-' && data[ 1 ] == '-' ) {
		data = screem_dtd_bypass_comment( data, &description );	
	}

	/* process group, exclude, and include */
	hfunc = priv->hfunc;
	efunc = priv->efunc;

	table = g_hash_table_new( hfunc, efunc );
	tmp = NULL;
	if( group ) {
		tmp = screem_dtd_group_flatten( group, NULL );
		screem_dtd_group_free( group, NULL );
	}
	/* group keeps the head of the flattened list for freeing */
	for( group = tmp; tmp; tmp = tmp->siblings ) {
		if( tmp->name ) {
			allowed = g_hash_table_lookup( table, 
							tmp->name );
			if( tmp->type == SCREEM_DTD_EXCLUDE && 
				allowed ) {
				g_hash_table_remove( table, tmp->name );
				g_free( allowed->name );
				g_free( allowed );
			} else if( ! allowed ) {
				allowed = g_new( ScreemDTDAllowed, 1 );
				allowed->name = g_strdup( tmp->name );
				allowed->min = tmp->min;
				allowed->max = tmp->max;
				allowed->ref = 1;

				g_hash_table_insert( table, 
						allowed->name, 
						allowed );
			}
		}
	}
	if( group ) {
		screem_dtd_group_free( group, NULL );
	}
	group = NULL;
	if( exclude ) {
		group = screem_dtd_group_flatten( exclude, NULL );
	}
	for( tmp = group; tmp; tmp = tmp->siblings ) {
		if( tmp->name && tmp->type == SCREEM_DTD_EXCLUDE ) {
			allowed = g_hash_table_lookup( table, 
							tmp->name );
			if( allowed ) {
				g_hash_table_remove( table, tmp->name );
				g_free( allowed->name );
				g_free( allowed );
			}
		}
	}
	if( group ) {
		screem_dtd_group_free( group, NULL );
		group = NULL;
	}
	if( exclude ) {
		/* release the parsed tree even if flattening it
		 * produced nothing */
		screem_dtd_group_free( exclude, NULL );
	}
	if( include ) {
		group = screem_dtd_group_flatten( include, NULL );
	}	
	for( tmp = group; tmp; tmp = tmp->siblings ) {
		if( tmp->name && 
		    tmp->type != SCREEM_DTD_EXCLUDE &&
		    ! g_hash_table_lookup( table, tmp->name ) ) {
			allowed = g_new( ScreemDTDAllowed, 1 );
			allowed->name = g_strdup( tmp->name );
			allowed->min = tmp->min;
			allowed->max = tmp->max;
			allowed->ref = 1;
			g_hash_table_insert( table, 
					allowed->name, 
					allowed );
		}
	}
	if( group ) {
		screem_dtd_group_free( group, NULL );
	}
	if( include ) {
		/* as for exclude above */
		screem_dtd_group_free( include, NULL );
	}
	/* create elements */
	g_hash_table_foreach( table,
			(GHFunc)screem_dtd_copy_allowed_from_table,
			&copy );
	allowedlist = copy.list;
	
	for( tmp = name_group; tmp; tmp = tmp->siblings ) {
		if( tmp->name ) {
			element = g_new0( ScreemDTDElement, 1 );
			element->dtd = dtd;
			element->name = g_strdup( tmp->name );
			element->ostate = ostate;
			element->cstate = cstate;
			if( description ) {
				element->description = g_strdup( description );
			}
			/* avoid copying if we are the last one */
			if( ! tmp->siblings ) {
				element->allowed = table;
				element->allowedlist = allowedlist;
				allowedlist = NULL;
				table = NULL;
			} else {
				element->allowed = g_hash_table_new( hfunc,
							efunc );
				g_hash_table_foreach( table,
					(GHFunc)screem_dtd_copy_allowed_table,
					element );

				element->allowedlist = g_slist_copy( allowedlist );
			}
			element->attributes = g_hash_table_new( hfunc,
							efunc ); 
			g_hash_table_insert( priv->elements, 
					element->name, element );
			priv->elementlist = g_slist_insert_sorted( priv->elementlist, element, screem_dtd_sort_element_list );
		}
	}
	
	g_free( description );

	/* the table was not handed over to an element, drop it */
	if( table ) {
		g_hash_table_foreach( table,
				(GHFunc)screem_dtd_free_allowed_table, NULL );
		g_hash_table_destroy( table );
		g_slist_free( allowedlist );
	}

	screem_dtd_group_free( name_group, NULL );

	return data;
}

/* Parses the attribute definitions of an attribute list declaration.
 *
 * data:  position just after the element name(s); parsing runs up to
 *        the closing '>' or the end of the text
 * attrs: in/out list of ScreemDTDAttribute, newest first.  When it is
 *        not empty on entry its head is taken to be an attribute
 *        still under construction and parsing continues with it;
 *        this is how the function calls itself on the expansion of a
 *        parameter entity.
 *
 * Each attribute is read in order: name, value list / type, then
 * either a '#' keyword (#REQUIRED, #IMPLIED, #FIXED plus value) or a
 * plain default value.  Comments are skipped; the first one seen
 * becomes the attribute description.  A trailing attribute that never
 * got a name is removed from the list again.
 *
 * Returns the position at which parsing stopped, or NULL if data is
 * NULL.
 */
static const gchar *screem_dtd_parse_attlist_attrs( ScreemDTD *dtd,
					const gchar *data, 
					GSList **attrs )
{
	ScreemDTDAttribute *attr;
	gunichar c;
	gchar *ename;
	gchar *val;
	GSList *alist;
	GString *str;
	GUnicodeType type;
	ScreemDTDGroup *group;
	ScreemDTDGroup *flat;
	gboolean fixed;
	gchar *tmp;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), data );
	g_return_val_if_fail( data != NULL, NULL );
	g_return_val_if_fail( attrs != NULL, data );

	alist = *attrs;

	/* get current attribute or create new one */
	if( alist ) {
		attr = alist->data;
	} else {
		attr = g_new0( ScreemDTDAttribute, 1 );
		attr->ref = 1;
		alist = g_slist_prepend( NULL, attr );
		*attrs = alist;
	}

	str = g_string_new( NULL );
	fixed = FALSE;
	data = g_utf8_skip_space( data );
	while( *data != '\0' && *data != '>' ) {
		c = g_utf8_get_char( data );
		type = g_unichar_type( c );
		
		if( c == '\0' || c == '>' ) {
			break;
		} else if( c == '-' && data[ 1 ] == '-' ) {
			data = screem_dtd_bypass_comment( data,
						&tmp );
			if( ! attr->description ) {
				attr->description = tmp;
			} else {
				g_free( tmp );
			}
		} else if( ! attr->name && c == '%' ) {
			data = screem_dtd_read_entity( data, &ename );

			/* lookup entity name */
			val = screem_dtd_lookup_entity( dtd, ename, 
							FALSE );
			if( val && *val != '\0' ) {
				if( str->len > 0 ) {
					/* we already have part of
					 * a name */
					tmp = val;
					val = g_strconcat( str->str,
							tmp, NULL );
					g_free( tmp );
					g_string_assign( str, "" );
				}
					
				/* parse val */
				screem_dtd_parse_attlist_attrs( dtd,
								val,
								attrs );
				alist = *attrs;
				if( alist ) {
					attr = alist->data;
				} else {
					/* the nested parse discarded the
					 * attribute we were building (the
					 * expansion held nothing usable)
					 * and left the list empty; start
					 * afresh rather than carry on
					 * with the freed pointer */
					attr = g_new0( ScreemDTDAttribute, 1 );
					attr->ref = 1;
					alist = g_slist_prepend( NULL, attr );
					*attrs = alist;
				}
				/* it may have a name in attr now,
				 * however it may just be a prefix
				 * to the fullname, so we will
				 * treat it as such */
				if( attr && attr->name ) {
					g_string_assign( str,
							attr->name );
					g_free( attr->name );
					attr->name = NULL;
				}
			}
			g_free( val );
			g_free( ename );
		} else if( ! attr->name ) {
			/* we are parsing the attribute name */
			if( g_unichar_isalnum( c ) ||
			   c == '.' || c == '-' || c == '_' ||
			   c == ':' ||
			   type == G_UNICODE_COMBINING_MARK ){
				g_string_append_unichar( str, c );
			} else if( str->len > 0 ) {
				/* name parse done */
				attr->name = g_strdup( str->str );
				g_string_assign( str, "" );

				/* step back one, the end of the loop
				 * moves forward again */
				data = g_utf8_skip_space( data );
				data = g_utf8_prev_char( data );
			}
		} else if( ! attr->values ) {
			/* parsing values, check for uri type, then
			   do a normal name read */
			data = g_utf8_skip_space( data );	
			attr->isuri = ( ! strncmp( "%URI;", data, strlen( "%URI;" ) ) ||
				  ! strncmp( "%URL;", data, strlen( "%URL;" ) ) ||
				  ! strncmp( "%URL ", data, strlen( "%URL " ) ) ||
				  ! strncmp( "%URI.datatype;", data, 
					     strlen( "%URI.datatype;" ) ) );

			/* some doctypes may not declare a URI type,
			 * look at the attribute name and guess
			 * based on that */
			if( ! attr->isuri ) {
				attr->isuri = ( ! strcmp( "url",
							attr->name ) );
			}
			
			/* never hand an indeterminate pointer to the
			 * name parser */
			group = NULL;
			data = screem_dtd_name_parse( dtd, data, 
							&group );
			flat = NULL;
			if( group ) {
				flat = screem_dtd_group_flatten( group,
								NULL );
				screem_dtd_group_free( group, NULL );
				group = NULL;
			}
			for( group = flat; group; group = group->siblings ) {
				if( group->name ) {
					attr->values = g_slist_prepend( attr->values, g_strdup( group->name ) );
				}
			}
			if( flat ) {
				screem_dtd_group_free( flat, NULL );
				group = NULL;
			} else {
				/* failed to get a single value,
				   default to CDATA so we have something */
				   attr->values = g_slist_prepend( NULL, g_strdup( "CDATA" ) );
			}
			data = g_utf8_skip_space( data );
			data = g_utf8_prev_char( data );
		} else if( c == '#' ) {
			/* #IMPLIED or #REQUIRED, or #FIXED */
			if( ! strncmp( "#REQUIRED", data,
					strlen( "#REQUIRED" ) ) ) {
				attr->required = SCREEM_DTD_REQUIRED;
			} else if( ! strncmp( "#IMPLIED", data,
					strlen( "#IMPLIED" ) ) ) {
				attr->required = SCREEM_DTD_IMPLIED;
			} else if( ! strncmp( "#FIXED", data, 
					strlen( "#FIXED" ) ) ) {
				attr->required = SCREEM_DTD_FIXED;
			}
			/* skip the '#' and the keyword after it */
			data = g_utf8_next_char( data );
			c = g_utf8_get_char( data );
			while( g_unichar_isalpha( c ) ) {
				data = g_utf8_next_char( data );
				c = g_utf8_get_char( data );
			}
			if( attr->required == SCREEM_DTD_FIXED ) {
				data = g_utf8_skip_space( data );
				/* parse default value */
				group = NULL;
				data = screem_dtd_name_parse( dtd, data,
							&group );
				if( group && group->name ) {
					attr->default_value = g_strdup( group->name );
				}
				if( group ) {
					screem_dtd_group_free( group, 
							NULL );
					group = NULL;
				}
				/* FIXME: HACK */
				if( *data == '\'' || *data == '"' ) {
					data = g_utf8_next_char( data );
				}
			}
			data = g_utf8_skip_space( data );
			data = g_utf8_prev_char( data );
		} else if( attr->required == SCREEM_DTD_NO_TYPE &&
			   ! attr->default_value ) {
			data = g_utf8_skip_space( data );
			/* parse default value */
			group = NULL;
			data = screem_dtd_name_parse( dtd, data,
						&group );
			if( group && group->name ) {
				attr->default_value = g_strdup( group->name );
			}
			if( group ) {
				screem_dtd_group_free( group, NULL );
				group = NULL;
			}
		} else if( ! g_unichar_isspace( c ) ) {
			/* we must be starting a new attribute,
			   create it, and  backup data  */
			   attr = g_new0( ScreemDTDAttribute, 1 );
			   attr->ref = 1;
			   alist = g_slist_prepend( alist, attr );
			   *attrs = alist;
			   data = g_utf8_prev_char( data );
		}
		if( *data != '\0' && *data != '>' ) {
			data = g_utf8_next_char( data );
		}
	}

	if( ! attr->name ) {
		/* have a string, so use that as the attr->name */
		if( str->len ) {
			attr->name = g_strdup( str->str );
		} else {
			/* nothing usable was read for the newest
			 * attribute, unlink and discard it */
			if( alist ) {
				*attrs = alist->next;
				alist->next = NULL;
				g_slist_free( alist );
			}
			/* a comment may have been stored before any
			 * name was seen */
			g_free( attr->description );
			g_free( attr );
		}
	}
	g_string_free( str, TRUE );

	return data;
}

/* Parse a single ATTLIST declaration.
 *
 * dtd:  the DTD whose elements receive the parsed attributes
 * data: text positioned at the declaration keyword, i.e. it is
 *       expected to start with "ATTLIST "
 *
 * The element name (or group of names) following the keyword is parsed
 * and flattened, the attribute definitions are parsed, and every
 * attribute not already known to an element is added to that element's
 * attribute hash table and attribute list.
 *
 * Returns the position reached in data, or data unchanged when dtd is
 * not a ScreemDTD or the text does not start with "ATTLIST ". */
static const gchar *screem_dtd_parse_attlist( ScreemDTD *dtd,
					      const gchar *data )
{
	ScreemDTDPrivate *priv;
	ScreemDTDGroup *group;
	ScreemDTDGroup *name_group;
	ScreemDTDGroup *tmp;
	ScreemDTDElement *element;
	ScreemDTDAttribute *attribute;
	GSList *list;
	GSList *tlist;
	GSList *copy;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), data );
	
	priv = dtd->priv;
	
	/* check this is an attlist */
	if( strncmp( "ATTLIST ", data, strlen( "ATTLIST " ) ) ) {
		return data;
	}

	data += strlen( "ATTLIST " );
	data = g_utf8_skip_space( data );
	
	/* data should now be at the start of the element name */
	group = NULL;
	data = screem_dtd_name_parse( dtd, data, &group );

	/* flatten out the tree of names so it is simple
	   to use */
	name_group = NULL;
	if( group ) {
		name_group = screem_dtd_group_flatten( group, NULL );
		screem_dtd_group_free( group, NULL );
	}

	data = g_utf8_skip_space( data );
	list = NULL;
	data = screem_dtd_parse_attlist_attrs( dtd, data, &list );
	if( list ) {
		list = g_slist_sort_with_data( list, 
				screem_dtd_sort_attr_list, dtd );
	
		/* copy attributes to all elements */ 
		for( tmp = name_group; tmp; tmp = tmp->siblings ) {
			if( ! tmp->name ) {
				continue;
			}
			element = g_hash_table_lookup( priv->elements,
							tmp->name );
			if( ! element ) {
				continue;
			}
			/* a crash was occurring / valgrind was giving
			 * errors when we were using
			 * copy = g_slist_copy( list );
			 * theoretically that should be no different
			 * to what is here now, prepending each item
			 * and reversing should be identical, however
			 * the crash goes, and valgrind stops warning
			 * like this */
			copy = NULL;
			for( tlist = list; tlist; tlist = tlist->next ) {
				attribute = (ScreemDTDAttribute*)tlist->data;
				/* a nameless attribute can not be used
				 * as a hash table key, skip it */
				if( ! attribute->name ) {
					continue;
				}
				if( ! g_hash_table_lookup( element->attributes, attribute->name ) ) {
					g_hash_table_insert( element->attributes, attribute->name, attribute );
					attribute->ref ++;
					copy = g_slist_prepend( copy, attribute );
				}
			}
			/* XMLResume has multiple ATTLISTs for
			 * elements, we need to allow that */
			copy = g_slist_reverse( copy );
			element->attrlist = g_slist_concat( element->attrlist, copy );
		}
		/* drop the parser's own reference on each attribute;
		 * those stored in an element were given an extra
		 * reference above */
		g_slist_foreach( list, 
				(GFunc)screem_dtd_free_attribute, NULL );
		g_slist_free( list );
	}
	if( name_group ) {
		screem_dtd_group_free( name_group, NULL );
	}

	return data;
}

/* Parse a name, or a parenthesised group of names, from pattern.
 *
 * dtd:     DTD used to look up parameter entities met in the pattern
 * pattern: text to parse
 * group:   out parameter; set to a newly allocated group tree when
 *          any text was collected, otherwise set to NULL
 *
 * Works in two passes: the first builds one string with all '%'
 * entity references replaced by their expanded values, stopping at
 * '>' or at whitespace found outside of parentheses and quotes; the
 * second hands that string to screem_dtd_name_parse2().
 *
 * Returns a pointer to the character that terminated the pattern, or
 * to the last character consumed when the end of the text was reached.
 * An empty pattern is returned unchanged. */
static const gchar *screem_dtd_name_parse( ScreemDTD *dtd,
					const gchar *pattern, 
					ScreemDTDGroup **group )
{
	const gchar *pass1;
	GString *string;
	gunichar c;
	gint depth;
	gboolean quote;
	gboolean passed;
	gchar *name;
	gchar *val;
	ScreemDTDGroup *new_group;
	gunichar qchar;

	*group = NULL;

	string = g_string_new( NULL );
	
	qchar = '\0';
	passed = FALSE;
	quote = FALSE;
	depth = 0;
	pass1 = pattern;

	/* first pass, build up complete string */
	while( *pass1 != '\0' && ! passed ) {
		
		c = g_utf8_get_char( pass1 );
		
		switch( c ) {
			case '>':
				passed = TRUE;
				break;
			case '%':
				/* make sure name is defined even if
				 * no entity name could be read */
				name = NULL;
				pass1 = screem_dtd_read_entity( pass1,
								&name );

				/* lookup entity name */
				val = NULL;
				if( name ) {
					val = screem_dtd_lookup_entity( dtd, 
								name, 
								TRUE );
				}
				if( val ) {
					g_string_append( string, val );
					g_free( val );
				}
				g_free( name );
				break;
			case '(':
				depth ++;
				g_string_append_unichar( string, c );
				break;
			case ')':
				depth --;
				g_string_append_unichar( string, c );
				break;
			case '"':
			case '\'':
				/* only the quote character that opened
				 * a quoted section may close it */
				if( qchar == '\0' ||
				    qchar == c ) {
					quote = !quote;
					qchar = c;
					if( ! quote ) {
						qchar = '\0';
					}
				}
				g_string_append_unichar( string,
							c );
				break;
			default:
				if( g_unichar_isspace( c ) &&
				    depth == 0 && ! quote ) {
					/* catch end of pattern */
					passed = TRUE;
				} else {
					/* NOTE: no validity checking done here
					   we only want to build up the complete
					   string */
					g_string_append_unichar( string, c );	
				}
				break;
		}

		/* reading an entity may have left us on the terminating
		 * NUL, never step beyond it */
		if( *pass1 != '\0' ) {
			pass1 = g_utf8_next_char( pass1 );
		}
	}
	if( string->len > 0 ) {
		/* second pass, parse string */

		new_group = g_new0( ScreemDTDGroup, 1 );
		new_group->type = SCREEM_DTD_OR;
		
		screem_dtd_name_parse2( dtd, string->str, new_group );
		*group = new_group;
	}
	g_string_free( string, TRUE );
	
	/* back up onto the terminating character, but never to before
	 * the start of the text we were given */
	if( pass1 > pattern ) {
		pattern = g_utf8_prev_char( pass1 );
	}

	return pattern;
}

/* Second pass of name parsing: walk the entity expanded pattern one
 * character at a time, storing each name found in the current group
 * and creating sibling / child groups as separators and parentheses
 * are met.
 *
 * dtd:     passed through to recursive calls, not otherwise used here
 * pattern: text to parse
 * group:   group to fill in first; further groups are linked from it
 *
 * Calls itself for the content of each '(' block.
 *
 * Returns the position at which parsing stopped: either the ')' that
 * closed this level or the terminating NUL. */
static const gchar *screem_dtd_name_parse2( ScreemDTD *dtd,
					const gchar *pattern, 
					ScreemDTDGroup *group )
{
	const gchar *pass2;
	GString *string;
	gunichar c;
	gunichar p;
	ScreemDTDGroup *sub;
	
	string = g_string_new( NULL );

	c = '\0';
	for( pass2 = pattern; *pass2 != '\0';
		pass2 = g_utf8_next_char( pass2 ) ) {

		/* p is the previous character, used below to detect
		 * a '-' placed in front of a name or a '(' */
		p = c;
		c = g_utf8_get_char( pass2 );

		if( c == '(' || c == ')' || c == '|' ||
		    c == ',' || c == '&' ) {
			/* separator, we have a new name
			   if string->len > 0 */
			if( string->len > 0 ) {
				group->name = g_strdup( string->str );	
			}
			g_string_assign( string, "" );
		}

		if( c == '*' ) {
			/* max of -1 is used together with '*' and '+',
			 * presumably meaning no upper limit */
			group->min = 0;
			group->max = -1;
		} else if( c == '+' ) {
			/* NOTE(review): string->str looks to be always
			 * non-NULL for a GString, which would make this
			 * condition always true - confirm */
			if( string->str || group->name ) {
				group->min = 1;
				group->max = -1;
			} else {
				/* stupid sgml + thing */
			}
		} else if( c == '?' ) {
			group->min = 0;
			group->max = 1;
		} else if( c == ',' ) {
			/* add sibling SCREEM_DTD_SEQUENCE group */
			group = screem_dtd_group_new_sibling( group, SCREEM_DTD_SEQUENCE );
		} else if( c == '|' || c == '&' ) {
			/* '&' is treated the same as '|' here */
			group = screem_dtd_group_new_sibling( group, SCREEM_DTD_OR );
		} else if( c == '(' ) {
			/* begin new block */

			/* if the current group has a name, then
			   we haven't got a correct separator,
			   we will create a new group to deal with
			   this */
			sub = screem_dtd_group_new_child( group, SCREEM_DTD_OR );
			if( p == '-' ) {
				sub->type = SCREEM_DTD_EXCLUDE;
			}
			
			/* step over the '(' and parse the block content
			 * into the new child group */
			pass2 = g_utf8_next_char( pass2 );
			if( *pass2 != '\0' ) {
				pass2 = screem_dtd_name_parse2( dtd, 
							pass2, sub );
			}
			/* backup if needed */
			/* so that the loop increment lands back on the
			 * NUL rather than stepping beyond it */
			if( *pass2 == '\0' ) {
				pass2 = g_utf8_prev_char( pass2 );
			}
		} else if( c == ')' ) {
			/* end of this block, the caller continues after
			 * the ')' */
			break;
		} else if( string->len == 0 && 
			   ( g_unichar_isalnum( c ) || c == ':' ) ) {
			/* first char of name */
			g_string_append_unichar( string, c );

			if( p == '-' ) {
				group->type = SCREEM_DTD_EXCLUDE;
			}
		} else if( string->len > 0 &&
			   ( g_unichar_isalnum( c ) ||
			     c == '.' || c == '-' || c == '_' ||
			     c == ':' || c == '/' ||
			     g_unichar_type( c )  == G_UNICODE_COMBINING_MARK ) ) {
			   /* valid name char */
			g_string_append_unichar( string, c );
		} else if( c != ',' && c != '|' ) {
			/* name termination caused by non valid
			   name char */
			if( string->len > 0 ) {
				group->name = g_strdup( string->str );	
				group = screem_dtd_group_new_sibling( group, SCREEM_DTD_OR );
			}
			g_string_assign( string, "" );
		}
	}
	/* a name still being collected when the text ended belongs to
	 * the current group */
	if( string->len > 0 ) {
		group->name = g_strdup( string->str );	
	}
	g_string_free( string, TRUE );

	return pass2;
}

/* Create a new group of the given type as the last child of parent.
 *
 * The last child in a chain has its siblings pointer set to the
 * parent; the first child created is linked that way here, later
 * children are inserted after the current last child.
 *
 * Returns the newly allocated group. */
static ScreemDTDGroup *screem_dtd_group_new_child( ScreemDTDGroup *parent, ScreemDTDGroupType type )
{
	ScreemDTDGroup *last;
	ScreemDTDGroup *child;

	if( parent->children ) {
		/* walk to the child whose siblings pointer leads back
		 * to the parent, and add the new group after it */
		last = parent->children;
		while( last->siblings != parent ) {
			last = last->siblings;
		}
		return screem_dtd_group_new_sibling( last, type );
	}

	/* no children yet, the new group becomes the only child */
	child = g_new0( ScreemDTDGroup, 1 );
	child->type = type;
	child->siblings = parent;
	parent->children = child;

	return child;
}

/* Allocate a group of the given type and insert it directly after
 * group in its sibling chain; whatever followed group now follows the
 * new group.
 *
 * Returns the newly allocated group. */
static ScreemDTDGroup *screem_dtd_group_new_sibling( ScreemDTDGroup *group, ScreemDTDGroupType type )
{
	ScreemDTDGroup *inserted;

	inserted = g_new0( ScreemDTDGroup, 1 );
	inserted->siblings = group->siblings;
	inserted->type = type;

	group->siblings = inserted;

	return inserted;
}

/* Build a flat copy of a group tree: every group is copied (type,
 * name, min, max) into one sibling chain, with the copies of a
 * group's children placed directly after the copy of the group.
 *
 * group: first group to copy
 * root:  value marking the end of group's sibling chain; NULL for the
 *        top level, the parent group for a chain of children
 *
 * Returns the head of the newly allocated chain. */
static ScreemDTDGroup *screem_dtd_group_flatten( ScreemDTDGroup *group, ScreemDTDGroup *root )
{
	ScreemDTDGroup *flat;
	ScreemDTDGroup *tail;

	flat = g_new0( ScreemDTDGroup, 1 );
	flat->type = group->type;
	/* g_strdup() gives NULL back for a NULL name */
	flat->name = g_strdup( group->name );
	flat->min = group->min;
	flat->max = group->max;

	/* children are flattened in directly after their parent */
	if( group->children ) {
		flat->siblings = screem_dtd_group_flatten( group->children, 
						group );
	}

	/* locate the end of what has been built so far */
	tail = flat;
	if( flat->siblings ) {
		tail = flat->siblings;
		while( tail->siblings &&
			tail->siblings != group &&
			tail->siblings != root ) {
			tail = tail->siblings;
		}
	}

	/* and continue with the rest of this level */
	if( group->siblings != root ) {
		tail->siblings = screem_dtd_group_flatten( group->siblings,
						    root );
	}

	return flat;
}

/* Free group, all of its children, and every group following it in
 * its sibling chain.
 *
 * parent marks the end of the chain: a siblings pointer equal to
 * parent (or NULL) is not followed.  Pass NULL for a top level
 * chain. */
static void screem_dtd_group_free( ScreemDTDGroup *group, ScreemDTDGroup *parent )
{
	ScreemDTDGroup *next;

	g_return_if_fail( group != NULL );

	while( group ) {
		/* remember where to go next before releasing the group */
		next = group->siblings;
		if( next == parent ) {
			next = NULL;
		}

		g_free( group->name );
		if( group->children ) {
			screem_dtd_group_free( group->children, group );
		}
		g_free( group );

		group = next;
	}
}

/* Hash table iteration callback: insert the name of the allowed entry
 * in value into the list held by the ScreemDTDAllowedCopy passed as
 * user_data, keeping the list ordered by the DTD's compare function.
 *
 * key is not used.  Always returns FALSE. */
static gboolean screem_dtd_copy_allowed_from_table( gpointer key, 
					gpointer value,
					gpointer user_data  )
{
	ScreemDTDAllowedCopy *target = (ScreemDTDAllowedCopy*)user_data;
	ScreemDTDAllowed *entry = (ScreemDTDAllowed*)value;
	GCompareFunc order = target->dtd->priv->compare;

	target->list = g_slist_insert_sorted( target->list, 
			entry->name, order );

	return FALSE;
}

/* Hash table iteration callback: share the allowed entry in value
 * with the element passed as user_data by taking a reference on it
 * and inserting it, keyed by its name, into the element's allowed
 * table.
 *
 * key is not used.  Always returns FALSE. */
static gboolean screem_dtd_copy_allowed_table( gpointer key, 
					gpointer value,
					gpointer user_data  )
{
	ScreemDTDElement *dest = (ScreemDTDElement*)user_data;
	ScreemDTDAllowed *entry = (ScreemDTDAllowed*)value;

	entry->ref ++;
	g_hash_table_insert( dest->allowed, entry->name, entry );

	return FALSE;
}

/* Hash table iteration callback: drop one reference on the allowed
 * entry in value, freeing the entry and its name once the reference
 * count falls below 1.
 *
 * key and user_data are not used.  Always returns FALSE. */
static gboolean screem_dtd_free_allowed_table( gpointer key, 
					gpointer value,
					gpointer user_data )
{
	ScreemDTDAllowed *entry = (ScreemDTDAllowed*)value;

	if( -- entry->ref >= 1 ) {
		/* still referenced from elsewhere */
		return FALSE;
	}

	g_free( entry->name );
	g_free( entry );

	return FALSE;
}

/* Hash table iteration callback: free the entity in value together
 * with its name, value and description strings.
 *
 * key and user_data are not used.  Always returns FALSE. */
static gboolean screem_dtd_free_entity_table( gpointer key,
					gpointer value,
					gpointer user_data )
{
	ScreemDTDEntity *doomed = (ScreemDTDEntity*)value;

	g_free( doomed->description );
	g_free( doomed->value );
	g_free( doomed->name );
	g_free( doomed );

	return FALSE;
}

/* Hash table iteration callback: free the element in value.
 *
 * Releases the element's strings, its list containers, and both of
 * its hash tables after giving every entry in them to the matching
 * free callback.  The lists are freed without touching their data.
 *
 * key and user_data are not used.  Always returns FALSE. */
static gboolean screem_dtd_free_element_table( gpointer key,
					gpointer value,
					gpointer user_data )
{
	ScreemDTDElement *doomed = (ScreemDTDElement*)value;

	g_free( doomed->name );
	g_free( doomed->description );

	/* g_slist_free() accepts an empty (NULL) list */
	g_slist_free( doomed->allowedlist );
	g_slist_free( doomed->optional );

	/* allowed child elements */
	g_hash_table_foreach( doomed->allowed,
			(GHFunc)screem_dtd_free_allowed_table, NULL );
	g_hash_table_destroy( doomed->allowed );

	/* attributes */
	g_slist_free( doomed->attrlist );
	g_hash_table_foreach( doomed->attributes,
			(GHFunc)screem_dtd_free_attribute_table, 
			NULL );
	g_hash_table_destroy( doomed->attributes );

	g_free( doomed );

	return FALSE;
}

/* Hash table iteration callback: hand the attribute in value to
 * screem_dtd_free_attribute().
 *
 * key and user_data are not used.  Always returns FALSE. */
static gboolean screem_dtd_free_attribute_table( gpointer key,
						gpointer value,
						gpointer user_data )
{
	screem_dtd_free_attribute( (ScreemDTDAttribute*)value );

	return FALSE;
}

/* Drop one reference on attr.  Once the reference count falls below 1
 * the attribute, its strings, and its list of values (each value
 * being freed with g_free()) are released. */
static void screem_dtd_free_attribute( ScreemDTDAttribute *attr )
{
	GSList *node;

	attr->ref --;
	if( attr->ref >= 1 ) {
		/* still in use elsewhere */
		return;
	}

	g_free( attr->name );
	g_free( attr->default_value );
	g_free( attr->description );

	for( node = attr->values; node; node = node->next ) {
		g_free( node->data );
	}
	g_slist_free( attr->values );

	g_free( attr );
}

/* List comparison callback: order two ScreemDTDElement by name using
 * the compare function of the DTD the first element refers to.
 *
 * Returns whatever that compare function returns. */
static gint screem_dtd_sort_element_list( gconstpointer a,
					gconstpointer b )
{
	const ScreemDTDElement *lhs = (const ScreemDTDElement*)a;
	const ScreemDTDElement *rhs = (const ScreemDTDElement*)b;
	ScreemDTD *owner;

	owner = SCREEM_DTD( lhs->dtd );

	return owner->priv->compare( lhs->name, rhs->name );
}


/* List comparison callback: order two ScreemDTDAttribute by name.
 *
 * a, b: the attributes to compare
 * data: the ScreemDTD whose compare function is used
 *
 * Attributes without a name sort before those with one, and two
 * nameless attributes compare equal; the DTD's compare function is
 * only called when both names are present.
 *
 * Returns a negative value, 0, or a positive value. */
static gint screem_dtd_sort_attr_list( gconstpointer a,
					gconstpointer b,
					gpointer data )
{
	ScreemDTD *dtd;
	const ScreemDTDAttribute *aattr;
	const ScreemDTDAttribute *battr;
	gint ret;

	aattr = (const ScreemDTDAttribute*)a;
	battr = (const ScreemDTDAttribute*)b;

	dtd = SCREEM_DTD( data );

	if( aattr->name && ! battr->name ) {
		ret = 1;
	} else if( battr->name && ! aattr->name ) {
		ret = -1;
	} else if( ! aattr->name ) {
		/* neither has a name, never pass NULL on to the
		 * compare function */
		ret = 0;
	} else {
		ret = dtd->priv->compare( aattr->name, battr->name );
	}

	return ret;
}

/* List iteration callback: build the optional list of the element
 * passed as data.
 *
 * Every name in the element's allowed list is looked up in the
 * element's DTD; those which resolve to an element whose ostate is
 * SCREEM_DTD_SHOULD are collected and stored as element->optional.
 *
 * user_data is not used. */
static void screem_dtd_build_optional( gpointer data, 
					gpointer user_data )
{
	ScreemDTDElement *element = (ScreemDTDElement *)data;
	const ScreemDTDElement *child;
	const GSList *node;
	GSList *found;

	found = NULL;
	for( node = screem_dtd_element_get_allowed( element ); node;
		node = node->next ) {
		child = screem_dtd_valid_element( element->dtd,
				node->data );
		if( ! child || child->ostate != SCREEM_DTD_SHOULD ) {
			continue;
		}
		found = g_slist_prepend( found, (gpointer)child );	
	}

	element->optional = found;
}

/* Work out the root element of dtd.
 *
 * The first element in the element list whose name is not present in
 * the allowed table of any element is taken to be the root, and a
 * copy of its name replaces priv->root.  priv->root is left alone
 * when no such element exists. */
static void screem_dtd_find_root( ScreemDTD *dtd )
{
	ScreemDTDPrivate *priv;
	const GSList *outer;
	const GSList *inner;
	const gchar *candidate;
	const ScreemDTDElement *holder;
	
	g_return_if_fail( SCREEM_IS_DTD( dtd ) );
	
	priv = dtd->priv;

	for( outer = priv->elementlist; outer; outer = outer->next ) {
		candidate = screem_dtd_element_get_name( outer->data );

		/* stop at the first element allowing the candidate */
		for( inner = priv->elementlist; inner; 
			inner = inner->next ) {
			holder = (ScreemDTDElement*)inner->data;
			if( g_hash_table_lookup( holder->allowed,
						candidate ) ) {
				break;
			}
		}

		if( ! inner ) {
			/* nothing allows it, candidate is the
			 * calculated root element */
			g_free( priv->root );
			priv->root = g_strdup( candidate );
			return;
		}
	}
}

/* public stuff */

ScreemDTD *screem_dtd_new( const gchar *publicid, 
			   const gchar *systemid )
{
	ScreemDTD *dtd;
	
	dtd = SCREEM_DTD( g_object_new( SCREEM_TYPE_DTD, 
					"public", publicid,
					"system", systemid,
					NULL ) );

	return dtd;
}

const gchar *screem_dtd_get_public_id( const ScreemDTD *dtd )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	
	return dtd->priv->public_id;
}

const gchar *screem_dtd_get_system_id( const ScreemDTD *dtd )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	
	return dtd->priv->system_id;
}

const gchar *screem_dtd_get_root_name( const ScreemDTD *dtd )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );

	return dtd->priv->root;
}

/* Parse DTD text into dtd.
 *
 * dtd:   the DTD to fill in
 * parse: parse context; parse->userdata is used as the ScreemDTDDB
 *        and parse->resolve_entity, when set, is called to fetch the
 *        text of external entities
 * data:  the DTD text
 *
 * The text is scanned for '%' parameter entity references and "<!"
 * declarations.  An entity whose expanded value yields a public or
 * system identifier is resolved through parse->resolve_entity and the
 * resulting text is parsed by a recursive call.  Comments, ELEMENT,
 * ENTITY and ATTLIST declarations are passed to their own parsers,
 * and a "<![" section is skipped up to "]]>" unless its entity
 * expands to "INCLUDE".
 *
 * priv->rooturi holds the directory used to resolve relative system
 * identifiers for the text currently being parsed, priv->toproot
 * that of the outermost call.  When the two match on the way out, the
 * optional lists and the root element are calculated. */
void screem_dtd_parse( ScreemDTD *dtd, ScreemDTDParse *parse,
			const gchar *data )
{
	ScreemDTDPrivate *priv;
	ScreemDTDDB *db;
	gchar *name;
	gchar *val;
	gchar *publicid;
	gchar *systemid;
	gchar *data2;
	gboolean feature;
	const gchar *tmp;

	gchar *rooturi;
	
	g_return_if_fail( SCREEM_IS_DTD( dtd ) );
	g_return_if_fail( parse != NULL );
	g_return_if_fail( data != NULL );

	priv = dtd->priv;
	db = SCREEM_DTD_DB( parse->userdata );

	if( ! priv->rooturi ) {
		rooturi = (gchar*)screem_dtd_get_system_id( dtd );
		if( rooturi ) {
			priv->rooturi = g_path_get_dirname( rooturi );
			priv->toproot = priv->rooturi;
		}
	}
	
	while( *data != '\0' ) {

		/* skip to the next thing of interest */
		while( *data != '\0' && *data != '<' && *data != '%' ) {
			data = g_utf8_next_char( data );
		}

		if( *data == '%' ) {
			/* make sure name is defined even if no entity
			 * name could be read */
			name = NULL;
			data = screem_dtd_read_entity( data, &name );

			/* lookup entity name */
			val = NULL;
			if( name ) {
				val = screem_dtd_lookup_entity( dtd, name, 
								TRUE );
			}
			publicid = systemid = NULL;

			if( val ) {
				screem_dtd_db_parse_doctype( db, val,
							&publicid, 
							&systemid,
							NULL );
			}
			
			if( parse->resolve_entity &&
			    ( publicid || systemid ) ) {
				/* set rooturi */
				tmp = priv->rooturi;
			
				rooturi = relative_to_full( systemid, 
						tmp );
				priv->rooturi = g_path_get_dirname( rooturi );
				data2 = parse->resolve_entity( db,
							publicid,
							rooturi );
				/* failed to resolve entity,
				 * try with different root */
				if( ! data2 ) {
					g_free( rooturi );

					rooturi = relative_to_full( systemid, 
						priv->toproot );
					data2 = parse->resolve_entity( db,
							publicid,
							rooturi );
				}
				g_free( rooturi );
				if( data2 ) {
					screem_dtd_parse( dtd, parse,
							  data2 );
					g_free( data2 );

				}
				/* restore rooturi */
				g_free( priv->rooturi );
				priv->rooturi = (gchar*)tmp;
			}

			/* the identifiers are ours to free whether or
			 * not they could be resolved */
			g_free( publicid );
			g_free( systemid );

			g_free( name );
			g_free( val );
		} else if( *data == '<' && data[ 1 ] == '!' ) {
			data = g_utf8_next_char( data );
			data = g_utf8_next_char( data );

			if( ! strncmp( "--", data, strlen( "--" ) ) ) {
				/* comment */
				data = screem_dtd_bypass_comment( data,
								 NULL );
			} else if( ! strncmp( "ELEMENT ", data,
						strlen( "ELEMENT " ) ) ) {
				/* element declaration */
				data = screem_dtd_parse_element( dtd, data );
			} else if( ! strncmp( "ENTITY ", data,
						strlen( "ENTITY " ) ) ) {
				/* entity declaration */
				data = screem_dtd_parse_entity( dtd, data );
			} else if( ! strncmp( "ATTLIST ", data,
						strlen( "ATTLIST " ) ) ) {
				/* attlist declaration */
				data = screem_dtd_parse_attlist( dtd, data );
			} else if( *data == '[' ) {
				/* feature switch */
				data = g_utf8_next_char( data );	

				name = NULL;
				data = screem_dtd_read_entity( data, 
								&name );
				feature = FALSE;
				if( name ) {
					/* lookup entity name */
					val = screem_dtd_lookup_entity( dtd, name, TRUE );
					feature = ( val && ! strcmp( "INCLUDE", val ) );
					g_free( val );
					g_free( name );
				}
				if( ! feature ) {
					/* feature is off, skip to ]]> */
					tmp = strstr( data, "]]>" );
					if( tmp ) {
						data = tmp + strlen( "]]>" );
					}
				}
			}
		} else if( *data != '\0' && *data != '>' ) {
			data = g_utf8_next_char( data );
		}
	}

	/* this will occur at the top level screem_dtd_parse() for
	 * this dtd */
	if( priv->rooturi == priv->toproot ) {
		g_slist_foreach( priv->elementlist,
				screem_dtd_build_optional, dtd );
		screem_dtd_find_root( dtd );
		g_free( priv->rooturi );
		priv->rooturi = NULL;
		/* toproot pointed at the string just freed */
		priv->toproot = NULL;
	}
}

/* Produce a text dump of dtd by running screem_dtd_dump_entities()
 * over the entity table and then screem_dtd_dump_elements() over the
 * element table, each being given the same output string.
 *
 * Returns a newly allocated string, or NULL when dtd is not a
 * ScreemDTD. */
gchar *screem_dtd_dump( const ScreemDTD *dtd )
{
	ScreemDTDPrivate *priv;
	GString *out;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );

	priv = dtd->priv;
	out = g_string_new( NULL );

	/* entities first, then elements */
	g_hash_table_foreach( priv->entities,
			(GHFunc)screem_dtd_dump_entities,
			out );
	g_hash_table_foreach( priv->elements,
			(GHFunc)screem_dtd_dump_elements,
			out );

	/* keep the character data, release only the GString wrapper */
	return g_string_free( out, FALSE );
}

/* Look up the value of the entity called name.
 *
 * dtd:    the DTD holding the entity
 * name:   the entity name
 * expand: when TRUE, and the entity is not flagged as general, every
 *         '%' reference inside the value is itself looked up and
 *         expanded recursively; references which can not be resolved
 *         expand to nothing
 *
 * Returns a newly allocated string which the caller must g_free(), or
 * NULL when the arguments are invalid, the entity is unknown (a
 * warning is logged), or the entity has no value. */
gchar *screem_dtd_lookup_entity( const ScreemDTD *dtd, 
				const gchar *name,
				gboolean expand )
{
	ScreemDTDEntity *entity;
	GString *exp;
	gunichar c;
	const gchar *val;
	gchar *ret;
	gchar *tmp;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	g_return_val_if_fail( name != NULL, NULL );
	
	entity = g_hash_table_lookup( dtd->priv->entities, name );

	if( ! entity ) {
		g_warning( "Unknown entity: %s\n", name );
		return NULL;
	}

	val = entity->value;
	
	if( ! expand || ! val ) {
		/* g_strdup() gives NULL back for a NULL value */
		return g_strdup( val );
	}

	exp = g_string_new( NULL );
	while( *val != '\0' ) {
		c = g_utf8_get_char( val );
		if( c != '%' || entity->general ) {
			g_string_append_unichar( exp, c );
		} else { 
			tmp = NULL;
			val = screem_dtd_read_entity( val, &tmp );		
			if( tmp ) {
				ret = screem_dtd_lookup_entity( dtd, tmp, TRUE );
				if( ret ) {
					g_string_append( exp, ret );
					g_free( ret );
				}
				g_free( tmp );
			}
		}
		/* reading an entity may have left us on the terminating
		 * NUL, never step beyond it */
		if( *val != '\0' ) {
			val = g_utf8_next_char( val );
		}
	}

	/* keep the character data, release only the GString wrapper */
	return g_string_free( exp, FALSE );
}

const ScreemDTDElement *screem_dtd_valid_element( const ScreemDTD *dtd,
					    const gchar *element )
{
	ScreemDTDElement *ret;

	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	g_return_val_if_fail( element != NULL, NULL );

	ret = g_hash_table_lookup( dtd->priv->elements, element );
	
	return ret;
}

gboolean screem_dtd_valid_child( const ScreemDTD *dtd,
				 const gchar *element,
				 const gchar *child )
{
	const ScreemDTDElement *elem;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), FALSE );

	if( ! element || ! child ) {
		return FALSE;
	}
	
	elem = screem_dtd_valid_element( dtd, element );
	if( elem ) {
		if( ! screem_dtd_valid_child_element( dtd, elem, 
						child ) ) {
			elem = NULL;
		}
	}

	return ( elem != NULL );
}
gboolean screem_dtd_valid_child_element( const ScreemDTD *dtd,
					const ScreemDTDElement *parent,
					const gchar *child )
{
	gboolean ret;
	const GSList *tmp;
	const ScreemDTDElement *optelem;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), FALSE );

	if( ! parent || ! child ) {
		return FALSE;
	}

	ret = ( g_hash_table_lookup( parent->allowed, child ) != NULL );

	if( ! ret ) {
		tmp = parent->optional;
		while( tmp && ! ret ) {
			optelem = tmp->data;
			ret = screem_dtd_valid_child_element( dtd, 
					optelem, child );
			tmp = tmp->next;
		}
	}

	return ret;
}

/* Get the close tag state (cstate) of the element called element.
 *
 * Returns the element's cstate, or SCREEM_DTD_MUST when the element
 * is not known to dtd or an argument is invalid. */
ScreemDTDTagExistance 
screem_dtd_element_close_state( const ScreemDTD *dtd,
				const gchar *element )
{
	const ScreemDTDElement *found;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), SCREEM_DTD_MUST );
	g_return_val_if_fail( element != NULL, SCREEM_DTD_MUST );

	found = screem_dtd_valid_element( dtd, element );

	return found ? found->cstate : SCREEM_DTD_MUST;
}

/* Get the close tag state (cstate) of element.
 *
 * Returns element->cstate, or SCREEM_DTD_MUST when element is NULL or
 * dtd is not a ScreemDTD. */
ScreemDTDTagExistance 
screem_dtd_element_close_element_state( const ScreemDTD *dtd,
				const ScreemDTDElement *element )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), SCREEM_DTD_MUST );

	return element ? element->cstate : SCREEM_DTD_MUST;
}

/* Check whether element is the root element of dtd, comparing names
 * with the DTD's compare function.
 *
 * Returns TRUE on a match; FALSE otherwise, including when the DTD
 * has no root name or an argument is invalid. */
gboolean screem_dtd_is_root_element( const ScreemDTD *dtd,
				     const gchar *element )
{
	ScreemDTDPrivate *priv;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), FALSE );
	g_return_val_if_fail( element != NULL, FALSE );

	priv = dtd->priv;
	if( ! priv->root ) {
		return FALSE;
	}

	return ! priv->compare( element, priv->root );
}

/* Look up, by name, the attribute attr of the element called element.
 *
 * Returns the attribute, or NULL when the element is not known to
 * dtd, the element has no such attribute, or an argument is
 * invalid. */
const ScreemDTDAttribute *screem_dtd_valid_attr( const ScreemDTD *dtd,
						 const gchar *element,
						 const gchar *attr )
{
	const ScreemDTDElement *owner;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	g_return_val_if_fail( element != NULL, NULL );
	g_return_val_if_fail( attr != NULL, NULL );
	
	owner = screem_dtd_valid_element( dtd, element );
	if( ! owner ) {
		return NULL;
	}

	return g_hash_table_lookup( owner->attributes, attr );
}
/* Look up the attribute called attr in the attribute table of
 * element.
 *
 * Returns the attribute, or NULL when there is none, when element or
 * attr is NULL, or when dtd is not a ScreemDTD. */
const ScreemDTDAttribute *screem_dtd_valid_element_attr( const ScreemDTD *dtd,
					const ScreemDTDElement *element,
					const gchar *attr )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );
	
	if( element == NULL || attr == NULL ) {
		return NULL;
	}
	
	return g_hash_table_lookup( element->attributes, attr );
}


/* Check, by name, whether the attribute attr of the element called
 * element is flagged as holding a URI.
 *
 * Returns the result of screem_dtd_attribute_is_uri() for the
 * attribute, or FALSE when the attribute can not be found or an
 * argument is invalid. */
gboolean screem_dtd_attr_is_uri( const ScreemDTD *dtd,
				 const gchar *element,
				 const gchar *attr )
{
	const ScreemDTDAttribute *found;
	
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), FALSE );
	g_return_val_if_fail( element != NULL, FALSE );
	g_return_val_if_fail( attr != NULL, FALSE );

	found = screem_dtd_valid_attr( dtd, element, attr );
	if( ! found ) {
		return FALSE;
	}

	return screem_dtd_attribute_is_uri( found );
}


const GSList *screem_dtd_get_elements( const ScreemDTD *dtd )
{
	g_return_val_if_fail( SCREEM_IS_DTD( dtd ), NULL );

	return dtd->priv->elementlist;
}

/* Returns the attribute list of element, or NULL when element is
 * NULL.  The list is the element's own, not a copy. */
const GSList *screem_dtd_element_get_attrs( const ScreemDTDElement *element )
{
	const GSList *attrs;

	g_return_val_if_fail( element != NULL, NULL );

	attrs = element->attrlist;

	return attrs;
}

/* Returns the allowed list of element, or NULL when element is NULL.
 * The list is the element's own, not a copy. */
const GSList *screem_dtd_element_get_allowed( const ScreemDTDElement *element )
{
	const GSList *allowed;

	g_return_val_if_fail( element != NULL, NULL );

	allowed = element->allowedlist;

	return allowed;
}

/* Returns the name of element, or NULL when element is NULL.  The
 * string is the element's own, not a copy. */
const gchar *screem_dtd_element_get_name( const ScreemDTDElement *element )
{
	const gchar *name;

	g_return_val_if_fail( element != NULL, NULL );

	name = element->name;

	return name;
}

/* Returns the description of element, or NULL when element is NULL.
 * The string is the element's own, not a copy. */
const gchar *screem_dtd_element_get_description( const ScreemDTDElement *element )
{
	const gchar *text;

	g_return_val_if_fail( element != NULL, NULL );

	text = element->description;

	return text;
}

/* Returns the name of attr, or NULL when attr is NULL.  The string is
 * the attribute's own, not a copy. */
const gchar *screem_dtd_attribute_get_name( const ScreemDTDAttribute *attr )
{
	const gchar *name;

	g_return_val_if_fail( attr != NULL, NULL );

	name = attr->name;

	return name;
}

/* Returns the description of attr, or NULL when attr is NULL.  The
 * string is the attribute's own, not a copy. */
const gchar *screem_dtd_attribute_get_description( const ScreemDTDAttribute *attr )
{
	const gchar *text;

	g_return_val_if_fail( attr != NULL, NULL );

	text = attr->description;

	return text;
}

const gboolean screem_dtd_attribute_get_required( const ScreemDTDAttribute *attr )
{
	g_return_val_if_fail( attr != NULL, FALSE );

	return attr->required == SCREEM_DTD_REQUIRED;
}

/* Returns the isuri flag of attr, or FALSE when attr is NULL. */
const gboolean screem_dtd_attribute_is_uri( const ScreemDTDAttribute *attr )
{
	gboolean flag;

	g_return_val_if_fail( attr != NULL, FALSE );

	flag = attr->isuri;

	return flag;
}

/* Returns the list of values of attr, or NULL when attr is NULL.  The
 * list is the attribute's own, not a copy. */
const GSList *screem_dtd_attribute_get_values( const ScreemDTDAttribute *attr )
{
	const GSList *values;

	g_return_val_if_fail( attr != NULL, NULL );

	values = attr->values;

	return values;
}

/* Returns the default value of attr, or NULL when attr is NULL.  The
 * string is the attribute's own, not a copy. */
const gchar *screem_dtd_attribute_get_default( const ScreemDTDAttribute *attr )
{
	const gchar *value;

	g_return_val_if_fail( attr != NULL, NULL );

	value = attr->default_value;

	return value;
}

