/*
    BFilter - a web proxy which removes banner ads
    Copyright (C) 2002-2006  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <iostream>
#include <fstream>
#include <sstream>
#include "lexgen.h"
#include "NfaChar.h"
#include "NfaCharClass.h"
#include "NfaNegatedCharClass.h"
#include "NfaConcatenation.h"
#include "NfaClosure.h"
#include "NfaPositiveClosure.h"
#include "NfaOption.h"
#include "NfaString.h"
#include "NfaInsensitiveString.h"
#include "NfaUnion.h"
#include "NfaEpsilon.h"

#include "HtmlDetectorDefinitions.h"

/**
 * Entry point of the lexer generator for the HTML detector.
 *
 * Expected command line (seven arguments, all required):
 *   makelexer <OutputClass> <out.h> <out.cpp>
 *             <Definitions> <definitions.h> <Subclass> <subclass.h>
 *
 * Registers the detector's rules with LexGen and asks it to write the
 * generated lexer into <out.h> and <out.cpp>.
 *
 * Returns 0 on success, 1 if the arguments are missing, an output file
 * cannot be opened, or writing the output fails. A non-zero status lets
 * the build notice that no usable lexer was produced.
 */
int main(int argc, char** argv)
{
	if (argc < 8) {
		std::cerr << "Usage: makelexer <OutputClass> <out.h> <out.cpp> "
			"<Definitions> <definitions.h> <Subclass> <subclass.h>" << std::endl;
		return 1;
	}
	const char* out_class = argv[1];
	const char* out_header = argv[2];
	const char* out_impl = argv[3];
	const char* def_class = argv[4];
	const char* def_header = argv[5];
	const char* subclass = argv[6];
	const char* subclass_header = argv[7];
	
	// Bail out as soon as an output file can't be opened: continuing would
	// silently generate nothing and still report success.
	std::ofstream header_strm(out_header);
	if (header_strm.fail()) {
		std::cerr << "Failed opening " << out_header << " for writing" << std::endl;
		return 1;
	}
	
	std::ofstream impl_strm(out_impl);
	if (impl_strm.fail()) {
		std::cerr << "Failed opening " << out_impl << " for writing" << std::endl;
		return 1;
	}
	
	LexGen lexgen(out_class, subclass);
	
	NfaEpsilon epsilon;
	// NOTE(review): a default-constructed negated class is used here as
	// "any character" -- presumably it negates an empty set; confirm
	// against NfaNegatedCharClass.
	NfaNegatedCharClass any;
	
	std::string space_chars(" \t\r\n");
	NfaCharClass space(space_chars);
	
	// The UTF-8 byte order mark: EF BB BF. Character literals are used
	// instead of integer constants because values above 0x7f don't fit into
	// a signed char, making { 0xef, ... } a narrowing conversion.
	static char const utf8_bom_str[] = { '\xef', '\xbb', '\xbf' };
	NfaString utf8_bom(std::string(utf8_bom_str, sizeof(utf8_bom_str)));
	
	NfaUnion boms(utf8_bom);
	// add others if necessary
	
	typedef HtmlDetectorDefinitions Defs;
	
	// State BOM: an optional byte order mark, then move on to WHITESPACE.
	lexgen.addRule(Defs::BOM, NfaOption(boms),
		"BEGIN(WHITESPACE); MORE();"
	);
	// State WHITESPACE: zero or more whitespace characters.
	lexgen.addRule(Defs::WHITESPACE, NfaClosure(space),
		"BEGIN(AFTER_WHITESPACE); MORE();"
	);
	// State AFTER_WHITESPACE: the first significant character decides the
	// outcome -- '<' reports HTML, any other character reports non-HTML,
	// and the epsilon rule reports an empty document.
	lexgen.addRule(Defs::AFTER_WHITESPACE, NfaChar('<'),
		"obj->onHtmlDetected(); BEGIN(DONE); MORE(); SUSPEND();"
	);
	lexgen.addRule(Defs::AFTER_WHITESPACE, any,
		"obj->onHtmlNotDetected(); BEGIN(DONE); MORE(); SUSPEND();"
	);
	lexgen.addRule(Defs::AFTER_WHITESPACE, epsilon,
		"obj->onEmptyDoc(); BEGIN(DONE); MORE(); SUSPEND();"
	);
	// State DONE: the verdict has already been reported; no further callbacks.
	lexgen.addRule(Defs::DONE, epsilon,
		"MORE(); SUSPEND();"
	);
	
	lexgen.writeLexer(header_strm, impl_strm, def_class, def_header, subclass_header);
	
	// Flush explicitly so that buffered write errors (e.g. a full disk)
	// surface here rather than being lost when the streams are destroyed.
	header_strm.flush();
	if (header_strm.fail()) {
		std::cerr << "Failed writing " << out_header << std::endl;
		return 1;
	}
	impl_strm.flush();
	if (impl_strm.fail()) {
		std::cerr << "Failed writing " << out_impl << std::endl;
		return 1;
	}
	
	return 0;
}
