/*
 * Fast index for tag data
 *
 * Copyright (C) 2006  Enrico Zini <enrico@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <tagcoll/coll/intdiskindex.h>
#include <wibble/operators.h>
#include <cstddef>
#include <iterator>
#include <list>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace std;
using namespace wibble::operators;

namespace tagcoll {
namespace coll {

//#define TRACE_IS

#ifdef TRACE_IS
#include <iostream>
#endif

/*
 * Single-pass input iterator over an array of `size` ints starting at `vals`.
 *
 * The end position is represented by a null `vals` pointer: an iterator that
 * has consumed its last element (or was built over zero elements) compares
 * equal to a default-constructed one.
 */
class IntSetIterator
{
	const int* vals;
	size_t size;

public:
	// Member types looked up by std::iterator_traits; standard containers
	// need them to accept this class in their range constructors
	typedef std::input_iterator_tag iterator_category;
	typedef int value_type;
	typedef std::ptrdiff_t difference_type;
	typedef const int* pointer;
	typedef int reference;

	// End iterator
	IntSetIterator() : vals(0), size(0) {}

	// Iterate the `size` ints found at `vals`; an empty range is stored
	// directly as the end iterator
	IntSetIterator(const int* vals, size_t size)
		: vals(size == 0 ? 0 : vals), size(size) {}

	// Step to the next element, turning into the end iterator after the
	// last one.  Incrementing the end iterator does nothing.
	IntSetIterator& operator++()
	{
		if (size > 0)
		{
			++vals;
			if (--size == 0)
				vals = 0;
		}
		return *this;
	}

	// Current element; must not be called on the end iterator
	int operator*() const { return *vals; }

	bool operator==(const IntSetIterator& mi) const { return vals == mi.vals; }
	bool operator!=(const IntSetIterator& mi) const { return vals != mi.vals; }
};

/*
 * Input iterator that counts upwards through consecutive ints.
 *
 * A pair (IntSeqIterator(a), IntSeqIterator(b)) with a <= b describes the
 * half-open range [a, b).
 */
class IntSeqIterator
{
	int val;

public:
	// Member types looked up by std::iterator_traits; standard containers
	// need them to accept this class in their range constructors
	typedef std::input_iterator_tag iterator_category;
	typedef int value_type;
	typedef std::ptrdiff_t difference_type;
	typedef const int* pointer;
	typedef int reference;

	// Iterator positioned on `val`
	IntSeqIterator(int val)
		: val(val) {}

	// Move to the next integer
	IntSeqIterator& operator++()
	{
		++val;
		return *this;
	}

	// The integer the iterator is currently positioned on
	int operator*() const { return val; }

	bool operator==(const IntSeqIterator& mi) const { return val == mi.val; }
	bool operator!=(const IntSeqIterator& mi) const { return val != mi.val; }
};

/*
 * Input iterator over the positions of `idx` that hold at least one element.
 *
 * As used in this file, idx.size() is the number of positions in the index
 * and idx.size(n) is the number of elements stored at position n.  The end
 * iterator is the one positioned on idx.size().
 */
class NonemptyIntSeqIterator
{
	const diskindex::Int& idx;
	unsigned int val;

	// Move forward to the first non-empty position at or after `val`,
	// stopping at idx.size().  The bounds test comes first so that
	// idx.size(val) is never queried for a position past the end.
	void skipEmpty()
	{
		while (val < idx.size() && idx.size(val) == 0)
			++val;
	}

public:
	// Member types looked up by std::iterator_traits; standard containers
	// need them to accept this class in their range constructors
	typedef std::input_iterator_tag iterator_category;
	typedef int value_type;
	typedef std::ptrdiff_t difference_type;
	typedef const int* pointer;
	typedef int reference;

	// Start at the first non-empty position >= val.  Passing idx.size()
	// yields the end iterator.
	NonemptyIntSeqIterator(const diskindex::Int& idx, int val)
		: idx(idx), val(val) { skipEmpty(); }

	// Advance to the next non-empty position, or to the end
	NonemptyIntSeqIterator& operator++()
	{
		++val;
		skipEmpty();
		return *this;
	}

	// Position the iterator currently points at
	int operator*() const { return val; }

	bool operator==(const NonemptyIntSeqIterator& mi) const { return val == mi.val; }
	bool operator!=(const NonemptyIntSeqIterator& mi) const { return val != mi.val; }
};


/*
 * A list of int arrays, each stored as a (remaining length, pointer to the
 * next unread element) pair, with k-way intersection and union over them.
 *
 * Both intersect() and merge() only ever compare the current head of each
 * array, so they assume that every array is sorted in ascending order.
 * Both consume the collection: after either call the stored pairs no longer
 * describe the original arrays.
 */
class IntSets : public std::list< std::pair<size_t, const int*> >
{
#ifdef TRACE_IS
	void print(const std::string& title)
	{
		cerr << " * " << title << ":" << endl;
		size_t idx = 0;
		for (const_iterator i = begin(); i != end(); i++, idx++)
		{
			cerr << idx << ":";
			for (size_t j = 0; j < i->first; j++)
			{
				cerr << " " << i->second[j];
				if (j > 5)
				{
					cerr << "...";
					break;
				}
			}
			cerr << endl;
		}
	}
#else
	inline void print(const std::string&) {}
#endif

	// Remove every array that has no elements left, so that the head of
	// each remaining array can be dereferenced safely
	void dropEmptyLists()
	{
		iterator i = begin();
		while (i != end())
		{
			if (i->first == 0)
				i = erase(i);
			else
				++i;
		}
	}

public:
	// Drop the first element of the array at position i.  If that was its
	// last element (or the array was already empty), the array is removed
	// from the collection.
	// Returns i if it advanced normally, or the position following i if
	// the array got empty and has been deleted.
	iterator advance(iterator i)
	{
		if (i->first <= 1)
			return erase(i);

		--(i->first);
		++(i->second);
		return i;
	}

	// Compute the set of values that appear in every array.
	//
	// An empty collection, or one containing an empty array, gives an
	// empty result; a collection with a single array gives the contents
	// of that array.  The computation stops as soon as any array runs
	// out of elements, since no further value can be common to all.
	std::set<int> intersect()
	{
		print("begin");
		std::set<int> res;

		if (empty())
			return res;
		for (const_iterator i = begin(); i != end(); ++i)
			if (i->first == 0)
				return res;

		for (;;)
		{
			// The greatest head is the smallest value that can still
			// be present in all the arrays
			int candidate = *(begin()->second);
			for (const_iterator i = begin(); i != end(); ++i)
				if (*(i->second) > candidate)
					candidate = *(i->second);

			// Bring every array up to the candidate
			bool common = true;
			for (iterator i = begin(); i != end(); ++i)
			{
				while (i->first > 0 && *(i->second) < candidate)
				{
					--(i->first);
					++(i->second);
				}
				if (i->first == 0)
					return res;
				if (*(i->second) != candidate)
					common = false;
			}
			print("post-flatten");

			// Some head went past the candidate: retry with the new maximum
			if (!common)
				continue;

			// Store the common item; values are found in ascending
			// order, so the end of the set is the right insertion hint
			res.insert(res.end(), candidate);

			// Advance all lists
			bool exhausted = false;
			for (iterator i = begin(); i != end(); ++i)
			{
				--(i->first);
				++(i->second);
				if (i->first == 0)
					exhausted = true;
			}
			print("post-selection");
			if (exhausted)
				return res;
		}
	}

	// Return the smallest head among all arrays and drop it from every
	// array that has it as head; arrays emptied this way are removed.
	// The collection must be non-empty and must not contain empty arrays.
	int extractmin()
	{
		// Find the minimum item
		int min = *(begin()->second);
		for (const_iterator i = begin(); i != end(); ++i)
			if (*(i->second) < min)
				min = *(i->second);

		// Advance all the lists which have the minimum item as first item.
		// The successor is saved first because advance() may erase i.
		iterator i = begin();
		while (i != end())
		{
			iterator next = i; ++next;
			if (*(i->second) == min)
				advance(i);
			i = next;
		}

		return min;
	}

	// Single-pass input iterator producing the distinct values of all the
	// arrays in ascending order.  Each increment extracts the next minimum
	// from the IntSets it refers to, so the collection is consumed while
	// iterating and all copies of an iterator share the same state.
	class MergeIterator
	{
		IntSets& is;
		int val;
		// Explicit end marker, so that no int value is reserved as a
		// sentinel
		bool atEnd;

	public:
		// Member types looked up by std::iterator_traits
		typedef std::input_iterator_tag iterator_category;
		typedef int value_type;
		typedef std::ptrdiff_t difference_type;
		typedef const int* pointer;
		typedef int reference;

		// End iterator
		MergeIterator(IntSets& is) : is(is), val(-1), atEnd(true) {}

		// Iterator positioned on `val`, a value already extracted from `is`
		MergeIterator(IntSets& is, int val) : is(is), val(val), atEnd(false) {}

		MergeIterator& operator++()
		{
			if (is.empty())
			{
				val = -1;
				atEnd = true;
			}
			else
				val = is.extractmin();
			return *this;
		}
		int operator*() const { return val; }

		// Only end iterators compare equal to each other
		bool operator==(const MergeIterator& mi) const { return atEnd && mi.atEnd; }
		bool operator!=(const MergeIterator& mi) const { return !atEnd || !mi.atEnd; }
	};

	// Start merging: discards empty arrays, then extracts the first value.
	// Returns the end iterator if there is nothing to merge.
	MergeIterator mergeBegin()
	{
		dropEmptyLists();
		if (empty())
			return MergeIterator(*this);
		return MergeIterator(*this, extractmin());
	}

	MergeIterator mergeEnd()
	{
		return MergeIterator(*this);
	}

	// Compute the set of values that appear in at least one array
	std::set<int> merge()
	{
		MergeIterator first = mergeBegin();
		MergeIterator last = mergeEnd();
		return std::set<int>(first, last);
	}
};

std::set<int> IntDiskIndex::getItemsHavingTag(const int& tag) const
{
	return std::set<int>(IntSetIterator(tagidx.data(tag), tagidx.size(tag)), IntSetIterator());
}

// Return the items that are stored in the tag index under every tag in
// `tags`.  An empty tag set gives an empty result.
std::set<int> IntDiskIndex::getItemsHavingTags(const std::set<int>& tags) const
{
	if (tags.empty())
		return std::set<int>();
	if (tags.size() == 1)
		return getItemsHavingTag(*tags.begin());

	// Collect the item list of every tag
	IntSets items;
	for (std::set<int>::const_iterator i = tags.begin(); i != tags.end(); ++i)
	{
		const size_t count = tagidx.size(*i);
		// A tag without items makes the whole intersection empty; it must
		// not reach intersect(), which reads the first element of each list
		if (count == 0)
			return std::set<int>();
		items.push_back(std::make_pair(count, tagidx.data(*i)));
	}
	return items.intersect();
}

std::set<int> IntDiskIndex::getTagsOfItem(const int& item) const
{
	return std::set<int>(IntSetIterator(pkgidx.data(item), pkgidx.size(item)), IntSetIterator());
}

// Return the tags that are stored in the item index under at least one of
// the given items.  An empty item set gives an empty result.
std::set<int> IntDiskIndex::getTagsOfItems(const std::set<int>& items) const
{
	if (items.empty())
		return std::set<int>();

	// Collect the tag list of every item.  Items without tags are left
	// out: merge() reads the first element of each list, which an empty
	// list does not have.
	IntSets tags;
	for (std::set<int>::const_iterator i = items.begin(); i != items.end(); ++i)
	{
		const size_t count = pkgidx.size(*i);
		if (count > 0)
			tags.push_back(std::make_pair(count, pkgidx.data(*i)));
	}

	// None of the items had any tag
	if (tags.empty())
		return std::set<int>();

	return tags.merge();
}

// Return every item that has at least one tag in the item index.
std::set<int> IntDiskIndex::getTaggedItems() const
{
	std::set<int> res;
	// Check each position against the index bounds before querying it, and
	// keep only the items whose tag list is not empty (item 0 included).
	// Items are visited in ascending order, so the end of the set is the
	// right insertion hint.
	for (unsigned int item = 0; item < pkgidx.size(); ++item)
		if (pkgidx.size(item) > 0)
			res.insert(res.end(), static_cast<int>(item));
	return res;
}

// Return the set of all tag ids, i.e. every integer from 0 up to (but not
// including) the number of entries in the tag index.
std::set<int> IntDiskIndex::getAllTags() const
{
	const int count = tagidx.size();
	std::set<int> res;
	// Ids are generated in ascending order, so hint insertion at the end
	for (int tag = 0; tag != count; ++tag)
		res.insert(res.end(), tag);
	return res;
}

// Return all tag ids, i.e. every integer from 0 up to (but not including)
// the number of entries in the tag index, as a vector in ascending order.
std::vector<int> IntDiskIndex::getAllTagsAsVector() const
{
	const size_t count = tagidx.size();

	std::vector<int> res;
	// reserve() only allocates storage and leaves the vector empty, so the
	// elements have to be appended rather than assigned through operator[]
	res.reserve(count);
	for (size_t i = 0; i < count; ++i)
		res.push_back(static_cast<int>(i));
	return res;
}

// Return the tags that appear together with all of `tags` on some item,
// excluding `tags` themselves.  An empty tag set gives an empty result.
std::set<int> IntDiskIndex::getCompanionTags(const std::set<int>& tags) const
{
	// This is basically a reimplementation of:
	// return getTagsOfItems(getItemsHavingTags(tags)) - tags;
	// without the conversion to and from ITEM and TAG

	if (tags.empty())
		return std::set<int>();

	// Step 1: find the items that have all the given tags
	std::set<int> items;

	if (tags.size() == 1)
	{
		const int itag = *tags.begin();
		const int* data = tagidx.data(itag);
		items.insert(data, data + tagidx.size(itag));
	} else {
		// Collect the item list of every tag
		IntSets inters;
		for (std::set<int>::const_iterator i = tags.begin(); i != tags.end(); ++i)
		{
			const size_t count = tagidx.size(*i);
			// A tag without items makes the whole intersection empty; it
			// must not reach intersect(), which reads the first element
			// of each list
			if (count == 0)
				return std::set<int>();
			inters.push_back(std::make_pair(count, tagidx.data(*i)));
		}
		items = inters.intersect();
	}

	if (items.empty())
		return std::set<int>();

	// Step 2: collect the tag list of every item found, leaving out empty
	// lists because merge() reads the first element of each list
	IntSets merge;
	for (std::set<int>::const_iterator i = items.begin(); i != items.end(); ++i)
	{
		const size_t count = pkgidx.size(*i);
		if (count > 0)
			merge.push_back(std::make_pair(count, pkgidx.data(*i)));
	}

	if (merge.empty())
		return std::set<int>();

	// Step 3: the companions are all those tags minus the ones asked for
	std::set<int> res = merge.merge();
	for (std::set<int>::const_iterator i = tags.begin(); i != tags.end(); ++i)
		res.erase(*i);
	return res;
}

// Disabled code, never compiled.  It would pass every item that has a
// non-empty tag list to `consumer`, together with its set of tags.
// NOTE(review): it refers to names that are not defined in the visible part
// of this file (ITEM, TAG, Consumer, toitem, m_totag) and uses IntSetIterator
// as a template taking a converter, while the class above is not a template;
// presumably a leftover from an earlier templated design -- confirm before
// re-enabling.
#if 0
void IntDiskIndex::output(Consumer<ITEM, TAG>& consumer) const
{
	// Visit every position of the item index, skipping items without tags
	for (unsigned int i = 0; i < pkgidx.size(); i++)
		if (pkgidx.size(i) > 0)
			consumer.consume(toitem(i), std::set<TAG>(IntSetIterator<TAG>(pkgidx.data(i), pkgidx.size(i), *m_totag), IntSetIterator<TAG>(*m_totag)));
}
#endif

}
}

// vim:set ts=4 sw=4:
