/*************************************************************************
 *
 *  OpenOffice.org - a multi-platform office productivity suite
 *
 *  $RCSfile: XmlIndex.java,v $
 *
 *  $Revision: 1.2 $
 *
 *  last change: $Author: rt $ $Date: 2005/09/09 16:52:21 $
 *
 *  The Contents of this file are made available subject to
 *  the terms of GNU Lesser General Public License Version 2.1.
 *
 *
 *    GNU Lesser General Public License Version 2.1
 *    =============================================
 *    Copyright 2005 by Sun Microsystems, Inc.
 *    901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License version 2.1, as published by the Free Software Foundation.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 *
 *    You should have received a copy of the GNU Lesser General Public
 *    License along with this library; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *    MA  02111-1307  USA
 *
 ************************************************************************/
package com.sun.xmlsearch.xml;

import java.io.*;
import java.util.Hashtable;
import com.sun.xmlsearch.db.*;
import com.sun.xmlsearch.util.*;

import com.sun.xmlsearch.xml.qe.QueryHit; // !!! before hitToString moves
import com.sun.xmlsearch.xml.qe.QueryHitData;

/**
 * An {@link Index} that additionally maintains an "EDGE" vector b-tree and
 * per-document context information kept in the "CONTEXTS" file, and that can
 * render query hits as plain text, as {@link QueryHitData} packets or as XML.
 *
 * <p>The context tables are only created by {@link #init()} when the index
 * already holds at least one context offset; until then
 * {@link #getContextInfo()} returns <code>null</code> and the methods that
 * consult the context tables must not be called.</p>
 */
public final class XmlIndex extends Index {
    private VectorBtreeParameters _edgesParams;
    private FullVectorBtree       _edges;
    // NOTE(review): not referenced anywhere else in this class
    private byte[]                _edge = CompactEdgeFormat.makeVector();
    /** start offset of every document's context info inside the CONTEXTS data */
    private IntegerArray          _contextsOffsets = new IntegerArray();
    private ContextTables         _contextTables;
    /** lazily opened by {@link #getContextsFile()} */
    protected RandomAccessFile    _contextsFile = null;
    /** in-memory copy of the CONTEXTS file, loaded by {@link #init()} */
    private byte[]                _contextsData;
    private String[]              _linkNames;

    /**
     * Creates the index object; no files are touched until {@link #init()}.
     *
     * @param indexName passed through to the {@link Index} constructor
     * @param update    passed through to the {@link Index} constructor
     */
    public XmlIndex(String indexName, boolean update) {
        super(indexName, update);
    }

    /**
     * Initializes the base index, opens the "EDGE" b-tree and, when context
     * offsets are present, loads the context data and link names and builds
     * the context tables from them.
     *
     * @return <code>false</code> when the base class initialization reported
     *         failure (nothing else is set up then), <code>true</code>
     *         otherwise
     */
    public boolean init() throws Exception {
        if (!super.init())
            return false;

        _edgesParams = new VectorBtreeParameters(_schema, "EDGE", 9);
        // no persisted state for the b-tree: fall back to a block size of 1024
        if (!_edgesParams.readState())
            _edgesParams.setBlockSize(1024);
        _edges = new FullVectorBtree(_edgesParams, _update);

        if (_contextsOffsets.cardinality() > 0) {
            _contextsData = readByteArray("CONTEXTS");
            _linkNames = (String[]) readObject("LINKNAMES");
            _contextTables = new ContextTables(_contextsOffsets.toIntArray(),
                                               _contextsData,
                                               _linkNames);
        }
        return true;
    }

    /**
     * @return the link names loaded by {@link #init()}, or <code>null</code>
     *         when none were loaded; the internal array itself is returned
     */
    public String[] getLinkNames() {
        return _linkNames;
    }

    /**
     * Rewrites the positions and contexts files so that they only contain
     * the documents that appear in neither argument table, and replaces the
     * document, title and offset tables accordingly.
     *
     * @param toRemove  documents, keyed by name, to drop; their names are
     *                  also passed to <code>removeSymbol</code>
     * @param toRefresh documents, keyed by name, to drop from the files while
     *                  keeping their symbol
     */
    public void pruneIndex(Hashtable toRemove, Hashtable toRefresh)
        throws Exception {
        final int initSize = _documents.cardinality();
        IntegerArray newDocuments = new IntegerArray(initSize);
        IntegerArray newTitles = new IntegerArray(initSize);
        IntegerArray newMicroIndexOffsets = new IntegerArray(initSize);
        IntegerArray newContextsOffsets = new IntegerArray(initSize);

        // Append the current file length as a sentinel end offset so that
        // entry n + 1 exists for the last document, then truncate the file;
        // kept entries are rewritten below from the in-memory _contextsData.
        getContextsFile();
        _contextsOffsets.add((int) _contextsFile.length());
        _contextsFile.setLength(0);
        _contextsFile.seek(0);

        // Same treatment for the positions file, rewritten from _positions.
        getPositionsFile();
        _microIndexOffsets.add((int) _positionsFile.length());
        _positionsFile.setLength(0);
        _positionsFile.seek(0);

        int positionsLength = 0;
        int contextsLength = 0;
        for (int i = 0; i < initSize; i++) {
            String documentName = fetch(_documents.at(i));
            boolean remove = toRemove.get(documentName) != null;
            if (!remove && toRefresh.get(documentName) == null) {
                System.out.println("keeping " + documentName);
                newMicroIndexOffsets.add(positionsLength);
                positionsLength += writeOutMicroIndex(i);
                newContextsOffsets.add(contextsLength);
                contextsLength += writeOutContextInfo(i);
                newDocuments.add(_documents.at(i));
                newTitles.add(_titles.at(i));
            }
            else {
                // NOTE(review): this message is printed for refreshed
                // documents as well, not only for removed ones
                System.out.println("removing " + documentName);
                if (remove)
                    // !!! ??? ID not reused though, nor string really deleted from Dict
                    removeSymbol(documentName);
            }
        }
        _documents = newDocuments;
        _microIndexOffsets = newMicroIndexOffsets;
        _contextsOffsets = newContextsOffsets;
        _titles = newTitles;
    }

    /**
     * Appends the n-th document's slice of <code>_positions</code> to the
     * positions file.
     *
     * @return number of bytes written
     */
    private int writeOutMicroIndex(int n) throws IOException {
        int begin = _microIndexOffsets.at(n);
        int length = _microIndexOffsets.at(n + 1) - begin;
        _positionsFile.write(_positions, begin, length);
        return length;
    }

    /**
     * Appends the n-th document's slice of <code>_contextsData</code> to the
     * contexts file.
     *
     * @return number of bytes written
     */
    private int writeOutContextInfo(int n) throws IOException {
        int begin = _contextsOffsets.at(n);
        int length = _contextsOffsets.at(n + 1) - begin;
        _contextsFile.write(_contextsData, begin, length);
        return length;
    }

    /**
     * @return the "CONTEXTS" file, opening it on first use
     */
    public RandomAccessFile getContextsFile() throws IOException {
        if (_contextsFile == null)
            _contextsFile = getRAF("CONTEXTS", _update);
        return _contextsFile;
    }

    /**
     * Reads the base class offset tables and then the compressed table of
     * context offsets that {@link #writeOutOffsets()} appends after them.
     *
     * @param in stream positioned at the start of the offsets tables
     */
    protected void readOffsetsTables(InputStream in) throws Exception {
        super.readOffsetsTables(in);
        // decompress contexts' offsets
        _contextsOffsets = new IntegerArray(_documents.cardinality() + 1);
        int k4 = in.read();
        if (k4 < 0)
            // End of stream: writeOutOffsets() writes no contexts section
            // when there are no context offsets, so leave the table empty
            // rather than handing -1 to the decompressor.
            return;
        StreamDecompressor co = new StreamDecompressor(in);
        //    _contextsOffsets.add(0);	// first, trivial offset
        co.ascDecode(k4, _contextsOffsets);
    }

    /**
     * Closes the contexts file and the "EDGE" b-tree, persists the b-tree
     * parameters when the index was opened for update, and closes the base
     * index. Safe to call when {@link #init()} failed or was never invoked.
     */
    public void close() throws Exception {
        if (_contextsFile != null) {
            _contextsFile.close();
            _contextsFile = null;
        }
        // _edges and _edgesParams are only assigned by a successful init()
        if (_edges != null)
            _edges.close();
        if (_update && _edgesParams != null)
            _edgesParams.updateSchema();
        super.close();
    }

    /**
     * Selects the given document in the context tables and returns its
     * positions as provided by the base class.
     */
    public byte[] getPositions(int docNo) throws Exception {
        _contextTables.setMicroindex(docNo);
        return super.getPositions(docNo);
    }

    /** Delegates to {@link ContextTables#resetContextSearch()}. */
    public void resetContextSearch() {
        _contextTables.resetContextSearch();
    }

    /**
     * @return the context tables, or <code>null</code> when {@link #init()}
     *         found no context offsets
     */
    public ContextTables getContextInfo() {
        return _contextTables;
    }

    /**
     * Compresses a document through the base class and then appends its
     * context tables to the end of the contexts file, recording where the
     * appended data starts.
     *
     * @param k             byte written in front of the context tables
     * @param contextTables already compressed context tables to append
     */
    public void compress(int docID, int titleID,
                         ConceptLocation[] locations, int count,
                         ConceptLocation[] extents, int extCount,
                         int k, Compressor contextTables)
        throws IOException {
        super.compress(docID, titleID, locations, count, extents, extCount);
        RandomAccessFile contexts = getContextsFile();
        long currentEnd = contexts.length();
        contexts.seek(currentEnd);
        contexts.writeByte(k);
        contextTables.write(contexts);
        _contextsOffsets.add((int) currentEnd);
    }

    /**
     * Writes the base class offsets and, when there is at least one context
     * offset, appends the compressed context offsets to the offsets file.
     */
    protected void writeOutOffsets() throws IOException {
        super.writeOutOffsets();
        if (_contextsOffsets.cardinality() > 0) {
            RandomAccessFile out = getOffsetsFile();
            Compressor offsets2 = new Compressor();
            int k = offsets2.compressAscending(_contextsOffsets);
            out.write(k);
            offsets2.write(out);
        }
    }

    /**
     * Translates the IDs found at the even positions of <code>matches</code>
     * into their strings.
     *
     * @return array of <code>matches.length / 2</code> entries; an entry is
     *         <code>null</code> where the corresponding ID is not positive
     */
    private String[] fetchTerms(int[] matches) throws Exception {
        String[] terms = new String[matches.length / 2];
        for (int i = 0; i < terms.length; i++) {
            int id = matches[2 * i];
            if (id > 0)
                terms[i] = fetch(id);
        }
        return terms;
    }

    /**
     * Renders a hit as text: the penalty followed by the comma separated
     * matched terms in braces ("--" for a term without a match), then a line
     * with document name, document number, begin and end, then whatever the
     * context tables report for the hit location.
     */
    public synchronized String hitToString(QueryHit hit) throws Exception {
        final int[] matches = hit.getMatches();
        final String[] terms = fetchTerms(matches);
        final int document = hit.getDocument();

        StringBuffer result = new StringBuffer();
        result.append(hit.getPenalty());
        result.append(" {");
        for (int i = 0; i < terms.length; i++) {
            result.append(terms[i] != null ? terms[i] : "--");
            // separator between terms only; comparing against matches.length
            // (twice the number of terms) left a dangling ", " at the end
            if (i < terms.length - 1)
                result.append(", ");
        }
        result.append("}\n");

        result.append(documentName(document));
        result.append(' ');
        result.append(document);
        result.append(' ');
        result.append(hit.getBegin());
        result.append(' ');
        result.append(hit.getEnd());
        result.append('\n');
        // context info
        // !!! efficiency? synchronization?

        _contextTables.setMicroindex(document);
        _contextTables.resetContextSearch();
        _contextTables.hitLocation(terms, matches, result);

        result.append('\n');
        return result.toString();
    }

    /**
     * Builds a {@link QueryHitData} from the hit's penalty, document name and
     * matched terms and lets the context tables fill in the hit location.
     */
    public synchronized QueryHitData hitToData(final QueryHit hit)
        throws Exception {
        final int[] matches = hit.getMatches();
        // translate matching term IDs to their strings
        final String[] terms = fetchTerms(matches);
        // build the resulting data packet
        final int document = hit.getDocument();
        QueryHitData result = new QueryHitData(hit.getPenalty(),
                                               documentName(document),
                                               terms);
        // fill out the remaining data: xPath locations of terms
        _contextTables.setMicroindex(document);
        _contextTables.resetContextSearch();
        _contextTables.hitLocation(terms, matches, result);
        return result;
    }

    /**
     * Adds to result the XML representation of the argument hit: a
     * <code>QueryHit</code> element carrying penalty and document name as
     * attributes and the context tables' hit location output as content.
     */
    public synchronized void hitToXml(QueryHit hit, StringBuffer result)
        throws Exception {
        final int document = hit.getDocument();
        result.append("<QueryHit penalty=\"");
        result.append(hit.getPenalty());
        result.append("\" doc=\"");
        // NOTE(review): the name is written unescaped; confirm document
        // names cannot contain '"', '&' or '<'
        result.append(documentName(document));
        result.append("\">");

        final int[] matches = hit.getMatches();
        final String[] terms = fetchTerms(matches);

        _contextTables.setMicroindex(document);
        _contextTables.resetContextSearch();
        _contextTables.hitLocation(terms, matches, result);

        result.append("</QueryHit>");
    }

    /**
     * Stores the link names under "LINKNAMES", the name {@link #init()}
     * reads them back from.
     */
    public void saveLinkNames(Object[] linkNames) throws IOException {
        outputObject("LINKNAMES", linkNames);
    }
}
