
/*
 * LIB/HISTORY.C
 *
 * (c)Copyright 1997, Matthew Dillon, All Rights Reserved.  Refer to
 *    the COPYRIGHT file in the base directory of this distribution 
 *    for specific rights granted.
 *
 * Generally speaking, the history mechanism can be pictured from the
 * point of view of a reader or from the point of view of a writer.  From
 * the point of view of a reader, A base table lookup begins a hash chain
 * of history records that runs backwards through the history file.  More
 * recent entries are encountered first, resulting in a certain degree of
 * locality of reference based on age.
 *
 * A writer must scan the chain to ensure that the message-id does not
 * already exist.  The new history record is appended to the history file
 * and inserted at the base of the hash chain.  The file append requires
 * an exclusive lock to ensure atomic operation (O_APPEND is often not atomic
 * on heavily loaded systems, the exclusive lock is required).
 *
 * In a heavily loaded system, the exclusive lock and append may become a
 * bottleneck.
 *
 * WARNING!  offsets stored in history records / hash table index are signed
 * 32 bits but cast to unsigned in any lseek() operations.  The history file
 * is thus currently limited to 4GB even with 64 bit capable filesystems.
 * 2GB on linux, 4GB under FreeBSD (which is 64 bit capable).
 */

#include "defs.h"

Prototype void HistoryOpen(const char *fileName, int fastMode);
Prototype int  HistoryClose(void);
Prototype int HistoryLookup(const char *msgid, History *h);
Prototype int HistoryLookupByHash(hash_t hv, History *h);
Prototype int32 HistoryPosLookupByHash(hash_t hv, History *h);
Prototype int HistoryAdd(const char *msgid, History *h);

Prototype int NewHSize;

#define HBLKINCR	16
#define HBLKSIZE	256

HistHead	HHead;
uint32 		*HAry;
int		HFd = -1;
int		LoggedDHistCorrupt;
int		HFlags;
int		HSize;
int		HMask;
off_t		HScanOff;
int		NewHSize = 0;
int		HBlkIncr = HBLKINCR;
int		HBlkGood;

void
HistoryOpen(const char *fileName, int hflags)
{
    int fd;
    struct stat st;
    int openflags;

    HFlags = hflags;

    if (NewHSize == 0)
	NewHSize = DOpts.HashSize;	/* which may also be 0 */

    /*
     * open the history file
     */

    if (HFlags & HGF_READONLY)
	openflags = O_RDONLY;
    else
	openflags = O_RDWR|O_CREAT;

    if (fileName)
	fd = open(fileName, openflags, 0644);
    else
	fd = open(PatDbExpand(DHistoryPat), openflags, 0644);

    if (fd < 0) {
	syslog(LOG_ERR, "open %s failed (%s)", PatDbExpand(DHistoryPat),
							strerror(errno));
	fprintf(stderr, "open %s failed (%s)", PatDbExpand(DHistoryPat),
							strerror(errno));
	exit(1);
    }

    if (fstat(fd, &st) < 0) {
	syslog(LOG_ERR, "fstat %s failed", PatDbExpand(DHistoryPat));
	exit(1);
    }

    /*
     * initial history file creation, if necessary
     */

    bzero(&HHead, sizeof(HHead));

    if (st.st_size == 0 || 
	read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) ||
	HHead.hmagic != HMAGIC
    ) {
	/*
	 * lock after finding the history file to be invalid and recheck.
	 */

	hflock(fd, 0, XLOCK_EX);

	fstat(fd, &st);
	lseek(fd, 0L, 0);

	if (st.st_size == 0 || 
	    read(fd, &HHead, sizeof(HHead)) != sizeof(HHead) ||
	    HHead.hmagic != HMAGIC
	) {
	    int n;
	    int b;
	    char *z = calloc(4096, 1);

	    /*
	     * check for old version of history file
	     */

	    if (st.st_size) {
		syslog(LOG_ERR, "Incompatible history file version or corrupted history file\n");
		exit(1);
	    }

	    if (HFlags & HGF_READONLY) {
		syslog(LOG_ERR, "Unable to create %s - read-only mode\n",
						PatDbExpand(DHistoryPat));
		fprintf(stderr, "Unable to create %s - read-only mode\n",
						PatDbExpand(DHistoryPat));
		exit(1);
	    }

	    /*
	     * create new history file
	     */

	    syslog(LOG_INFO, "Creating history file");

	    lseek(fd, 0L, 0);
	    ftruncate(fd, 0);
	    bzero(&HHead, sizeof(HHead));

	    HHead.hashSize = NewHSize;
	    HHead.version  = HVERSION;
	    HHead.henSize  = sizeof(History);
	    HHead.headSize = sizeof(HHead);

	    write(fd, &HHead, sizeof(HHead));

	    /*
	     * write out the hash table
	     */

	    n = 0;
	    b = HHead.hashSize * sizeof(int32);

	    while (n < b) {
		int r = (b - n > 4096) ? 4096 : b - n;

		write(fd, z, r);
		n += r;
	    }
	    fsync(fd);

	    /*
	     * rewrite header with magic number
	     */

	    lseek(fd, 0L, 0);
	    HHead.hmagic = HMAGIC;
	    write(fd, &HHead, sizeof(HHead));

	    free(z);
	    syslog(LOG_INFO, "History file creation complete");
	}

	hflock(fd, 0, XLOCK_UN);
    }

    if (HHead.version < HVERSION) {
	syslog(LOG_ERR, "DHistory version %d, expected %d, use the biweekly adm script to regenerate the history file",
	    HHead.version,
	    HVERSION
	);
	fprintf(stderr, "DHistory version %d, expected %d, use the biweekly adm script to regenerate the history file\n",
	    HHead.version,
	    HVERSION
	);
	exit(1);
    }
    if (HHead.version > HVERSION) {
	syslog(LOG_ERR, "DHistory version %d, expected %d, YOU ARE RUNNING AN OLD DIABLO ON A NEW HISTORY FILE!",
	    HHead.version,
	    HVERSION
	);
	exit(1);
    }

    /*
     * Map history file
     */

    HSize = HHead.hashSize;
    HMask = HSize - 1;

    /*
     * In FAST mode we leave the history file locked in order to
     * cache the hash table array at the beginning, which in turn
     * allows us to essentially append new entries to the end of
     * the file without having to seek back and forth updating
     * the hash table.
     *
     * When we aren't in FAST mode, we memory-map the hash table
     * portion of the history file.
     */

    if (HFlags & HGF_FAST)
	hflock(fd, 0, XLOCK_EX);

    if (HFlags & HGF_FAST) {
	HAry = calloc(HSize, sizeof(int32));
	if (HAry == NULL) {
	    perror("calloc");
	    exit(1);
	}
	lseek(fd, HHead.headSize, 0);
	if (read(fd, HAry, HSize * sizeof(int32)) != HSize * sizeof(int32)) {
	    perror("read");
	    exit(1);
	}
    } else {
	HAry = xmap(NULL, HSize * sizeof(int32), PROT_READ, MAP_SHARED, fd, HHead.headSize);
	if (HFlags & HGF_MLOCK)
	    mlock(HAry, HSize * sizeof(int32));
    }

    if (HAry == NULL || HAry == (uint32 *)-1) {
	if (fd >= 0)
	    close(fd);
	syslog(LOG_CRIT, "dhistory mmap error: %s", strerror(errno));
	exit(1);
    }
    HFd = fd;
    HScanOff = HHead.headSize + HHead.hashSize * sizeof(int32);
}

/*
 * On close, we have to commit the hash table if we were in
 * FAST mode, otherwise we need only unmap the file before
 * closing it.
 */

int
HistoryClose(void)
{
    int r = RCOK;

    if (HFd >= 0 && !(HFlags & HGF_READONLY)) {
	if (HFlags & HGF_FAST) {
	    lseek(HFd, HHead.headSize, 0);
	    if (write(HFd, HAry, HSize * sizeof(int32)) != HSize * sizeof(int32))
		r = RCTRYAGAIN;
	    else
		free(HAry);
	} else {
	    if (HAry && HAry != (uint32 *)-1) {
		if (HFlags & HGF_MLOCK)
		    munlock(HAry, HSize * sizeof(int32));
		xunmap((void *)HAry, HSize * sizeof(int32));
	    }
	}
    }
    if (r == RCOK) {
	if (HFd >= 0)
	    close(HFd);
	HFd = -1;
	HAry = NULL;
    }
    return(r);
}

void
printHistory(History *h)
{
    printf(" [hv=%08x.%08x gm=%d ex=%d off=%d len=%d F=%s]\n",
		h->hv.h1,
		h->hv.h2,
		(int)h->gmt,
		(int)h->exp,
		(int)h->boffset,
		(int)h->bsize,
		((h->exp & EXPF_HEADONLY) ? "H" : "")
    );
}

int
HistoryLookup(const char *msgid, History *nh)
{
    hash_t hv = hhash(msgid);
    int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32);
    int32 off = HAry[(hv.h1 ^ hv.h2) & HMask];
    History h = { 0 };
    static int HLAlt;
    int r = -1;
    int counter = 0;
    int statfailed = 0;

    while (off) {
	lseek(HFd, (off_t)(uint32)off, 0);
	if (read(HFd, &h, sizeof(h)) != sizeof(h)) {
	    if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) {
		LoggedDHistCorrupt |= 1;
		syslog(LOG_ERR, "dhistory file corrupted on lookup @ %d->%d chain %d", (int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask));
		sleep(1);
	    }
	    break;
	}
	if (h.hv.h1 == hv.h1 && h.hv.h2 == hv.h2)
	    break;
	poff = off;
	off = h.next;
	if (counter++ > 5000) {
	    syslog(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)",
			(int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask), msgid);
	    off = 0;
	}
    }
    if (off != 0)
	r = 0;

    /*
     * On failure, try alternate hash method (for lookup only)
     */

    if (r < 0 && DOpts.CompatHashMethod >= 0 && HLAlt == 0 && !statfailed) {
	int save = DOpts.HashMethod;

	DOpts.HashMethod = DOpts.CompatHashMethod;
	HLAlt = 1;
	r = HistoryLookup(msgid, nh);
	DOpts.HashMethod = save;
	HLAlt = 0;
    }

    if (r == 0 && nh != NULL)
	memcpy(nh, &h, sizeof(h));

    return(r);
}

int
HistoryLookupByHash(hash_t hv, History *h)
{
    int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32);
    int32 off = HAry[(hv.h1 ^ hv.h2) & HMask];
    int counter = 0;

    while (off) {
	lseek(HFd, (off_t)(uint32)off, 0);
	if (read(HFd, h, sizeof(*h)) != sizeof(*h)) {
	    if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) {
		LoggedDHistCorrupt |= 1;
		syslog(LOG_ERR, "dhistory file corrupted on lookup");
		sleep(1);
	    }
	    break;
	}
	if (h->hv.h1 == hv.h1 && h->hv.h2 == hv.h2)
	    break;
	poff = off;
	off = h->next;
	if (counter++ > 5000) {
	    syslog(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d",
			(int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask));
	    off = 0;
	}
    }
    if (off != 0) {
	return(0);
    }
    return(-1);
}

int32
HistoryPosLookupByHash(hash_t hv, History *h)
{
    int32 poff = HHead.headSize + ((hv.h1 ^ hv.h2) & HMask) * sizeof(int32);
    int32 off = HAry[(hv.h1 ^ hv.h2) & HMask];
    int counter = 0;

    while (off) {
	lseek(HFd, (off_t)(uint32)off, 0);
	if (read(HFd, h, sizeof(*h)) != sizeof(*h)) {
	    if ((LoggedDHistCorrupt & 1) == 0 || DebugOpt) {
		LoggedDHistCorrupt |= 1;
		syslog(LOG_ERR, "dhistory file corrupted on lookup");
		sleep(1);
	    }
	    break;
	}
	if (h->hv.h1 == hv.h1 && h->hv.h2 == hv.h2)
	    break;
	poff = off;
	off = h->next;
	if (counter++ > 5000) {
	    syslog(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d",
			(int)poff, (int)off, (int)((hv.h1 ^ hv.h2) & HMask));
	    off = 0;
	}
    }
    if (off != 0) {
	return(off);
    }
    return(-1);
}

int
HistoryAddOld(const char *msgid, History *h)
{
    int32 hi = (h->hv.h1 ^ h->hv.h2) & HMask;
    int32 poff = HHead.headSize + hi * sizeof(int32);
    int32 boff = poff;
    int r = RCOK;

    /*
     * This should not happen, but if it does, log it and pretend we did it
     */
    if (h->gmt == 0) {
	syslog(LOG_ERR, "Not adding history entry with gmt=0");
	return(RCOK);
    }

    if (HFlags & HGF_READONLY)
	return(RCOK);

    /*
     * record lock, search for message id
     *
     */

    if ((HFlags & HGF_FAST) == 0)	/* lock hash chain */
	hflock(HFd, boff, XLOCK_EX);

    /*
     * make sure message is not already in hash table
     */

    {
	int32 off;

	off = HAry[hi];

	if ((HFlags & HGF_NOSEARCH) == 0) {
	    int counter = 0;

	    while (off) {
		History ht;

		lseek(HFd, (off_t)(uint32)off, 0);
		if (read(HFd, &ht, sizeof(ht)) != sizeof(ht)) {
		    if ((LoggedDHistCorrupt & 2) == 0 || DebugOpt) {
			LoggedDHistCorrupt |= 2;
			syslog(LOG_ERR, "dhistory file corrupted @ %d", off);
			sleep(1);
		    }
		    break;
		}
		if (ht.hv.h1 == h->hv.h1 && ht.hv.h2 == h->hv.h2) {
		    r = RCALREADY;
		    break;
		}
		poff = off;
		off = ht.next;
		if (counter++ > 5000) {
		    syslog(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)",
				(int)poff, (int)off,
				(int)((ht.hv.h1 ^ ht.hv.h2) & HMask), msgid);
		    off = 0;
		}
	    }
	}
    }

    /*
     * If we are ok, reserve space in the history file for the record
     * and insert the record.  This may loop if intermediate scans determine
     * that we must append a new block to the file.
     */

    while (r == RCOK) {
	off_t offBeg;
	off_t offEnd;
	History hary[HBLKSIZE];

	/*
	 * append/scan lock
	 */

	if ((HFlags & HGF_FAST) == 0)	/* append/scan lock */
	    hflock(HFd, 4, XLOCK_EX);

	/*
	 * Figure out scan start & end point.  We only look at the last
	 * HBLKSIZE*2 records worth of entries in the worst case.
	 */

	offEnd = lseek(HFd, 0L, 2);
	{
	    off_t n;

	    n = offEnd - HHead.headSize - HHead.hashSize * sizeof(int32);
	    n = n % HHead.henSize;
	    offEnd -= n;
	}

	offBeg = HScanOff;

	if (offBeg > offEnd)
	    offBeg = offEnd;

	if (offBeg < offEnd - HBLKSIZE * sizeof(History)) {
	    off_t n;

	    offBeg = offEnd - HBLKSIZE * sizeof(History);
	    n = offBeg - HHead.headSize - HHead.hashSize * sizeof(int32);
	    n = n % HHead.henSize;
	    offBeg -= n;
	}

	/*
	 * Look for a blank record (gmt value is 0)
	 */

	lseek(HFd, offBeg, 0);

	while (offBeg < offEnd) {
	    int q = (offEnd - offBeg) / sizeof(History);
	    int i;
	    int n;

	    if (q > arysize(hary))
		q = arysize(hary);
	    if (q > HBlkIncr)
		q = HBlkIncr;

	    n = read(HFd, hary, q * sizeof(History));

	    if (n != q * sizeof(History)) {
		r = RCTRYAGAIN;
		break;
	    }

	    for (i = 0; r == RCOK && i < q; ++i) {
		if (hary[i].gmt == 0 && 
		    ((HFlags & HGF_FAST) ||
		    hflock(HFd, offBeg, XLOCK_NB|XLOCK_EX) >= 0)
		) {
		    break;
		}
		offBeg += sizeof(History);
	    }

	    /*
	     * Found, break, or not found, continue.  Adjust HBlkIncr
	     * appropriately.
	     */
	    if (i != q) {
		if (++HBlkGood == 4) {
		    HBlkGood = 0;
		    if ((HBlkIncr -= HBLKINCR) < HBLKINCR)
			HBlkIncr = HBLKINCR;
		}
		break;
	    }
	    if (offBeg != offEnd) {
		HBlkGood = 0;
		if ((HBlkIncr += HBLKINCR) > HBLKSIZE) {
		    HBlkIncr = HBLKSIZE;
		}
	    }
	}

	HScanOff = offBeg;

	/*
	 * break if something went wrong or if we found the record.  With
	 * the proper record explicitly locked, we can release the append
	 * lock.
	 */

	if (r != RCOK || offBeg != offEnd) {
	    if ((HFlags & HGF_FAST) == 0)	/* append/scan unlock */
		hflock(HFd, 4, XLOCK_UN);
	    break;
	}

	/*
	 * Otherwise append a new block and retry.  XXX put on filesystem
	 * block boundry.
	 */

	lseek(HFd, 0L, 2);
	bzero(hary, sizeof(hary));
	write(HFd, hary, sizeof(hary));
	if ((HFlags & HGF_FAST) == 0)
	    hflock(HFd, 4, XLOCK_UN);
    } /* WHILE */

    /*
     * If we did find a blank record, HScanOff is set to it.  The record
     * is locked at this point.
     *
     * We insert the record into the chain at the beginning of the
     * chain, even though we are appending the record.  This is for
     * time-locality of reference and also reduces 'append-to-history'
     * file overhead by localizing disk writes a bit better.
     *
     * XXX next/linkages are 32 bits.  At the moment we can go to 4G even
     * though we are using a signed 32 bit int because we cast seeks to
     * unsigned 32 bits.
     */

    if (r == RCOK) {
	int n;

	h->next = HAry[hi];

	lseek(HFd, HScanOff, 0);

	n = write(HFd, h, sizeof(History));

	if ((HFlags & HGF_FAST) == 0)
	    hflock(HFd, HScanOff, XLOCK_UN);

	if (n == sizeof(History)) {
	    if ((HFlags & HGF_FAST) == 0) {
		int32 off = (int32)HScanOff;	/* for write XXX */

		lseek(HFd, HHead.headSize + hi * sizeof(int32), 0);
		if (write(HFd, &off, sizeof(int32)) != sizeof(int32)) {
		    r = RCTRYAGAIN;
		}
	    } else {
		HAry[hi] = (int)HScanOff;
	    }
	} else {
	    r = RCTRYAGAIN;
	}
    }

    if ((HFlags & HGF_FAST) == 0)	/* unlock hash chain */
	hflock(HFd, boff, XLOCK_UN);

    return(r);
}

/*
 * This is a new and simplifed HistoryAdd that locks an append lock
 * (pos 4) and just appends the single history record and unlocks.
 * It then updates the hash chain (already locked) by adding the
 * new history entry at the beginning as usual.
 */
int
HistoryAdd(const char *msgid, History *h)
{
    int32 hi = (h->hv.h1 ^ h->hv.h2) & HMask;
    int32 poff = HHead.headSize + hi * sizeof(int32);
    int32 boff = poff;
    int r = RCOK;

    /*
     * This should not happen, but if it does, log it and pretend we did it
     */
    if (h->gmt == 0) {
	syslog(LOG_ERR, "Not adding history entry with gmt=0");
	return(RCOK);
    }
    /*
     * record lock, search for message id
     *
     */

    if ((HFlags & HGF_FAST) == 0)	/* lock hash chain */
	hflock(HFd, boff, XLOCK_EX);

    /*
     * make sure message is not already in hash table
     */

    {
	int32 off;

	off = HAry[hi];

	if ((HFlags & HGF_NOSEARCH) == 0) {
	    int counter = 0;

	    while (off) {
		History ht;

		lseek(HFd, (off_t)(uint32)off, 0);
		if (read(HFd, &ht, sizeof(ht)) != sizeof(ht)) {
		    if ((LoggedDHistCorrupt & 2) == 0 || DebugOpt) {
			LoggedDHistCorrupt |= 2;
			syslog(LOG_ERR, "dhistory file corrupted @ %d", off);
			sleep(1);
		    }
		    break;
		}
		if (ht.hv.h1 == h->hv.h1 && ht.hv.h2 == h->hv.h2) {
		    r = RCALREADY;
		    break;
		}
		poff = off;
		off = ht.next;
		if (counter++ > 5000) {
		    syslog(LOG_ERR, "dhistory file chain loop @ %d->%d chain %d (%s)",
				(int)poff, (int)off,
				(int)((ht.hv.h1 ^ ht.hv.h2) & HMask),
				msgid ? msgid : "no message-id");
		    off = 0;
		}
	    }
	}
    }

    while (r == RCOK) {
	off_t writePos;
	int n = 0;

	h->next = HAry[hi];

	if ((HFlags & HGF_FAST) == 0)	/* append/scan lock */
	    hflock(HFd, 4, XLOCK_EX);

	if ((writePos = lseek(HFd, 0L, 2)) == -1) {
	    r = RCTRYAGAIN;
	    break;
	}

	HScanOff = writePos;

	n = write(HFd, h, sizeof(History));

	if (n != sizeof(History)) {
	    lseek(HFd, writePos, 0);
	    ftruncate(HFd, writePos);
	}

	if ((HFlags & HGF_FAST) == 0)	/* append/scan lock */
	    hflock(HFd, 4, XLOCK_UN);

	if (n == sizeof(History)) {
	    if ((HFlags & HGF_FAST) == 0) {
		int32 off = (int32)HScanOff;	/* for write XXX */

		lseek(HFd, HHead.headSize + hi * sizeof(int32), 0);
		if (write(HFd, &off, sizeof(int32)) != sizeof(int32)) {
		    r = RCTRYAGAIN;
		}
	    } else {
		HAry[hi] = (int)HScanOff;
	    }
	} else {
	    r = RCTRYAGAIN;
	}
	break;
    }

    if ((HFlags & HGF_FAST) == 0)	/* unlock hash chain */
	hflock(HFd, boff, XLOCK_UN);

    return(r);
}

