/*
 * multilingual support for nvi
 * Copyright(c) 1996, 1997 by Jun-ichiro Itoh.  All rights reserved.
 * Author contact: <itojun@mt.cs.keio.ac.jp>
 * $Id: multibyte.c,v 1.1.2.4 1999/08/10 23:21:48 itojun Exp $
 *
 * Freely redistributable, reusable, unless otherwise noted in accompanying
 * document. (for example, redistribution is prohibited during alpha-test
 * period)
 * Absolutely no warranty.
 *
 * The code is based on:
 *	jelvis japanization patch by Jun-ichiro Itoh
 *	nvi 1.03 japanization patch by Yoshitaka Tokugawa <toku@dit.co.jp>
 */
/*
 * Derived code:
 *
 * The code for handling Big5 as iso-2022-* has been derived from
 * Mule(multilingual Emacs) by handa@etl.go.jp.
 * The code to determine KSC5601 character class was implemented by
 * kjlee@sgi.co.jp.
 * Canna support code was originally implemented by Nobuyuki Koganemaru
 * <kogane@kces.koganemaru.co.jp> for jelvis.
 */

#include "config.h"

#ifdef MULTIBYTE

#include <sys/types.h>
#include <sys/queue.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../common/common.h"
#include "multibyte.h"

/*
 * Static functions.
 */

/*
 * Maps an encoding name to its ENCODING entry; multi_name() treats a NULL
 * result as "not found".  Presumably it searches encodingtab[] -- the
 * definition is outside the part of the file reviewed here.
 */
static ENCODING const *multi_findencoding __P((const char *));
/*
 * Line get/set/delete helpers in a "raw" and a "db" flavour.  Their
 * definitions are outside the part of the file reviewed here;
 * multi_predict_region() selects dbgetline or rawgetline as its line
 * reader depending on its mode argument.
 */
static int rawgetline __P((SCR *, recno_t, CHAR_T **, size_t *));
static int dbgetline __P((SCR *, recno_t, CHAR_T **, size_t *));
static int rawsetline __P((SCR *, recno_t, CHAR_T *, size_t));
static int dbsetline __P((SCR *, recno_t, CHAR_T *, size_t));
static int rawdelline __P((SCR *, recno_t));
static int dbdelline __P((SCR *, recno_t));

/*
 * Charset/encoding table.  Will be accessed by multibyte routines.
 *
 * Each row initializes one CHARSET (declared in multibyte.h, which is not
 * visible here).  Judging from how multi_byte(), multi_len(), multi_name()
 * and multi_iswestern() read the table, the columns appear to be:
 *	name,
 *	blen	 (bytes the character occupies in a line buffer),
 *	len	 (display columns),
 *	character class function (NULL if none),
 *	type	 (CS94, CS96 or CS9494),
 *	enter	 (final byte of the designation escape sequence),
 *	version	 (when non-zero, emitted after ESC & before the designation),
 *	oldstyle (when set, multi_name() may omit the '(' after ESC $),
 *	western	 (value returned by multi_iswestern())
 * -- TODO confirm the field order against the CHARSET declaration.
 * Members left out at the end of a row are zero-initialized.
 */
CHARSET const charsettab[] = {
/*
 * Raw (US-ASCII) charsets
 * This meant to be ISO646 IRV 1991 for left half (GL), and binary chars
 * for right half (GR).
 */
/* CS_RAW0 */
	{ "raw bits",		2, 0,	NULL, CS96, 0, 0, },
/* CS_RAW1 */
	{ "raw bits",		2, 1,	NULL, CS96, 0, 0, },
/* CS_RAW2 */
	{ "raw bits",		2, 2,	NULL, CS96, 0, 0, },
/* CS_RAW3 */
	{ "raw bits",		2, 3,	NULL, CS96, 0, 0, },
/* CS_RAW4 */
	{ "raw bits",		2, 4,	NULL, CS96, 0, 0, },
/* CS_RAW5 */
	{ "raw bits",		2, 5,	NULL, CS96, 0, 0, },
/* CS_RAW6 */
	{ "raw bits",		2, 6,	NULL, CS96, 0, 0, },
/* CS_RAW7 */
	{ "raw bits",		2, 7,	NULL, CS96, 0, 0, },
/*
 * 94x94 charsets
 */
/* CS_JISX0208_1978 */
	{ "JIS x0208.1978",	3, 2,	jis0208_chclass, CS9494, '@', '\0', 1, 0, },
/* CS_GB2312 */
	{ "GB2312-80",		3, 2,	NULL, CS9494, 'A', '\0', 1, 0, },
/* CS_JISX0208_1983 */
	{ "JIS x0208.1983",	3, 2,	jis0208_chclass, CS9494, 'B', '\0', 1, 0, },
/* CS_JISX0208_1990 */
	{ "JIS x0208.1990",	3, 2,	jis0208_chclass, CS9494, 'B', '@', 1, 0, },
/* CS_KSC5601 */
	{ "KSC5601 1987",	3, 2,	ksc5601_chclass, CS9494, 'C', '\0', 0, 0, },
/* CS_JISX0212_1990 */
	{ "JIS x0212.1990",	3, 2,	NULL, CS9494, 'D', '\0', 0, 0, },
/* CS_ISO_IR165	*/
	{ "ISO IR-165",		3, 2,	NULL, CS9494, 'E', '\0', 0, 0, },
/* CS_CNS11643_1 */
	{ "CNS11643-1992-1",	3, 2,	NULL, CS9494, 'G', '\0', 0, 0, },
/* CS_CNS11643_2 */
	{ "CNS11643-1992-2",	3, 2,	NULL, CS9494, 'H', '\0', 0, 0, },
/* CS_CNS11643_3 */
	{ "CNS11643-1992-3",	3, 2,	NULL, CS9494, 'I', '\0', 0, 0, },
/* CS_CNS11643_4 */
	{ "CNS11643-1992-4",	3, 2,	NULL, CS9494, 'J', '\0', 0, 0, },
/* CS_CNS11643_5 */
	{ "CNS11643-1992-5",	3, 2,	NULL, CS9494, 'K', '\0', 0, 0, },
/* CS_CNS11643_6 */
	{ "CNS11643-1992-6",	3, 2,	NULL, CS9494, 'L', '\0', 0, 0, },
/* CS_CNS11643_7 */
	{ "CNS11643-1992-7",	3, 2,	NULL, CS9494, 'M', '\0', 0, 0, },
/* CS_JISX0213_1 */
	{ "JIS x0213-1",	3, 2,	jis0208_chclass, CS9494, 'O', '\0', 0, 0, },
/* CS_JISX0213_2 */
	{ "JIS x0213-2",	3, 2,	NULL, CS9494, 'P', '\0', 0, 0, },
/* CS_BIG5_1 */
	{ "Big5-1",		3, 2,	NULL, CS9494, '0', '\0', 0, 0, },
/* CS_BIG5_2 */
	{ "Big5-2",		3, 2,	NULL, CS9494, '1', '\0', 0, 0, },
/*
 * 94 charsets
 * Note that ASCII (\033(B) has been hardcoded as CS_NONE.
 */
/* CS_IRV */
	{ "IRV 1983",		2, 1,	NULL, CS94, '@', '\0', 0, 1, },
/* CS_UK */
	{ "UK",			2, 1,	NULL, CS94, 'A', '\0', 0, 1, },
/* CS_SWEDISH */
	{ "Swedish",		2, 1,	NULL, CS94, 'C', '\0', 0, 1, },
/* CS_NORWEGIAN	 */
	{ "Norwegian",		2, 1,	NULL, CS94, 'E', '\0', 0, 1, },
/* CS_JISX0201_RIGHT */
	{ "JIS x0201 right",	2, 1,	NULL, CS94, 'I', '\0', 0, 0, },
/* CS_JISX0201_LEFT */
	{ "JIS x0201 left",	2, 1,	NULL, CS94, 'J', '\0', 0, 1, },
/* CS_GERMAN */
	{ "German",		2, 1,	NULL, CS94, 'K', '\0', 0, 1, },
/* CS_FRENCH */
	{ "French",		2, 1,	NULL, CS94, 'R', '\0', 0, 1, },
/* CS_ITALIAN */
	{ "Italian",		2, 1,	NULL, CS94, 'Y', '\0', 0, 1, },
/* CS_SPANISH */
	{ "Spanish",		2, 1,	NULL, CS94, 'Z', '\0', 0, 1, },
/* CS_PRIVATE94_0 */
	{ "Private 94-0",	2, 1,	NULL, CS94, '0', '\0', 0, 1, },
/* CS_PRIVATE94_1 */
	{ "Private 94-1",	2, 1,	NULL, CS94, '1', '\0', 0, 1, },
/*
 * 96 charsets
 */
/* CS_LATIN1 */
	{ "Latin1",		2, 1,	NULL, CS96, 'A', '\0', 0, 1, },
/* CS_LATIN2 */
	{ "Latin2",		2, 1,	NULL, CS96, 'B', '\0', 0, 1, },
/* CS_LATIN3 */
	{ "Latin3",		2, 1,	NULL, CS96, 'C', '\0', 0, 1, },
/* CS_LATIN4 */
	{ "Latin4",		2, 1,	NULL, CS96, 'D', '\0', 0, 1, },
/* CS_GREEK */
	{ "Greek",		2, 1,	NULL, CS96, 'F', '\0', 0, 1, },
/* CS_ARABIC */
	{ "Arabic",		2, 1,	NULL, CS96, 'G', '\0', 0, 1, },
/* CS_HEBREW */
	{ "Hebrew",		2, 1,	NULL, CS96, 'H', '\0', 0, 1, },
/* CS_CYRILLIC */
	{ "Cyrillic",		2, 1,	NULL, CS96, 'L', '\0', 0, 1, },
/* CS_LATIN5R */
	{ "Latin5",		2, 1,	NULL, CS96, 'M', '\0', 0, 1, },
/* CS_THAI */
	{ "Thai",		2, 1,	NULL, CS96, 'T', '\0', 0, 1, },
/* CS_LATIN6 */
	{ "Latin6",		2, 1,	NULL, CS96, 'V', '\0', 0, 1, },
/* CS_LATIN6ADD */
	{ "Latin6 add",		2, 1,	NULL, CS96, 'X', '\0', 0, 1, },
/* CS_PRIVATE96_0 */
	{ "Private 96-0",	2, 1,	NULL, CS96, '0', '\0', 0, 1, },
/* CS_PRIVATE96_1 */
	{ "Private 96-1",	2, 1,	NULL, CS96, '1', '\0', 0, 1, },
/* termination */
	{ NULL, },
};

	/* options: maxcharset/maxside/shifttype/flags */
/*
 * Encoding table, terminated by a row whose name is NULL.
 *
 * Each row starts with the encoding name followed by four handlers; going
 * by their names they convert to and from the internal form, process key
 * input and build a display string.  Only the last one is used in the code
 * visible here: multi_name() calls e->display when it is non-NULL.
 * The iso-2022 style rows continue with four CS_* values (presumably the
 * initial G0..G3 designations -- TODO confirm against the ENCODING
 * declaration in multibyte.h), then maxcharset, maxside, shifttype, a
 * *_recommendation handler and the E_* flags that are tested with
 * F_ISSET() (e.g. E_NOOLDSTYLE in multi_name()).
 * Rows that stop early leave the remaining members zero-initialized.
 *
 * multi_name() falls back to &encodingtab[0] when the display encoding is
 * not found, so it relies on "none" being the first row.
 */
ENCODING const encodingtab[] = {
	/* XXX none has to be the first item. */
	{ "none",
		none_to_int, int_to_none, none_keyinput, NULL },

	/* iso-2022 encodings */
	{ "euc-jp-1978",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, eucjp_display,
		CS_NONE, CS_JISX0208_1978, CS_JISX0201_RIGHT, CS_JISX0212_1990,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-jp",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, eucjp_display,
		CS_NONE, CS_JISX0208_1983, CS_JISX0201_RIGHT, CS_JISX0212_1990,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-jp-1983",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, eucjp_display,
		CS_NONE, CS_JISX0208_1983, CS_JISX0201_RIGHT, CS_JISX0212_1990,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-jp-1990",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, eucjp_display,
		CS_NONE, CS_JISX0208_1990, CS_JISX0201_RIGHT, CS_JISX0212_1990,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-jp-0213",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, eucjp_display,
		CS_NONE, CS_JISX0213_1, CS_JISX0201_RIGHT, CS_JISX0213_2,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-cn",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, euc_display,
		CS_NONE, CS_GB2312, CS_INVALID, CS_INVALID,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "euc-kr",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, euc_display,
		CS_NONE, CS_KSC5601, CS_INVALID, CS_INVALID,
		4, 2, S_EUC, euc_recommendation,
		E_SOLRESET, },
	{ "iso-8859-1",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN1, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-2",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN2, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-3",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN3, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-4",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN4, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-7",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_GREEK, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-8",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_HEBREW, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-8859-9",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN5R, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "latin1",	/* same as iso-8859-1 */
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN1, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "latin2",	/* same as iso-8859-2 */
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN2, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-7-1",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		1, 1, S_NONE, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-7-2",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_NONE, CS_INVALID, CS_INVALID,
		2, 1, S_SEVEN, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-8-2",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_NONE, CS_INVALID, CS_INVALID,
		2, 2, S_EIGHT, NULL,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-jp",	/* same as iso-2022-jp-2 */
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		3, 1, S_SS2, iso2022jp_recommendation,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-jp-2",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		3, 1, S_SS2, iso2022jp_recommendation,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-jp-3",
		iso2022jp3_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		3, 1, S_SS2, iso2022jp_recommendation,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-kr",
		iso2022kr_to_int, int_to_iso2022kr, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		2, 1, S_SI|S_SO, iso2022kr_recommendation,
		0, },
	{ "iso-2022-cn",	/* same as iso-2022-cn-ext */
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		4, 1, S_SI|S_SO|S_SS2|S_SS3, iso2022cn_recommendation,
		E_SOLRESET|E_EOLRESET, },
	{ "iso-2022-cn-ext",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_INVALID, CS_INVALID, CS_INVALID,
		4, 1, S_SI|S_SO|S_SS2|S_SS3, iso2022cn_recommendation,
		E_SOLRESET|E_EOLRESET, },
	{ "ctext",
		iso2022_to_int, int_to_iso2022, iso2022_keyinput, NULL,
		CS_NONE, CS_LATIN1, CS_INVALID, CS_INVALID,
		2, 2, S_NONE, ctext_recommendation,
		E_SOLRESET|E_EOLRESET|E_NOOLDSTYLE, },

	/* non iso-2022 encodings */
	{ "sjis",
		sjis_to_int, int_to_sjis, sjis_keyinput,  sjis_display},
	{ "hz",
		hz_to_int, int_to_hz, NULL, NULL },
	{ "big5",
		big5_to_int, int_to_big5, big5_keyinput, big5_display},
	{ "euc-tw",
		euctw_to_int, int_to_euctw, euctw_keyinput, euc_display },
	{ NULL },
};

/*------------------------------------------------------------*/

/*
 * Support function to scan forward/backward multibyte chars.
 */

/*
 * Byte length of the character introduced by a single code.
 *
 * The argument is reduced to its low 8 bits.  If that byte marks a charset
 * (ischarset()), the blen recorded for the charset is returned; anything
 * else yields 1, so that callers can always write
 *	p += multi_byte_c(sp, *p);
 * where a non-multibyte implementation would have written
 *	p++;
 *
 * KEY_BLEN(sp, ch) uses this function.  You should use KEY_BLEN(sp, ch)
 * for daily use.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_byte_c __P((SCR *, ARG_CHAR_T));
 * PUBLIC: #endif
 */
int
multi_byte_c(sp, arg)
	SCR *sp;
	ARG_CHAR_T arg;
{
	CHAR_T lead;

	lead = arg & 0xff;
	return ischarset(lead) ? charset(lead).blen : 1;
}

/*
 * Byte length of the character that starts at p.
 *
 * When p[0] marks a charset the blen recorded for that charset is
 * returned.  For any other byte the result is 1, so that callers can
 * always write
 *	p += multi_byte(sp, p);
 * where a non-multibyte implementation would have written
 *	p++;
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_byte __P((SCR *, const CHAR_T *));
 * PUBLIC: #endif
 */
int
multi_byte(sp, p)
	SCR *sp;
	const CHAR_T *p;
{
	if (!ischarset(p[0]))
		return 1;
	return charset(p[0]).blen;
}

/*
 * Tell whether p points into the middle of a multibyte character.
 *
 * p0 is the start of the line and p the position to examine.  The result
 * is 0 when p sits on a character boundary; otherwise it is the number of
 * bytes p lies past the first byte of the character it belongs to.
 *
 * The backward scan is limited by MAX_MULTI_BLEN, so both loops stay
 * short.  The worst case is a line without multibyte characters, where
 * the backward scan runs to its limit before the answer is known.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_fix __P((SCR *, const CHAR_T *, const CHAR_T *));
 * PUBLIC: #endif
 */
int
multi_fix(sp, p0, p)
	SCR *sp;
	const CHAR_T *p0;
	const CHAR_T *p;
{
	int pos;		/* index into p0[] being inspected */
	int dist;		/* bytes between p0[pos] and p */

	/* At (or before) the start of the line there is nothing to fix. */
	if (!(p0 < p))
		return 0;
	/* A charset byte is the first byte of a character: aligned. */
	if (ischarset(p[0]))
		return 0;

	/*
	 * Walk backward from the byte just before p looking for a charset
	 * byte.  Index 0 is not tested inside the loop; it is covered by
	 * the check that follows.
	 */
	dist = 1;
	for (pos = p - p0 - 1; 0 < pos && dist <= MAX_MULTI_BLEN; pos--) {
		if (ischarset(p0[pos]))
			break;
		dist++;
	}

	/* No charset byte nearby: p is on a single-byte character. */
	if (!ischarset(p0[pos]))
		return 0;

	/*
	 * A charset byte was found, but the character it starts may end
	 * before p.  Step forward character by character until either p is
	 * reached exactly (dist becomes 0) or the character at pos extends
	 * beyond p (dist is then p's offset inside that character).
	 */
	while (0 < dist) {
		if (!ischarset(p0[pos])) {
			dist--;
			pos++;
			continue;
		}
		if (dist < charset(p0[pos]).blen)
			break;
		dist -= charset(p0[pos]).blen;
		pos += charset(p0[pos]).blen;
	}

	return dist;
}

/*
 * Number of bytes to subtract from p to move one character backward:
 *	p -= multi_back(sp, p0, p);
 * p0 is the start of the line.  The result is never less than 1.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_back __P((SCR *, const CHAR_T *, const CHAR_T *));
 * PUBLIC: #endif
 */
int
multi_back(sp, p0, p)
	SCR *sp;
	const CHAR_T *p0;
	const CHAR_T *p;
{
	int off;

	/* Already at the start of the line. */
	if (!(p0 < p))
		return 1;

	/* p is inside a character: go back to that character's first byte. */
	off = multi_fix(sp, p0, p);
	if (off != 0)
		return off;

	/*
	 * p is aligned.  If the byte before it is inside a multibyte
	 * character, step over that whole character, otherwise over one byte.
	 */
	off = multi_fix(sp, p0, p - 1);
	return off != 0 ? off + 1 : 1;
}

/*
 * Number of screen columns taken by the character introduced by a code.
 * The argument is reduced to its low 8 bits; charset bytes report the len
 * recorded for the charset, everything else is handed to KEY_LEN().
 * You can use KEY_LEN(sp, ch) for daily use.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_len_c __P((SCR *, ARG_CHAR_T));
 * PUBLIC: #endif
 */
int
multi_len_c(sp, arg)
	SCR *sp;
	ARG_CHAR_T arg;
{
	CHAR_T code;

	code = arg & 0xff;
	if (!ischarset(code))
		return KEY_LEN(sp, code);	/* for safety */
	return charset(code).len;
}

/*
 * Number of screen columns taken by the character that starts at p.
 * Charset bytes report the len recorded for the charset, everything else
 * is handed to KEY_LEN().
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_len __P((SCR *, const CHAR_T *));
 * PUBLIC: #endif
 */
int
multi_len(sp, p)
	SCR *sp;
	const CHAR_T *p;
{
	if (!ischarset(p[0]))
		return KEY_LEN(sp, p[0]);	/* for safety */
	return charset(p[0]).len;
}

/*
 * Returns actual text to be displayed.
 * Relevant to KEY_NAME(sp, ch) for singlebyte chars.
 * Text will not be preserved if you call multi_name() again.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: CHAR_T *multi_name __P((SCR *, const CHAR_T *));
 * PUBLIC: #endif
 */
CHAR_T *
multi_name(sp, p)
	SCR *sp;
	const CHAR_T *p;
{
	static CHAR_T name[30];
	char *ename;
	ENCODING const *e;

	/* Easy ones first. */
	if (!(p[0] & 0x80))
		return KEY_NAME(sp, p[0]);
	if (CS_RAW(p[0]))
		return v_key_name(sp, p[1] | 0x80);

	/* Plugin functions for supporting each encoding */
	ename = O_STR(sp, O_DISPLAYENCODING);
	e = multi_findencoding(ename);
	if (!e)
		e = &encodingtab[0];
	if (e->display && e->display(sp, e, name, p))
		return name;
	
	/* if would like to skip undisplayable ones... */
	if (O_ISSET(sp, O_SKIPDISPLAY)) {
		size_t i;

		for (i = 0; i < KEY_LEN(sp, p[0]); i++)
			name[i] = '?';
		name[i] = '\0';
		return name;
	}
	
	/* try iso-2022-* G0 designation */
	if (ischarset(p[0])) {
		CHARSET const *cs;
		size_t i, j;

		cs = &charset(p[0]);
		i = 0;

		if (cs->version) {
			name[i++] = '\033';
			name[i++] = '&';
			name[i++] = cs->version;
		}

		name[i++] = '\033';
		switch (cs->type) {
		case CS94:
			name[i++] = '(';
			break;
		case CS96:
			name[i++] = ',';
			break;
		case CS9494:
		case CS9696:
			name[i++] = '$';
			if (F_ISSET(e, E_NOOLDSTYLE) || !cs->oldstyle)
				name[i++] = '(';
			break;
		}
		name[i++] = cs->enter;

		for (j = 1; j < charset(p[0]).blen; j++)
			name[i++] = p[j];
		name[i] = '\0';
		return name;
	}

	/* for safety */
	return KEY_NAME(sp, p[0]);
}

/*
 * Returns the character starting at p, still in internal encoding, as a
 * NUL-terminated string.  For use in ex_print() and other routines.
 * The result lives in a static buffer and is overwritten by the next call.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: CHAR_T *multi_rawname __P((SCR *, const CHAR_T *));
 * PUBLIC: #endif
 */
CHAR_T *
multi_rawname(sp, p)
	SCR *sp;
	const CHAR_T *p;
{
	static CHAR_T name[MAX_MULTI_BLEN + 1];
	size_t nbytes;
	size_t k;

	/* A byte that is not a charset byte stands for itself. */
	nbytes = ischarset(p[0]) ? charset(p[0]).blen : 1;

	for (k = 0; k < nbytes; k++)
		name[k] = p[k];
	name[nbytes] = '\0';
	return name;
}

/*------------------------------------------------------------*/

/*
 * Returns 1 if the code is ASCII (CS_NONE) or a binary (CS_RAW) char,
 * 0 otherwise.  Only the low 8 bits of the argument are looked at.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_isnone __P((SCR *, ARG_CHAR_T));
 * PUBLIC: #endif
 */
int
multi_isnone(sp, ch)
	SCR *sp;
	ARG_CHAR_T ch;
{
	ch &= 0xff;

	/* 7-bit codes are ASCII. */
	if (!(ch & 0x80))
		return 1;

	return CS_RAW(ch) ? 1 : 0;
}

/*
 * Returns 1 if it is western charset. (word divided by space)
 * Codes without the 0x80 bit always count as western; for the others the
 * western member of the charset entry decides.  Only the low 8 bits of
 * the argument are looked at.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_iswestern __P((SCR *, ARG_CHAR_T));
 * PUBLIC: #endif
 */
int
multi_iswestern(sp, ch)
	SCR *sp;
	ARG_CHAR_T ch;
{
	ch &= 0xff;
	return (ch & 0x80) ? charset(ch).western : 1;
}

/*
 * Word boundary recognition. (for "W" command)
 *
 * c1 and c2 describe two neighbouring characters: bits 8-15 are compared
 * as the character set and the low 8 bits as a strength within the set.
 * Returns 1 when a word boundary lies between them, 0 otherwise.
 * c1 == 0 means "just beginning" and never yields a boundary.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_Wordbound __P((u_int, u_int, int));
 * PUBLIC: #endif
 */
int
multi_Wordbound(c1, c2, forward)
	u_int c1;
	u_int c2;
	int forward;
{
	u_int weight1, weight2;

	/* Just beginning: don't bother. */
	if (c1 == 0)
		return 0;

	/* Different character sets always form a boundary. */
	if (((c1 ^ c2) & 0xff00) != 0)
		return 1;

	/*
	 * Same character set.  Going forward, a boundary is hit when the
	 * next char is stronger; going backward, when the left-hand char
	 * is weaker, i.e. when c1 is the stronger of the two.
	 */
	weight1 = c1 & 0xff;
	weight2 = c2 & 0xff;
	if (forward)
		return weight1 < weight2 ? 1 : 0;
	return weight1 > weight2 ? 1 : 0;
}

/*
 * Word boundary recognition. (for "w" command)
 *
 * c1 and c2 describe two neighbouring characters: bits 8-15 are compared
 * as the character set and the low 8 bits as the character class.
 * Returns 1 when the two differ in either part, 0 otherwise.
 * c1 == 0 means "just beginning" and never yields a boundary.
 * The forward argument is not consulted.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_wordbound __P((u_int, u_int, int));
 * PUBLIC: #endif
 */
int
multi_wordbound(c1, c2, forward)
	u_int c1;
	u_int c2;
	int forward;
{
	/* Just beginning: don't bother. */
	if (c1 == 0)
		return 0;

	/*
	 * A difference in the character set (0xff00) or in the character
	 * class (0x00ff) is a boundary; both tests fold into one mask.
	 */
	return (((c1 ^ c2) & 0xffff) != 0) ? 1 : 0;
}

/*------------------------------------------------------------*/

/*
 * Encoding autodetection routines.
 *
 * XXX
 * The logic is not very robust against lines that contain binary garbage.
 * How should we treat escape sequences that are not iso-2022-* designations
 * (such as ANSI cursor movement)?
 */

/*
 * Per-line detectors, one per encoding family.  Each takes a line and its
 * length and returns -1 when the line rules the encoding out, 1 when it
 * contains evidence for the encoding and 0 when it says nothing either
 * way (see the switch in multi_predict_region()).
 */
static int multi_predict_iso2022 __P((SCR *, CHAR_T *, size_t));
static int multi_predict_euc __P((SCR *, CHAR_T *, size_t));
static int multi_predict_euctw __P((SCR *, CHAR_T *, size_t));
static int multi_predict_sjis __P((SCR *, CHAR_T *, size_t));
static int multi_predict_big5 __P((SCR *, CHAR_T *, size_t));
static int multi_predict_hz __P((SCR *, CHAR_T *, size_t));

/*
 * Decide whether the line p[0..len-1] looks like iso-2022-* text.
 *
 * The scan keeps a small model of the iso-2022 state: cstype[n] is the
 * kind of set designated to Gn (94, 96, 9494, 9696, or -1 when nothing is
 * designated), gl/gr are the elements invoked into the left and right
 * halves, and gl1/gr1 hold a pending single shift (-1 when none).
 *
 * Returns
 *	 1 if a designation or a properly designated standard char was seen,
 *	-1 if a shift or a character refers to an element that has no
 *	   designation,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so every set test below rejects '\0'
 * explicitly; otherwise "ESC NUL ..." would be taken for a designation or
 * shift and index cstype[] out of bounds.
 */
static int
multi_predict_iso2022(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;
	int cstype[4];
	int gl, gr;
	int gl1, gr1;
	int tmp, mask;

	ret = 0;
	cstype[0] = 94;	/*ASCII*/
	cstype[1] = cstype[2] = cstype[3] = -1;
	gl = 0;
	gr = 1;
	gl1 = gr1 = -1;

	i = 0;
	while (i < len) {
		/*
		 * If we have some designation, it must be iso-2022-*.
		 * it means that the string is iso-2022-*.
		 */
		/* Old style 94x94 designation to G0: ESC $ @, ESC $ A, ESC $ B */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2])) {
			cstype[0] = 9494;
			ret = 1;
			i += 3;
			continue;
		}
		/* 94 set: ESC ( ) * + F     96 set: ESC , - . / F */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2])) {
			if (p[i + 1] < ',')
				cstype[p[i + 1] - '('] = 94;
			else
				cstype[p[i + 1] - ','] = 96;
			ret = 1;
			i += 3;
			continue;
		}
		/* Multibyte sets: ESC $ followed by the same intermediates. */
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3])) {
			if (p[i + 2] < ',')
				cstype[p[i + 2] - '('] = 9494;
			else
				cstype[p[i + 2] - ','] = 9696;
			ret = 1;
			i += 4;
			continue;
		}

		/*
		 * locking shift.  there must be designation first.
		 * we don't take it as an evidence for iso-2022-*.
		 */
		if (p[i] == SI) {
			if (cstype[0] < 0)
				return -1;

			/* SI invokes G0 into the left half. */
			gl = 0;
			i++;
			continue;
		}
		if (p[i] == SO) {
			if (cstype[1] < 0)
				return -1;

			/* SO invokes G1 into the left half. */
			gl = 1;
			i++;
			continue;
		}
		/* ESC n / ESC o: G2 / G3 into the left half. */
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("no", p[i + 1])) {
			gl = p[i + 1] - 'n' + 2;
			if (cstype[gl] < 0)
				return -1;

			i += 2;
			continue;
		}
		/* ESC ~ / ESC } / ESC |: G1 / G2 / G3 into the right half. */
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("~}|", p[i + 1])) {
			gr = 3 - (p[i + 1] - '|');
			if (cstype[gr] < 0)
				return -1;

			i += 2;
			continue;
		}

		/*
		 * single shift. there must be designation first.
		 * we don't take it as an evidence for iso-2022-*.
		 */
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NO", p[i + 1])) {
			gl1 = p[i + 1] - 'N' + 2;
			if (cstype[gl1] < 0)
				return -1;

			i += 2;
			continue;
		}
		if (p[i] == SS2 || p[i] == SS3) {
			gl1 = gr1 = p[i] - SS2 + 2;
			if (cstype[gl1] < 0)
				return -1;

			i++;
			continue;
		}

		/*
		 * check for standard chars.
		 * if there are properly designated standard chars, it must be
		 * ok to take it as iso-2022-* string.
		 *
		 * XXX
		 * the logic simply skips improperly designated standard chars.
		 * how should we treat it?
		 */
		if (p[i] & 0x80) {
			tmp = (gr1 < 0) ? gr : gr1;
			mask = 0x80;
		} else {
			tmp = (gl1 < 0) ? gl : gl1;
			mask = 0x00;
		}

		/*
		 * A 94 set occupies 0x21..0x7e and a 96 set 0x20..0x7f;
		 * mask moves the range into the right half for 8bit bytes.
		 */
		switch (cstype[tmp]) {
		case -1:
			return -1;
		case 94:
			if (0x21 + mask <= p[i] && p[i] <= 0x7e + mask)
				ret = 1;
			i++;
			break;
		case 96:
			if (0x20 + mask <= p[i] && p[i] <= 0x7f + mask)
				ret = 1;
			i++;
			break;
		case 9494:
			if (i + 1 < len
			 && 0x21 + mask <= p[i+0] && p[i+0] <= 0x7e + mask
			 && 0x21 + mask <= p[i+1] && p[i+1] <= 0x7e + mask) {
				ret = 1;
				i += 2;
			} else
				i++;
			break;
		case 9696:
			if (i + 1 < len
			 && 0x20 + mask <= p[i+0] && p[i+0] <= 0x7f + mask
			 && 0x20 + mask <= p[i+1] && p[i+1] <= 0x7f + mask) {
				ret = 1;
				i += 2;
			} else
				i++;
			break;
		}

		/* A single shift covers one character only. */
		gl1 = gr1 = -1;
	}

	return ret;
}

/*
 * Decide whether the line p[0..len-1] looks like an EUC string
 * (euc-jp, euc-cn, euc-kr).
 *
 * Returns
 *	 1 if at least one EUC sequence was seen (two iseuckanji() bytes,
 *	   SS2 plus one such byte, or SS3 plus two such bytes),
 *	-1 if the line holds an iso-2022 designation or shift, SO/SI, or a
 *	   truncated/invalid EUC sequence,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so the set tests reject '\0' explicitly;
 * otherwise "ESC NUL" would be mistaken for a designation or shift.
 */
static int
multi_predict_euc(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;

	ret = 0;
	i = 0;
	while (i < len) {
		/*
		 * If we have some designation other than SS2/SS3,
		 * it will never be euc string.
		 */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2]))
			return -1;
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2]))
			return -1;
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3]))
			return -1;
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NOno~}|", p[i + 1]))
			return -1;
		if (p[i] == SO || p[i] == SI)
			return -1;

		/*
		 * if we see euc bytes, this line can be euc string.
		 * if we see firstbyte of euc char, and secondbyte does not
		 * match, it is not an euc string.
		 */
		if (iseuckanji(p[i])) {
			if (i + 1 < len && iseuckanji(p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		/*
		 * if we see SS2, and the next byte is valid, it is euc string.
		 */
		if (p[i] == SS2) {
			if (i + 1 < len && iseuckanji(p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		/*
		 * if we see SS3, and the next bytes are valid,
		 * it is euc string.
		 */
		if (p[i] == SS3) {
			if (i + 2 < len
			 && iseuckanji(p[i + 1]) && iseuckanji(p[i + 2])) {
				ret = 1;
				i += 3;
				continue;
			}

			return -1;
		}

		i++;
	}
	return ret;
}

/*
 * Decide whether the line p[0..len-1] looks like an euc-tw string.
 *
 * Returns
 *	 1 if at least one euc-tw sequence was seen (two iseuckanji() bytes,
 *	   or SS2 followed by three such bytes),
 *	-1 if the line holds an iso-2022 designation or shift, SO/SI/SS3,
 *	   or a truncated/invalid sequence,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so the set tests reject '\0' explicitly;
 * otherwise "ESC NUL" would be mistaken for a designation or shift.
 */
static int
multi_predict_euctw(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;

	ret = 0;
	i = 0;
	while (i < len) {
		/*
		 * If we have some designation other than SS2,
		 * it will never be euc-tw string.
		 */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2]))
			return -1;
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2]))
			return -1;
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3]))
			return -1;
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NOno~}|", p[i + 1]))
			return -1;
		if (p[i] == SO || p[i] == SI || p[i] == SS3)
			return -1;

		/*
		 * if we see euc bytes, this line can be euc string.
		 * if we see firstbyte of euc-tw char, and secondbyte does not
		 * match, it is not an euc-tw string.
		 */
		if (iseuckanji(p[i])) {
			if (i + 1 < len && iseuckanji(p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		/*
		 * if we see SS2, and the next three bytes are valid,
		 * it is euc-tw string.
		 */
		if (p[i] == SS2) {
			if (i + 3 < len && iseuckanji(p[i + 1])
			 && iseuckanji(p[i + 2]) && iseuckanji(p[i + 3])) {
				ret = 1;
				i += 4;
				continue;
			}

			return -1;
		}

		i++;
	}
	return ret;
}

/*
 * Decide whether the line p[0..len-1] looks like a sjis string.
 *
 * Returns
 *	 1 if at least one two-byte sjis character was seen
 *	   (issjiskanji1() byte followed by an issjiskanji2() byte),
 *	-1 if the line holds an iso-2022 designation or shift, SO/SI, or a
 *	   sjis first byte without a valid second byte,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so the set tests reject '\0' explicitly;
 * otherwise "ESC NUL" would be mistaken for a designation or shift.
 */
static int
multi_predict_sjis(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;

	ret = 0;
	i = 0;
	while (i < len) {
		/*
		 * If we have some designation, it will never be
		 * sjis string.
		 */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2]))
			return -1;
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2]))
			return -1;
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3]))
			return -1;
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NOno~}|", p[i + 1]))
			return -1;
		if (p[i] == SO || p[i] == SI)
			return -1;
		/*
		 * don't check SS2 and SS3, they are part of issjiskanji1().
		 * (thanks serow-san)
		 */

		/*
		 * if we see sjis bytes, this line can be sjis string.
		 * if we see firstbyte of sjis char, and secondbyte does not
		 * match, it is not an sjis string.
		 */
		if (issjiskanji1(p[i])) {
			if (i + 1 < len && issjiskanji2(p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		i++;
	}
	return ret;
}

/*
 * Decide whether the line p[0..len-1] looks like a big5 string.
 *
 * Returns
 *	 1 if at least one two-byte big5 character was seen
 *	   (isbig5mb1() byte followed by an isbig5mb2() byte),
 *	-1 if the line holds an iso-2022 designation or shift,
 *	   SO/SI/SS2/SS3, or a big5 first byte without a valid second byte,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so the set tests reject '\0' explicitly;
 * otherwise "ESC NUL" would be mistaken for a designation or shift.
 */
static int
multi_predict_big5(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;

	ret = 0;
	i = 0;
	while (i < len) {
		/*
		 * If we have some designation, it will never be
		 * big5 string.
		 */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2]))
			return -1;
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2]))
			return -1;
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3]))
			return -1;
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NOno~}|", p[i + 1]))
			return -1;
		if (p[i] == SO || p[i] == SI)
			return -1;
		if (p[i] == SS2 || p[i] == SS3)
			return -1;

		/*
		 * if we see big5 bytes, this line can be big5 string.
		 * if we see firstbyte of big5 char, and secondbyte does not
		 * match, it is not an big5 string.
		 */
		if (isbig5mb1(p[i])) {
			if (i + 1 < len && isbig5mb2(p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		i++;
	}
	return ret;
}

/*
 * Decide whether the line p[0..len-1] looks like a hz string.
 *
 * Returns
 *	 1 if at least one hz escape was seen ("~{", "~}", "~~", or a "~"
 *	   that ends the line),
 *	-1 if the line holds an 8bit byte, an iso-2022 designation or shift,
 *	   SO/SI/SS2/SS3, or a "~" followed by anything else,
 *	 0 otherwise.
 *
 * The line may contain NUL bytes.  strchr() also matches the terminating
 * NUL of its first argument, so the set tests reject '\0' explicitly;
 * otherwise "ESC NUL" would be mistaken for a designation or shift and
 * "~ NUL" for a hz escape.
 */
static int
multi_predict_hz(sp, p, len)
	SCR *sp;
	CHAR_T *p;
	size_t len;
{
	size_t i;
	int ret;

	ret = 0;
	i = 0;
	while (i < len) {
		/*
		 * hz is 7bit code.
		 */
		if (p[i] & 0x80)
			return -1;

		/*
		 * If we have some designation, it will never be
		 * hz string.
		 */
		if (i + 2 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0' && strchr("@AB", p[i + 2]))
			return -1;
		if (i + 2 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("()*+,-./", p[i + 1]) && isecma(p[i + 2]))
			return -1;
		if (i + 3 < len && p[i] == '\033' && p[i + 1] == '$'
		 && p[i + 2] != '\0'
		 && strchr("()*+,-./", p[i + 2]) && isecma(p[i + 3]))
			return -1;
		if (i + 1 < len && p[i] == '\033' && p[i + 1] != '\0'
		 && strchr("NOno~}|", p[i + 1]))
			return -1;
		if (p[i] == SO || p[i] == SI)
			return -1;
		if (p[i] == SS2 || p[i] == SS3)
			return -1;

		/*
		 * if we see /~[{}~]/ or /~$/, this line can be hz string.
		 * if there's ~ in other cases, it will not be hz string.
		 */
		if (p[i] == '~') {
			if (i == len - 1) {
				ret = 1;
				i++;
				continue;
			}
			if (i + 1 < len && p[i + 1] != '\0'
			 && strchr("{}~", p[i + 1])) {
				ret = 1;
				i += 2;
				continue;
			}

			return -1;
		}

		i++;
	}
	return ret;
}

/*
 * encoding detection table.
 * number of elements in func[] and name[] must match.
 *
 * multi_predict_region() picks the first row whose key is a prefix of its
 * autodetect argument; the row with a NULL key ends the table.
 * func[] is a NULL-terminated list of per-line detectors and name[] the
 * parallel list of encoding names: when func[i] is the only detector that
 * has not been ruled out, name[i] is returned as the predicted encoding.
 * The candidates are tracked as bits of an int, one bit per func[] slot.
 */
struct _detecttab {
	char *key;		/* prefix matched against the autodetect value */
	int (*func[10])();	/* detectors, NULL-terminated */
	char *name[10];		/* encoding name reported for func[i] */
} detecttab[] = {
	{ "jp",
		{ multi_predict_iso2022,
		  multi_predict_euc,
		  multi_predict_sjis,
		  NULL, },
		{ "iso-2022-jp",
		  "euc-jp",
		  "sjis",
		  NULL, }, },
	{ "cn",
		{ multi_predict_iso2022,
		  multi_predict_euc,
		  multi_predict_big5,
		  multi_predict_hz,
		  NULL, },
		{ "iso-2022-cn",
		  "euc-cn",
		  "big5",
		  "hz",
		  NULL, }, },
	{ "tw",
		{ multi_predict_iso2022,
		  multi_predict_euctw,
		  multi_predict_big5,
		  multi_predict_hz,
		  NULL, },
		{ "iso-2022-cn",
		  "euc-tw",
		  "big5",
		  "hz",
		  NULL, }, },
	{ "kr",
		{ multi_predict_iso2022,
		  multi_predict_euc,
		  NULL, },
		{ "iso-2022-kr",
		  "euc-kr",
		  NULL, }, },
	/* terminator */
	{ NULL,
		{ NULL, },
		{ NULL, }, },
};

/*
 * Encoding type prediction by scanning the lines mstart->lno through
 * mend->lno.
 *
 * autodetect selects a row of detecttab[] by prefix ("jp", "cn", ...).
 * Every non-empty line is handed to the detectors of that row which are
 * still candidates; a detector answering -1 is dropped.  As soon as
 * exactly one candidate is left its encoding name is returned.
 *
 * ename is returned unchanged when the range is empty, no detecttab[] row
 * matches, every candidate has been dropped, or the scan ends (last line
 * reached or the line reader failed) without a single winner.
 *
 * Lines are read with dbgetline() when mode is non-zero and with
 * rawgetline() otherwise.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: char *multi_predict_region __P((SCR *, MARK *, MARK *, char *, char *, int));
 * PUBLIC: #endif
 */
char *
multi_predict_region(sp, mstart, mend, ename, autodetect, mode)
	SCR *sp;
	MARK *mstart;
	MARK *mend;
	char *ename;
	char *autodetect;
	int mode;
{
	/*
	 * configuration parameter.
	 * If it is undefined, try to search all the way
	 */
/* #define PREDICT_MAXLINES	100 */

	struct _detecttab *tab;
	int (*getline_fn)();
	recno_t first;
	recno_t last;
	recno_t lno;
	CHAR_T *line;
	size_t linelen;
	size_t k;
	int alive;		/* bit k set: tab->func[k] still a candidate */
	int snapshot;		/* candidates at the start of the line */
	int ndetect;		/* number of detectors in tab->func[] */
	int verdict;

	getline_fn = (mode ? dbgetline : rawgetline);

	first = mstart->lno;
	last = mend->lno;
	if (first == 0 || last < first)
		return ename;
#ifdef PREDICT_MAXLINES
	if (first + PREDICT_MAXLINES < last)
		last = first + PREDICT_MAXLINES;
#endif

	/* find out detecttab */
	tab = detecttab;
	while (tab->key != NULL
	    && strncmp(autodetect, tab->key, strlen(tab->key)) != 0)
		tab++;
	if (tab->key == NULL)
		return ename;

	/* Every detector of the row starts out as a candidate. */
	alive = 0;
	k = 0;
	while (tab->func[k] != NULL) {
		alive |= (1 << k);
		k++;
	}
	ndetect = k;

	for (lno = first; lno <= last; lno++) {
		if ((*getline_fn)(sp, lno, &line, &linelen))
			break;
		if (linelen == 0)
			continue;

		snapshot = alive;
		for (k = 0; k < ndetect; k++) {
			if ((snapshot & (1 << k)) == 0)
				continue;
			verdict = (*tab->func[k])(sp, line, linelen);
			if (verdict == -1) {
				/* remove this code. */
				alive &= ~(1 << k);
			} else if (verdict == 1) {
				/* add this code. */
				alive |= (1 << k);	/*XXX no effect */
			}
			/* 0: leave it as is. */
		}

		/* no codedetection succeed. */
		if (alive == 0)
			return ename;

		/* Exactly one candidate left: that is the prediction. */
		for (k = 0; k < ndetect; k++)
			if (alive == (1 << k))
				return tab->name[k];
	}

	return ename;

#undef PREDICT_MAXLINES
}
#if 0
{
	/*
	 * configuration parameter.
	 * If it is undefined, try to search all the way
	 */
/* #define PREDICT_MAXLINES	100 */

	recno_t start;
	recno_t end;
	recno_t l;

	CHAR_T *p;
	size_t i;
	size_t len;
#define AD_ISO2022		0x01
#define AD_EUC			0x02
#define AD_SJIS			0x04
#define AD_BIG5			0x08
#define AD_HZ			0x10
#define AD_JP			0x07
#define AD_CN			0x1b
#define AD_KR			0x03
#define AD_UNKNOWN		0x1f
	int state;
	int try;
	int again;
	int (*reader)();
	int siso, ss2ss3, jp, jpnew, kr, cn, hz;
	int iso2022skip, hzskip;
	
	reader = (mode ? dbgetline : rawgetline);

	start = mstart->lno;
	end = mend->lno;
	if (start == 0 || end < start)
		return ename;
#ifdef PREDICT_MAXLINES
	if (start + PREDICT_MAXLINES < end)
		end = start + PREDICT_MAXLINES;
#endif
	if (strncmp(autodetect, "jp", 2) == 0)
		state = AD_JP;
	else if (strncmp(autodetect, "cn", 2) == 0)
		state = AD_CN;
	else if (strncmp(autodetect, "kr", 2) == 0)
		state = AD_KR;
	else
		return ename;

	siso = ss2ss3 = jp = jpnew = kr = cn = hz = 0;
	iso2022skip = hzskip = 0;
	for (l = start; l < end; l++) {
		if ((*reader)(sp, l, &p, &len))
			break;
		if (len == 0)
			continue;
		
		i = 0;
		while (state && i < len) {
			if (state & AD_ISO2022) {
				if (!iso2022skip && p[i] == '\033') {
					if (i + 2 < len && p[i + 1] == '$'
					 && strchr("AB@", p[i + 2])) {
						state = AD_ISO2022;
						iso2022skip = 2;

						if (strchr("B@", p[i + 2]))
							jp++;
						else
							cn++;
					} else if (i + 2 < len
					    && strchr("()*+,-./", p[i + 1])
					    && isecma(p[i + 2])) {
						state = AD_ISO2022;
						iso2022skip = 2;
					} else if (i + 3 < len
					    && p[i + 1] == '$'
					    && strchr("()*+,-./", p[i + 2])
					    && isecma(p[i + 3])) {
						state = AD_ISO2022;
						iso2022skip = 3;

						if (strchr("AEGHIJKLM", p[i + 3]))
							cn++;
						else if (strchr("@BD",
								p[i + 3])) {
							jpnew++;
							jp++;
						}
						else if (strchr("C", p[i + 3]))
							kr++;
					} else if (i + 1 < len
					    && strchr("NO", p[i + 1])) {
						state = AD_ISO2022;
						iso2022skip = 1;

						ss2ss3++;
					}
				} else if (!iso2022skip
				    && strchr("\016\017", p[i])) {
					state = AD_ISO2022;

					siso++;
				} else if (iso2022skip)
					iso2022skip--;
			}

			if (state & AD_HZ) {
				if (!hzskip && p[i] == '~') {
					if (i + 1 < len
					 && strchr("{}~", p[i + 1])) {
						hz++;
						hzskip = 1;
					} else if (i + 1 == len)
						hz++;
				} else if (hzskip)
					hzskip--;
			}

			if (state & (AD_EUC | AD_SJIS | AD_BIG5)) {
				try = 0;

				if (i + 1 < len
				 && iseuckanji(p[i]) && iseuckanji(p[i + 1])) {
					try |= AD_EUC;
				} else if (i + 1 < len && p[i] == 0x8e
				    && iseuckanji(p[i + 1])) {
					try |= AD_EUC;
				} else if (i + 2 < len && p[i] == 0x8f
				    && iseuckanji(p[i + 1])
				    && iseuckanji(p[i + 2])) {
					try |= AD_EUC;
				}

				if (i + 1 < len
				 && issjiskanji1(p[i]) && issjiskanji2(p[i + 1])) {
					try |= AD_SJIS;
				}
				
				if (i + 1 < len
				 && isbig5mb1(p[i]) && isbig5mb2(p[i + 1])) {
					try |= AD_BIG5;
				}

				switch (try & state) {
				case AD_EUC:
					state = AD_EUC;
					goto breakbreak;
				case AD_SJIS:
					state = AD_SJIS;
					goto breakbreak;
				case AD_BIG5:
					state = AD_BIG5;
					goto breakbreak;
				case 0:
					if (try)
						goto breakbreak;
					else
						break;
				default:
					state &= try;
					i++;
					break;
				}
			}

			i++;
		}
	}
breakbreak:;
	
	switch (state) {
	case AD_ISO2022:
		if (jpnew)
			return "ctext";
		else if (!jp && kr && !cn && siso && !ss2ss3)
			return "iso-2022-kr";
		else if (!siso && !ss2ss3)
		else
			return "iso-2022-7-1";
	case AD_SJIS:
		return "sjis";
	case AD_BIG5:
		return "big5";
	case AD_EUC:
		if (strcmp(autodetect, "jp") == 0)
			return "euc-jp";
		else if (strcmp(autodetect, "cn") == 0)
			return "euc-cn";
		else if (strcmp(autodetect, "kr") == 0)
			return "euc-kr";
		/* fall through */
	default:
		if ((state & AD_HZ) && hz)
			return "hz";
		else
			return ename;
	}
#undef AD_ISO2022
#undef AD_EUC
#undef AD_SJIS
#undef AD_BIG5
#undef AD_UNKNOWN

#undef PREDICT_MAXLINES
}
#endif

/*------------------------------------------------------------*/

/*
 * Perform code conversion from external to internal, over TEXT structure.
 *
 * Each TEXT on the "tiqh" queue is converted in place using the encoding
 * named by string option "opt" ("none" if the option is unset or empty,
 * the first encodingtab entry if the name is unknown).  A line is first
 * run through the converter with a NULL output buffer to learn the result
 * length and flags; only lines flagged MB_MULTIBYTE are rewritten.
 *
 * The conversion state lives on the stack (set up by multi_initstate(),
 * exactly what multi_cnvstart() would have produced for the same
 * encoding), so the early returns hidden in the *_RET allocation macros
 * cannot leak it.
 *
 * Returns 0 on success.  The *_RET macros presumably return non-zero from
 * this function on allocation failure -- their definitions are not in
 * this file.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_ext2int_text __P((SCR *, TEXTH *, int));
 * PUBLIC: #endif
 */
int
multi_ext2int_text(sp, tiqh, opt)
	SCR *sp;
	TEXTH *tiqh;
	int opt;
{
	char *ename;
	ENCODING const *e;
	TEXT *tp;
	CHAR_T *np;
	size_t nlen;
	size_t ilen;
	int flags;
	ISO2022STATE cstate;	/* state carried from line to line */
	ISO2022STATE tstate;	/* throw-away copy for the sizing pass */

	/*
	 * Even if there's no encoding specified, we have to convert
	 * chars that are 0x80 <= ch <= 0xff.
	 */
	ename = O_STR(sp, opt);
	if (!ename || !ename[0])
		ename = "none";
	e = multi_findencoding(ename);
	if (!e)
		e = &encodingtab[0];
	if (!e->toint)
		abort();	/*XXX*/

	multi_initstate(sp, e, &cstate);

	GET_SPACE_RET(sp, np, nlen, 256);
	for (tp = tiqh->cqh_first; tp != (TEXT *)tiqh; tp = tp->q.cqe_next) {
		/*
		 * Sizing pass: must not disturb the real state.  Start from
		 * a clean flag word in case the converter only sets bits.
		 */
		tstate = cstate;
		flags = 0;
		(*e->toint)(sp, e, NULL, &ilen, tp->lb, tp->len, &flags, &tstate);
		if (!LF_ISSET(MB_MULTIBYTE))
			continue;

		/* Make room in both the scratch buffer and the TEXT buffer. */
		if (nlen < ilen)
			ADD_SPACE_RET(sp, np, nlen, ilen);
		if (tp->lb_len < ilen)
			BINC_RET(sp, tp->lb, tp->lb_len, ilen);

		/* Real pass: convert into scratch, then copy back in place. */
		(*e->toint)(sp, e, np, &ilen, tp->lb, tp->len, NULL, &cstate);
		memcpy(tp->lb, np, ilen);
		tp->len = ilen;
	}
	FREE_SPACE(sp, np, nlen);
	return 0;
}

/*------------------------------------------------------------*/

/*
 * Perform code conversion from external to internal, over some range of
 * database.  The variable "mode" controls how to access the database.
 */

/*
 * rawgetline --
 *	Fetch record "l" straight from the underlying db handle of sp->ep.
 *
 * Returns 1 when the db get method reports -1 (error).  Otherwise returns 0
 * and stores the record through "pp"/"plen" (either may be NULL); a get
 * result of 1 is treated as an empty line (NULL pointer, length 0).
 * Aborts if there is no screen or no file attached to it.
 */
static int
rawgetline(sp, l, pp, plen)
	SCR *sp;
	recno_t l;
	CHAR_T **pp;
	size_t *plen;
{
	EXF *ep;
	DBT key, data;
	CHAR_T *line;
	size_t linelen;
	int rc;

	if (sp == NULL)
		abort();
	ep = sp->ep;
	if (ep == NULL)
		abort();

	key.data = &l;
	key.size = sizeof(l);
	rc = ep->db->get(ep->db, &key, &data, 0);

	/* something wrong happened. */
	if (rc == -1)
		return 1;

	if (rc == 1) {
		/* this line seems to be empty. */
		line = NULL;
		linelen = 0;
	} else {
		line = data.data;
		linelen = data.size;
	}

	if (pp != NULL)
		*pp = line;
	if (plen != NULL)
		*plen = linelen;
	return 0;
}

/*
 * dbgetline --
 *	Line reader that goes through db_get() with the DBG_FATAL flag;
 *	db_get()'s result is returned unchanged.
 */
static int
dbgetline(sp, l, pp, plen)
	SCR *sp;
	recno_t l;
	CHAR_T **pp;
	size_t *plen;
{
	int rval;

	rval = db_get(sp, l, DBG_FATAL, pp, plen);
	return rval;
}

/*
 * rawsetline --
 *	Store "len" items at "p" as record "l" directly through the db
 *	handle of sp->ep, then invalidate the cached line.
 *
 * Always returns 0; aborts if there is no screen/file or if the db put
 * method reports -1.
 */
static int
rawsetline(sp, l, p, len)
	SCR *sp;
	recno_t l;
	CHAR_T *p;
	size_t len;
{
	EXF *ep;
	DBT key, data;

	if (sp == NULL)
		abort();
	ep = sp->ep;
	if (ep == NULL)
		abort();

	/* XXX don't log the update */

	data.data = p;
	data.size = len;
	key.data = &l;
	key.size = sizeof(l);

	SIGBLOCK;
	if (ep->db->put(ep->db, &key, &data, 0) == -1)
		abort();	/*XXX*/
	SIGUNBLOCK;

	/* invalidate the line cache */
	ep->c_lno = OOBLNO;

	return 0;
}

/*
 * dbsetline --
 *	Line writer that goes through db_set(); db_set()'s result is
 *	returned unchanged.
 */
static int
dbsetline(sp, l, p, len)
	SCR *sp;
	recno_t l;
	CHAR_T *p;
	size_t len;
{
	int rval;

	rval = db_set(sp, l, p, len);
	return rval;
}

/*
 * rawdelline --
 *	Delete record "l" directly through the db handle of sp->ep.
 *
 * mark_insdel() and ex_g_insdel() are notified first; if either returns
 * non-zero the function returns 1 without touching the database.  On
 * success the line cache is invalidated, the cached line count (if valid)
 * is decremented, and 0 is returned.  Aborts if there is no screen/file or
 * if the db del method reports -1.
 */
static int
rawdelline(sp, l)
	SCR *sp;
	recno_t l;
{
	EXF *ep;
	DBT key;

	if (sp == NULL)
		abort();
	ep = sp->ep;
	if (ep == NULL)
		abort();

	if (mark_insdel(sp, LINE_DELETE, l) || ex_g_insdel(sp, LINE_DELETE, l))
		return 1;

	/* XXX don't log the update */

	key.data = &l;
	key.size = sizeof(l);
	SIGBLOCK;
	if (ep->db->del(ep->db, &key, 0) == -1)
		abort();	/*XXX*/
	SIGUNBLOCK;

	/* invalidate the line cache, update the line# cache. */
	ep->c_lno = OOBLNO;
	if (ep->c_nlines != OOBLNO)
		ep->c_nlines--;

	/* updates of the modify flag are suppressed */

	return 0;
}

/*
 * dbdelline --
 *	Line deleter that goes through db_delete(); db_delete()'s result is
 *	returned unchanged.
 */
static int
dbdelline(sp, l)
	SCR *sp;
	recno_t l;
{
	int rval;

	rval = db_delete(sp, l);
	return rval;
}

/*
 * multi_ext2int_region --
 *	Convert database lines mstart->lno .. mend->lno from the external
 *	encoding named by string option "opt" to the internal form.
 *
 * If "autodetect" is non-NULL the encoding is first guessed with
 * multi_predict_region().  A non-zero "mode" accesses lines through the
 * db_get/db_set/db_delete wrappers, zero through the raw db handle.
 * Lines for which the converter raises MB_JOIN are merged with the lines
 * that follow them; the merged-away lines are deleted and mend->lno is
 * reduced by their count.  If "update" is non-zero and "autodetect" ends
 * in '+', option "opt" is set to the encoding name actually used.
 *
 * The conversion state is kept on the stack (initialised the same way
 * multi_cnvstart() would for this encoding) so that the early returns
 * hidden in the *_RET allocation macros cannot leak it.
 *
 * Returns 1 for an invalid region, 0 on success.  The *_RET macros
 * presumably return non-zero on allocation failure -- their definitions
 * are not in this file.  Read/write/delete failures abort.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_ext2int_region __P((SCR *, MARK *, MARK *, int, char *, int, int));
 * PUBLIC: #endif
 */
int
multi_ext2int_region(sp, mstart, mend, opt, autodetect, mode, update)
	SCR *sp;
	MARK *mstart;
	MARK *mend;
	int opt;
	char *autodetect;
	int mode;
	int update;
{
	ENCODING const *e;
	recno_t start;
	recno_t end;
	recno_t l;

	char *ename;

	CHAR_T *op;
	size_t olen;
	CHAR_T *np;
	size_t nlen;
	size_t ilen;
	int flags;
	ISO2022STATE cstate;	/* state carried from line to line */
	ISO2022STATE tstate;	/* throw-away copy for the sizing pass */

	/* join */
	recno_t joinline;	/* line being appended to, 0 if none */
	size_t joincnt;		/* number of lines merged away */

	int (*reader)();
	int (*writer)();
	int (*deleter)();

	reader = (mode ? dbgetline : rawgetline);
	writer = (mode ? dbsetline : rawsetline);
	deleter = (mode ? dbdelline : rawdelline);

	/*
	 * Even if there's no encoding specified, we have to convert
	 * chars that are 0x80 <= ch <= 0xff.
	 */
	ename = O_STR(sp, opt);
	if (!ename || !ename[0])
		ename = "none";

	/*
	 * Try autodetect the encoding.
	 */
	if (autodetect) {
		ename = multi_predict_region(sp, mstart, mend, ename,
				autodetect, mode);
	}
	e = multi_findencoding(ename);
	if (!e)
		e = &encodingtab[0];
	if (!e->toint)
		abort();	/*XXX*/

	start = mstart->lno;
	end = mend->lno;
	if (start == 0 || end < start)
		return 1;	/*parameter problem*/
	GET_SPACE_RET(sp, np, nlen, 256);
	joinline = 0;
	joincnt = 0;
	multi_initstate(sp, e, &cstate);
	for (l = start; l <= end; l++) {
		if ((*reader)(sp, l, &op, &olen))
			abort();
		if (olen == 0)
			continue;

		/*
		 * Sizing pass on a copy of the state.  Start from a clean
		 * flag word in case the converter only sets bits.
		 */
		tstate = cstate;
		flags = 0;
		(*e->toint)(sp, e, NULL, &ilen, op, olen, &flags, &tstate);
		if (!LF_ISSET(MB_MULTIBYTE|MB_JOIN) && !joinline)
			continue;
		if (nlen < ilen)
			ADD_SPACE_RET(sp, np, nlen, ilen);
		(*e->toint)(sp, e, np, &ilen, op, olen, &flags, &cstate);
		if (!joinline) {
			if (flags & MB_JOIN)
				joinline = l;
			if ((*writer)(sp, l, np, ilen))
				abort();
			continue;
		}

		/* join the line */
		if ((*reader)(sp, joinline, &op, &olen))
			abort();
		if (nlen < olen + ilen)
			ADD_SPACE_RET(sp, np, nlen, olen + ilen);
		/*
		 * Shift the freshly converted text up to make room for the
		 * existing line.  Source and destination overlap whenever
		 * olen < ilen, so this has to be memmove(), not memcpy().
		 */
		memmove(np + olen, np, ilen);
		memcpy(np, op, olen);
		if ((*writer)(sp, joinline, np, olen + ilen))
			abort();
		if (flags & MB_JOIN)
			;		/* continue to join */
		else
			joinline = 0;	/* join finished */

		/*
		 * The current line now lives in joinline; drop it and
		 * revisit the same line number, which holds the next line.
		 */
		if ((*deleter)(sp, l))
			abort();
		l--;
		end--;
		joincnt++;
	}
	FREE_SPACE(sp, np, nlen);

	if (joincnt)
		mend->lno -= joincnt;

	/*
	 * update by autodetected encoding.
	 */
	if (update && autodetect && strlen(autodetect)
	 && autodetect[strlen(autodetect) - 1] == '+') {
		ARGS *argv[2], a, b;
		char buf[1024];

		a.bp = buf;
		b.bp = NULL;
		a.len = b.len = 0;
		argv[0] = &a;
		argv[1] = &b;

		snprintf(buf, sizeof(buf), "%s=%s", optlist[opt].name, ename);
		a.len = strlen(buf);
		opts_set(sp, argv, NULL);
	}
	return 0;
}

/*------------------------------------------------------------*/

/*
 * Conversion state management.
 */

/*
 * multi_cnvstart --
 *	Allocate a conversion state and initialise it for the named
 *	encoding (the first encodingtab entry if the name is NULL or
 *	unknown).
 *
 * Returns the new state as an opaque pointer, to be released with
 * multi_cnvfinish(), or NULL if memory could not be obtained.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void *multi_cnvstart __P((SCR *, char *));
 * PUBLIC: #endif
 */
void *
multi_cnvstart(sp, encoding)
	SCR *sp;
	char *encoding;
{
	ISO2022STATE *p;
	ENCODING const *e;

	e = encoding ? multi_findencoding(encoding) : NULL;
	if (!e)
		e = &encodingtab[0];

	/* Never hand an unallocated state to multi_initstate(). */
	p = malloc(sizeof(*p));
	if (p == NULL)
		return NULL;
	multi_initstate(sp, e, p);
	return p;
}

/*
 * multi_cnvfinish --
 *	Release a conversion state.  "extra" is handed to free(); "sp" is
 *	not used.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void multi_cnvfinish __P((SCR *, void *));
 * PUBLIC: #endif
 */
void
multi_cnvfinish(sp, extra)
	SCR *sp;
	void *extra;
{
	void *state;

	state = extra;
	free(state);
}

/*
 * State initialization.  Only accessible to multi_*.c so we don't include
 * public header description.
 *
 * Clears the four charset slots, sets gl to 0 and gr to 1, and then, when
 * an encoding entry is supplied, points each slot at the charset named by
 * the matching initg0..initg3 field, provided ischarset() accepts it.
 * "sp" is not used.
 */
void
multi_initstate(sp, e, state)
	SCR *sp;
	ENCODING const *e;
	ISO2022STATE *state;
{
	size_t slot;

	for (slot = 0; slot < 4; slot++)
		state->charsets[slot] = (CHARSET *) NULL;
	state->gl = 0;
	state->gr = 1;

	/* Without an encoding there is nothing to designate. */
	if (!e)
		return;

	if (ischarset(e->initg0))
		state->charsets[0] = &charset(e->initg0);
	if (ischarset(e->initg1))
		state->charsets[1] = &charset(e->initg1);
	if (ischarset(e->initg2))
		state->charsets[2] = &charset(e->initg2);
	if (ischarset(e->initg3))
		state->charsets[3] = &charset(e->initg3);
}

/*------------------------------------------------------------*/

/*
 * Character set conversion, from internal to external.
 * Will be treated in per-line manner.
 */

/*
 * Obtain the size of the memory region for holding result of multi_int2ext().
 *
 * The internal text p0[0..len) is run through the O_FILEENCODING converter
 * with a NULL output buffer, on a private copy of the conversion state
 * ("extra", or a freshly initialised state if "extra" is NULL), so the
 * caller's state is left untouched.  *pflags, if given, is cleared first
 * and then passed to the converter.
 *
 * Returns the converted length, or "len" unchanged when there is no
 * screen, the option is unset, or the encoding is unknown or has no
 * internal-to-external converter.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_int2extlen __P((SCR *, CHAR_T *, size_t, int *, void *));
 * PUBLIC: #endif
 */
int
multi_int2extlen(sp, p0, len, pflags, extra)
	SCR *sp;
	CHAR_T *p0;
	size_t len;
	int *pflags;
	void *extra;
{
	size_t ilen;
	ENCODING const *e;
	ISO2022STATE tstate;
	ISO2022STATE *state;
	char *ename;

	state = extra;
	if (pflags)
		FL_INIT(*pflags, 0);

	/* If we don't know which encoding to use, do nothing. */
	if (!sp)
		return len;

	/*
	 * An unset option is treated like an unknown encoding; the other
	 * conversion entry points in this file guard against a NULL name
	 * the same way.
	 */
	ename = O_STR(sp, O_FILEENCODING);
	if (!ename)
		return len;
	e = multi_findencoding(ename);
	if (!e || !e->fromint)
		return len;

	/* use copy of state since i don't wanna bother it */
	if (!state)
		multi_initstate(sp, e, &tstate);
	else
		tstate = *state;

	(*e->fromint)(sp, e, NULL, &ilen, p0, len, pflags, &tstate);

	return ilen;
}

/*
 * Converts specified region into external expression.
 *
 * p0[0..len) is converted with the O_FILEENCODING converter and the
 * result, which must be exactly "ilen" long (the program aborts
 * otherwise), is copied back over p0.  The caller is presumably expected
 * to have sized p0 for "ilen" using multi_int2extlen() -- nothing here
 * checks it.
 *
 * "extra" is the conversion state to advance.  If it is NULL a freshly
 * initialised state is used, matching what multi_int2extlen() does when
 * it computes the length without a state.
 *
 * Returns 0, also when nothing is done because there is no screen, the
 * option is unset, or the encoding is unknown or has no converter.
 * GET_SPACE_RET presumably returns non-zero on allocation failure.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_int2ext __P((SCR *, CHAR_T *, size_t, size_t, void *));
 * PUBLIC: #endif
 */
int
multi_int2ext(sp, p0, len, ilen, extra)
	SCR *sp;
	CHAR_T *p0;
	size_t len;
	size_t ilen;
	void *extra;
{
	CHAR_T *t0;
	size_t tlen;
	ENCODING const *e;
	size_t newlen;
	ISO2022STATE tstate;
	ISO2022STATE *state;
	char *ename;

	state = extra;

	/* If we don't know which encoding to use, do nothing. */
	if (!sp)
		return 0;

	ename = O_STR(sp, O_FILEENCODING);
	if (!ename)
		return 0;
	e = multi_findencoding(ename);
	if (!e || !e->fromint)
		return 0;

	/* Never hand the converter a NULL state. */
	if (!state) {
		multi_initstate(sp, e, &tstate);
		state = &tstate;
	}

	GET_SPACE_RET(sp, t0, tlen, ilen);
	(*e->fromint)(sp, e, t0, &newlen, p0, len, NULL, state);
	if (ilen != newlen)
		abort();
	memcpy(p0, t0, newlen);
	FREE_SPACE(sp, t0, tlen);

	return 0;
}

/*------------------------------------------------------------*/

/*
 * keyboard input entrypoint.
 */

/*
 * dirty cheat to pass SCR* to encoding conversion code inside db routines.
 *
 * keyinput_stateinit is a one-shot flag: while it is non-zero, the next
 * multi_keyinput() call that reaches an encoding handler clears the
 * caller's state word and then clears this flag.
 */
static int keyinput_stateinit = 1;

/*
 * multi_keyinput_resetstate --
 *	Re-arm the flag above so that the keyboard-input conversion state
 *	is reset on the next multi_keyinput() call.  "sp" is not used.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void multi_keyinput_resetstate __P((SCR *));
 * PUBLIC: #endif
 */
void
multi_keyinput_resetstate(sp)
	SCR *sp;
{
	keyinput_stateinit = 1;
}

/*
 * multi_keyinput --
 *	Feed raw keyboard bytes through the O_INPUTENCODING key handler.
 *
 * kbuf/*pkbuflen hold the pending raw bytes; intbuf/*pintbuflen receive
 * the converted text; *pstate is the handler's state word, cleared on the
 * first call after multi_keyinput_resetstate().  Bytes the handler
 * reports as consumed are removed from the front of kbuf and *pkbuflen is
 * reduced accordingly.
 *
 * Without a screen, with the option unset, or with an encoding that is
 * unknown or has no key handler, the input is copied to intbuf verbatim,
 * *pstate is cleared, and kbuf/*pkbuflen are left as they were.
 *
 * When built with CANNA, the handler is canna_keyinput() while Canna is
 * enabled, and the O_CANNAKEY character at the head of the buffer toggles
 * it.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void multi_keyinput __P((SCR *, CHAR_T *, size_t *, CHAR_T *, size_t *, int *));
 * PUBLIC: #endif
 */
void
multi_keyinput(sp, kbuf, pkbuflen, intbuf, pintbuflen, pstate)
	SCR *sp;
	CHAR_T *kbuf;
	size_t *pkbuflen;
	CHAR_T *intbuf;
	size_t *pintbuflen;
	int *pstate;
{
	ENCODING const *e;
	char *ename;
	size_t consumed;

	/* Look the handler up; any missing piece leaves e NULL. */
	e = NULL;
	if (sp) {
		ename = O_STR(sp, O_INPUTENCODING);
		if (ename)
			e = multi_findencoding(ename);
	}

	/* If we don't know which encoding to use, do nothing. */
	if (!e || !e->keyinput) {
		memcpy(intbuf, kbuf, *pkbuflen);
		*pintbuflen = *pkbuflen;
		*pstate = 0;
		return;
	}

	if (keyinput_stateinit) {
		*pstate = 0;
		keyinput_stateinit = 0;
	}

#ifdef CANNA
	/*
	 * The length test keeps kbuf[0] from being read, and
	 * *pkbuflen - 1 from wrapping, on an empty buffer.
	 */
	if (*pkbuflen > 0 && O_ISSET(sp, O_CANNA)
	 && kbuf[0] == O_STR(sp, O_CANNAKEY)[0]) {
		if (canna_enabled(sp) && !canna_fenceactive(sp)) {
			canna_force_off(sp);

			/* consume control-O. */
			memmove(kbuf, kbuf + 1, *pkbuflen - 1);
			*pkbuflen = *pkbuflen - 1;
		} else if (!canna_enabled(sp)) {
			canna_force_on(sp);

			/* consume control-O. */
			memmove(kbuf, kbuf + 1, *pkbuflen - 1);
			*pkbuflen = *pkbuflen - 1;
		}
	}
#endif

	/* Nothing is consumed unless the handler says so. */
	consumed = 0;
#ifndef CANNA
	(*e->keyinput)(sp, e, kbuf, *pkbuflen, intbuf, pintbuflen,
		pstate, &consumed);
#else
	if (canna_enabled(sp)) {
		canna_keyinput(sp, e, kbuf, *pkbuflen, intbuf, pintbuflen,
			pstate, &consumed);
	} else {
		(*e->keyinput)(sp, e, kbuf, *pkbuflen, intbuf, pintbuflen,
			pstate, &consumed);
	}
#endif /*CANNA*/

	/* Drop the consumed prefix from the raw key buffer. */
	if (consumed == 0)
		; /*nothing*/
	else if (consumed < *pkbuflen) {
		memmove(kbuf, kbuf + consumed, *pkbuflen - consumed);
		*pkbuflen = *pkbuflen - consumed;
	} else
		*pkbuflen = 0;
}

/*------------------------------------------------------------*/

/*
 * encoding type table manipulation.
 */

/*
 * Returns 1 if p holds the name of valid encoding type.
 *
 * The first encodingtab entry whose name equals "p" decides the answer:
 * for O_FILEENCODING it must have both the toint and fromint converters,
 * for O_INPUTENCODING it must have a keyinput handler, and for
 * O_DISPLAYENCODING any entry will do.  Unknown names and any other "opt"
 * yield 0.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: int multi_isencoding __P((const char *, int));
 * PUBLIC: #endif
 */
int
multi_isencoding(p, opt)
	const char *p;
	int opt;
{
	size_t idx;

	/* Locate the first entry carrying this exact name. */
	idx = 0;
	while (encodingtab[idx].name && strcmp(p, encodingtab[idx].name) != 0)
		idx++;
	if (!encodingtab[idx].name)
		return 0;

	switch (opt) {
	case O_FILEENCODING:
		if (encodingtab[idx].toint && encodingtab[idx].fromint)
			return 1;
		return 0;
	case O_DISPLAYENCODING:
		return 1;
	case O_INPUTENCODING:
		if (encodingtab[idx].keyinput)
			return 1;
		return 0;
	}
	return 0;
}

/*
 * Returns pointer to the encoding table entry (ENCODING *)
 * for the encoding type name held by p.
 *
 * An entry matches when its name equals the whole of "p", or equals the
 * part of "p" in front of the first ';' (when that part is non-empty).
 * Returns NULL if "p" is NULL or nothing matches.
 */
static ENCODING const *
multi_findencoding(p)
	const char *p;
{
	size_t i;
	size_t n;
	const char *pp;

	/* Option strings may be unset; treat that as "no such encoding". */
	if (p == NULL)
		return NULL;

	/* n is the length of the name part before ';', 0 if there is none. */
	pp = strchr(p, ';');
	n = pp ? (size_t)(pp - p) : 0;

	for (i = 0; encodingtab[i].name; i++) {
		if (strcmp(p, encodingtab[i].name) == 0)
			return &encodingtab[i];
		if (n && strncmp(p, encodingtab[i].name, n) == 0
		 && strlen(encodingtab[i].name) == n)
			return &encodingtab[i];
	}
	return NULL;
}

/*------------------------------------------------------------*/

/*
 * v_fep_on --
 *	When built with CANNA and both the O_CANNA and O_CANNACTRL options
 *	are set, call canna_on(); otherwise do nothing.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void v_fep_on __P((SCR *));
 * PUBLIC: #endif
 */
void
v_fep_on(sp)
	SCR *sp;
{
#ifdef CANNA
	if (!(O_ISSET(sp, O_CANNA) && O_ISSET(sp, O_CANNACTRL)))
		return;
	canna_on(sp);
#endif /*CANNA*/
}

/*
 * v_fep_off --
 *	When built with CANNA and both the O_CANNA and O_CANNACTRL options
 *	are set, call canna_off(); otherwise do nothing.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void v_fep_off __P((SCR *));
 * PUBLIC: #endif
 */
void
v_fep_off(sp)
	SCR *sp;
{
#ifdef CANNA
	if (!(O_ISSET(sp, O_CANNA) && O_ISSET(sp, O_CANNACTRL)))
		return;
	canna_off(sp);
#endif /*CANNA*/
}

/*
 * v_fep_force_off --
 *	When built with CANNA and both the O_CANNA and O_CANNACTRL options
 *	are set, call canna_force_off(); otherwise do nothing.
 *
 * PUBLIC: #ifdef MULTIBYTE
 * PUBLIC: void v_fep_force_off __P((SCR *));
 * PUBLIC: #endif
 */
void
v_fep_force_off(sp)
	SCR *sp;
{
#ifdef CANNA
	if (!(O_ISSET(sp, O_CANNA) && O_ISSET(sp, O_CANNACTRL)))
		return;
	canna_force_off(sp);
#endif /*CANNA*/
}

#endif /*MULTIBYTE*/

