/*
 * p64-addblk.cc --
 *
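 *      Routines of P64Decoder that write 8x8 blocks into the output
 *      image: splice() stores a block plus a DC offset, while addblk()
 *      and addblka() add a block to reference pixels and store the result.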
 *
 * Copyright (c) 1996-2002 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * A. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * B. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * C. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Splice a decoded block directly into the construction buffer.
 *
 * Stores an 8x8 block into the output image, adding the constant
 * `dc' to every sample on the way.
 *
 *	dc	value added to each of the 64 samples
 *	blk	64 samples, 8 per row, rows stored back to back
 *	out	address of the first output byte of the top row
 *	stride	distance in bytes between consecutive output rows
 *
 * Each output row is written with two u_int stores, so every row of
 * `out' must be aligned for a u_int access and u_int is assumed to
 * be 32 bits wide.
 *
 * The sums are not clamped: only the low 8 bits of blk[i] + dc are
 * stored, so a sum outside 0..255 wraps modulo 256.
 */
inline void P64Decoder::splice(int dc, short* blk, u_char* out, u_int stride)
{
	/* FIXME can we get overflow from idct/iquant? */

	for (int k = 8; --k >= 0; ) {
		u_int v;
		u_int* o = (u_int*)out;
		/*
		 * Every sum is converted to u_int before it is masked
		 * and shifted.  Shifting the signed sum directly is
		 * undefined when it is negative or does not fit, which
		 * is exactly the overflow case the FIXME worries about.
		 */
#if BYTE_ORDER == LITTLE_ENDIAN
		v = (u_int)(blk[0] + dc) & 0xff;
		v |= ((u_int)(blk[1] + dc) & 0xff) << 8;
		v |= ((u_int)(blk[2] + dc) & 0xff) << 16;
		v |= ((u_int)(blk[3] + dc) & 0xff) << 24;
		o[0] = v;

		v = (u_int)(blk[4] + dc) & 0xff;
		v |= ((u_int)(blk[5] + dc) & 0xff) << 8;
		v |= ((u_int)(blk[6] + dc) & 0xff) << 16;
		v |= ((u_int)(blk[7] + dc) & 0xff) << 24;
		o[1] = v;
#else
		v = ((u_int)(blk[0] + dc) & 0xff) << 24;
		v |= ((u_int)(blk[1] + dc) & 0xff) << 16;
		v |= ((u_int)(blk[2] + dc) & 0xff) << 8;
		v |= (u_int)(blk[3] + dc) & 0xff;
		o[0] = v;

		v = ((u_int)(blk[4] + dc) & 0xff) << 24;
		v |= ((u_int)(blk[5] + dc) & 0xff) << 16;
		v |= ((u_int)(blk[6] + dc) & 0xff) << 8;
		v |= (u_int)(blk[7] + dc) & 0xff;
		o[1] = v;
#endif
		/* Advance to the next row of samples and of output. */
		blk += 8;
		out += stride;
	}
}

/*
 * Mix in a motion-compensated, unfiltered block.  Note that
 * the input block may be misaligned so we cannot try fancy,
 * word-at-a-time accesses without being careful.  The output
 * block is, of course, aligned.
 *
 *	blk	64 samples, 8 per row, rows stored back to back
 *	in	reference pixels, read one byte at a time
 *	out	destination pixels, written as two u_int words per row
 *	stride	distance in bytes between rows, for both in and out
 *
 * Each output byte is UCLIMIT(blk[i] + in[i]) with only its low
 * 8 bits kept.  UCLIMIT is defined outside this file; presumably
 * it saturates its argument to 0..255 -- confirm against its
 * definition.
 */
void P64Decoder::addblk(short* blk, u_char* in,
			u_char* out, u_int stride)
{
#ifdef notyet
	if (((u_long)in & 3) == 0) {
		/*
		 * Input buffer aligned.
		 * Take advantage of this.
		 * addblk has no DC term, so addblka is given a dc of 0.
		 */
		addblka(0, blk, in, out, stride);
		return;
	}
#endif
	for (int i = 8; --i >= 0;) {
		/*
		 * Not referenced directly below; presumably the scratch
		 * variable the UCLIMIT macro assigns to -- check the
		 * macro before removing it.
		 */
		int t;

		u_int v;
		u_int* o = (u_int*)out;
		/*
		 * blk is indexed rather than post-incremented inside
		 * the macro argument, and each result is converted to
		 * u_int before masking and shifting so that no signed
		 * value is ever shifted left.
		 */
#if BYTE_ORDER == LITTLE_ENDIAN
		v = (u_int)(UCLIMIT(blk[0] + in[0])) & 0xff;
		v |= ((u_int)(UCLIMIT(blk[1] + in[1])) & 0xff) << 8;
		v |= ((u_int)(UCLIMIT(blk[2] + in[2])) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[3] + in[3])) & 0xff) << 24;
		o[0] = v;

		v = (u_int)(UCLIMIT(blk[4] + in[4])) & 0xff;
		v |= ((u_int)(UCLIMIT(blk[5] + in[5])) & 0xff) << 8;
		v |= ((u_int)(UCLIMIT(blk[6] + in[6])) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[7] + in[7])) & 0xff) << 24;
		o[1] = v;
#else
		v = ((u_int)(UCLIMIT(blk[0] + in[0])) & 0xff) << 24;
		v |= ((u_int)(UCLIMIT(blk[1] + in[1])) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[2] + in[2])) & 0xff) << 8;
		v |= (u_int)(UCLIMIT(blk[3] + in[3])) & 0xff;
		o[0] = v;

		v = ((u_int)(UCLIMIT(blk[4] + in[4])) & 0xff) << 24;
		v |= ((u_int)(UCLIMIT(blk[5] + in[5])) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[6] + in[6])) & 0xff) << 8;
		v |= (u_int)(UCLIMIT(blk[7] + in[7])) & 0xff;
		o[1] = v;
#endif
		/* Advance all three pointers to the next row. */
		blk += 8;
		in += stride;
		out += stride;
	}
}

/*
 * Same as addblk, but input buffer is aligned.
 *
 *	dc	constant added to every sample before limiting
 *	blk	64 samples, 8 per row, rows stored back to back
 *	in	reference pixels, read as two u_int words per row
 *	out	destination pixels, written as two u_int words per row
 *	stride	distance in bytes between rows, for both in and out
 *
 * Both `in' and `out' are accessed through u_int pointers, so every
 * row of each must be aligned for a u_int access; u_int is assumed
 * to be 32 bits wide.
 *
 * Each output byte is UCLIMIT(blk[i] + dc + in[i]) with only its low
 * 8 bits kept.  UCLIMIT is defined outside this file; presumably it
 * saturates its argument to 0..255 -- confirm against its definition.
 */
void P64Decoder::addblka(int dc, short* blk, u_char* in,
			 u_char* out, u_int stride)
{
	for (int k = 8; --k >= 0;) {
		/*
		 * Not referenced directly below; presumably the scratch
		 * variable the UCLIMIT macro assigns to -- check the
		 * macro before removing it.
		 */
		int t;

		u_int v;
		u_int* o = (u_int*)out;
		u_int w = *(u_int*)in;
		/*
		 * Each pixel extracted from w is cast to int so the sum
		 * is formed in signed arithmetic; left as u_int it would
		 * drag the whole expression to unsigned and a negative
		 * sum would reach UCLIMIT as a huge positive value.
		 * Results are converted to u_int before masking and
		 * shifting so that no signed value is shifted left.
		 */
#if BYTE_ORDER == LITTLE_ENDIAN
		v = (u_int)(UCLIMIT(blk[0] + dc + (int)(w & 0xff))) & 0xff;
		v |= ((u_int)(UCLIMIT(blk[1] + dc + (int)(w >> 8 & 0xff))) & 0xff) << 8;
		v |= ((u_int)(UCLIMIT(blk[2] + dc + (int)(w >> 16 & 0xff))) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[3] + dc + (int)(w >> 24 & 0xff))) & 0xff) << 24;
		o[0] = v;

		w = *(u_int*)(in + 4);
		v = (u_int)(UCLIMIT(blk[4] + dc + (int)(w & 0xff))) & 0xff;
		v |= ((u_int)(UCLIMIT(blk[5] + dc + (int)(w >> 8 & 0xff))) & 0xff) << 8;
		v |= ((u_int)(UCLIMIT(blk[6] + dc + (int)(w >> 16 & 0xff))) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[7] + dc + (int)(w >> 24 & 0xff))) & 0xff) << 24;
		o[1] = v;
#else
		v = ((u_int)(UCLIMIT(blk[0] + dc + (int)(w >> 24 & 0xff))) & 0xff) << 24;
		v |= ((u_int)(UCLIMIT(blk[1] + dc + (int)(w >> 16 & 0xff))) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[2] + dc + (int)(w >> 8 & 0xff))) & 0xff) << 8;
		v |= (u_int)(UCLIMIT(blk[3] + dc + (int)(w & 0xff))) & 0xff;
		o[0] = v;

		w = *(u_int*)(in + 4);
		v = ((u_int)(UCLIMIT(blk[4] + dc + (int)(w >> 24 & 0xff))) & 0xff) << 24;
		v |= ((u_int)(UCLIMIT(blk[5] + dc + (int)(w >> 16 & 0xff))) & 0xff) << 16;
		v |= ((u_int)(UCLIMIT(blk[6] + dc + (int)(w >> 8 & 0xff))) & 0xff) << 8;
		v |= (u_int)(UCLIMIT(blk[7] + dc + (int)(w & 0xff))) & 0xff;
		o[1] = v;
#endif
		/* Advance all three pointers to the next row. */
		blk += 8;
		in += stride;
		out += stride;
	}
}

