/* $Id: mach64tex.c,v 1.31 2000/03/29 00:53:44 gareth Exp $ */

/*
 * GLX Hardware Device Driver for ATI Rage Pro
 * Copyright (C) 1999 John Carmack, Gareth Hughes
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *    John Carmack <johnc@idsoftware.com>
 *    Gareth Hughes <gareth@precisioninsight.com>
 */

#include <stdlib.h>
#include "mach64glx.h"
#include "glx_symbols.h"

/*

NOTES:

rage pro has a single byte greyscale texture format

*/


/*
 * mach64DestroyTexObj
 * Release everything owned by a driver texture object and drop every
 * reference the driver keeps to it:
 *   - the texture memory (both AGP blocks when mach64glx.dmaDriver >= 3,
 *     otherwise the block referenced by t->memBlock),
 *   - mesa's DriverData link,
 *   - any mach64glx.currentTexture[] slot pointing at it,
 *   - its entry in mach64glx.textureList,
 *   - the structure itself.
 *
 * A NULL pointer is ignored.  An object whose magic number does not match
 * MACH64_TEXTURE_OBJECT_MAGIC is reported with hwError() and left untouched
 * (in particular it stays on the texture list).
 */
static void mach64DestroyTexObj( mach64TextureObjectPtr t )
{
	mach64TextureObjectPtr	p, prev;
	int			i;
	int			wait;

	/* must come before the trace message, which dereferences t */
	if ( !t ) {
		return;
	}

	hwMsg( 10, "mach64DestroyTexObj( %p )\n", t->tObj );

	if ( t->magic != MACH64_TEXTURE_OBJECT_MAGIC ) {
		hwError( "mach64DestroyTexObj: t->magic != MACH64_TEXTURE_OBJECT_MAGIC\n" );
		return;
	}

	/* free the texture memory */
	if ( mach64glx.dmaDriver >= 3 ) {
		/* flush the command stream to finish rendering */
		mach64DmaFlush();
		/* ensure all rendering is complete */
		wait = mach64WaitForDmaCompletion();
		if ( wait ) {
			hwMsg( 10, "mach64DestroyTexObj: waited %d usec for rendering\n", wait );
		}

		/* AGP textures are double buffered, so there are two blocks */
		hwFreeAGPMem( &t->agpBlocks[0] );
		hwFreeAGPMem( &t->agpBlocks[1] );
	} else {
		mmFreeMem( t->memBlock );
	}

	/* free mesa's link */
	t->tObj->DriverData = NULL;

	/* see if it was the driver's current object */
	for ( i = 0 ; i < 2 ; i++ ) {
		if ( mach64glx.currentTexture[i] == t ) {
			hwMsg( 10, "mach64DestroyTexObj: destroyed current %d\n", i );
			mach64glx.currentTexture[i] = NULL;
		}
	}

	/* remove from the driver texobj list */
	p = mach64glx.textureList;
	prev = NULL;
	while ( p ) {
		if ( p == t ) {
			if ( prev ) {
				prev->next = t->next;
			} else {
				mach64glx.textureList = t->next;
			}
			break;
		}
		prev = p;
		p = p->next;
	}

	/* clear magic to catch any bad future references */
	t->magic = 0;

	/* free the structure */
	free( t );

	/* dump the heap contents if loglevel is high enough */
	if ( hwGetLogLevel() >= 25 ) {
		if ( mach64glx.dmaDriver >= 3 ) {
			hwDumpAGPMemInfo();
		} else {
			mmDumpMemInfo( cardHeap );
		}
	}
}


/*
 * mach64DestroyOldestTexObj
 * Pick one texture from mach64glx.textureList and destroy it so that its
 * memory can be reused for a new texture.  Textures bound to either
 * multitexture unit (mach64glx.currentTexture[0..1]) are never chosen.
 *
 * Returns 0 when a texture was destroyed, -1 when no candidate exists.
 */
static int mach64DestroyOldestTexObj( void )
{
	mach64TextureObjectPtr	cur;
	mach64TextureObjectPtr	victim = NULL;
	hwUI32			lowestAge = 0x7fffffff;

	hwMsg(10,"  Swapping out texture.\n");
	mach64glx.c_textureSwaps++;

	/* first pass: the candidate with the smallest age value */
	for ( cur = mach64glx.textureList ; cur != NULL ; cur = cur->next ) {
		/* textures in use by a multitexture unit are off limits */
		if ( cur != mach64glx.currentTexture[0] &&
		     cur != mach64glx.currentTexture[1] &&
		     cur->age < lowestAge ) {
			lowestAge = cur->age;
			victim = cur;
		}
	}

	/* If even the oldest candidate was used on the previous frame we are
	   thrashing.  Testing against "this frame" alone is not enough, since
	   the same working set may be touched in a different order each frame
	   and would then not register as thrashing.  In that situation throw
	   out the MOST recently used texture that is not needed for
	   multitexture, so the rest of the working set can stay resident for
	   the next frame instead of being swapped out one after another. */
	if ( lowestAge >= mach64glx.swapBuffersCount - 1 ) {
		hwMsg( 10, "mach64DestroyOldestTexObj: thrashing\n" );

		/* new texture objects are pushed on the front of the list, so
		   the first entry not bound to a texture unit is the newest */
		cur = mach64glx.textureList;
		while ( cur != NULL &&
			( cur == mach64glx.currentTexture[0] ||
			  cur == mach64glx.currentTexture[1] ) ) {
			cur = cur->next;
		}
		victim = cur;
	} else {
		hwMsg( 10, "mach64DestroyOldestTexObj\n" );
	}

	if ( victim == NULL ) {
		/* Not expected unless the 2D resolution is so high that not even
		   one texture fits in the memory that is left over */
		hwError("  No Texture to swap out -> Out of Memory!\n");
		if ( mach64glx.dmaDriver < 3 ) {
			mmDumpMemInfo( cardHeap );
		} else {
			hwDumpAGPMemInfo();
		}
		return -1;
	}

	/* destroying is enough: the texture can always be rebuilt
	   from the copy mesa keeps */
	mach64DestroyTexObj( victim );

	return 0;
}


/*
 * when a window is resized, we need to dump all the textures to
 * make sure as much memory as possible is available for the new
 * window buffer.
 */
void mach64FlushAllTextures( void ) {
	while ( mach64glx.textureList ) {
		mach64DestroyTexObj( mach64glx.textureList );
	}
}


/*=============================================*/


/*
 * mach64ConvertLocalTexture
 * Converts a block of mesa format texture to the appropriate hardware format.
 * Does a host data blit, only for local card memory textures.
 *
 * texelBytes     size of one hardware texel in bytes (1, 2 or 4)
 * image          mesa texture image to read from (image->Data, image->Format)
 * x, y           first source texel of the block
 * width, height  size of the block in texels
 * pitch          length of one source row in texels
 *
 * Every inner loop iteration emits exactly one dword with DMAOUTHOSTDATA():
 * four texels for 1 byte formats, two texels for 2 byte formats and one
 * texel for 4 byte formats.
 *
 * An unsupported texelBytes / image->Format combination is reported with
 * hwError(); in that case DMAHOSTDATAEND() and DMAADVANCE() are not issued.
 * NOTE(review): the space reserved by DMAGETPTR() is not handed back on that
 * path - confirm this is harmless for the DMA macros.
 */
static void mach64ConvertLocalTexture( int texelBytes, struct gl_texture_image *image,
				       int x, int y, int width, int height, int pitch )
{
	int		i, j;
	hwUI8		*src;
	int		dwords;
	DMALOCALS;

	/* account for the actual space needed by the hostdata stream */
	/* this must be the same as in the check for recursive subdivision */
	dwords =  16 * ( ( width * height * texelBytes + 59 ) / 60 );

	/* this should never overflow, because then the blit would get */
	/* the context register restore data instead of texels... */
	DMAGETPTR( dwords );

	switch ( texelBytes ) {
	case 1:
		switch ( image->Format ) {
		case GL_COLOR_INDEX:
		case GL_INTENSITY:
		case GL_LUMINANCE:
		case GL_ALPHA:
			/* single byte source, copied unchanged, four texels per dword */
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 2 ; j ; j-- ) {
					int	pix;

					pix = src[0] | ( src[1] << 8 ) | ( src[2] << 16 ) | ( src[3] << 24 );
					DMAOUTHOSTDATA( pix );
					src += 4;
				}
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 2:
		/* 16 bit texels, two per dword, second texel in the upper half */
		switch ( image->Format ) {
		case GL_RGB:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 3;
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR565( src[0], src[1], src[2] ) |
					    ( MACH64PACKCOLOR565( src[3], src[4], src[5] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 6;
				}
			}
			break;
		case GL_RGBA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 4;
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR4444( src[0], src[1], src[2], src[3] ) |
					    ( MACH64PACKCOLOR4444( src[4], src[5], src[6], src[7] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 8;
				}
			}
			break;
		case GL_LUMINANCE:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;
					/* FIXME: should probably use 555 texture to get true grey */
					pix = MACH64PACKCOLOR565( src[0], src[0], src[0] ) |
					    ( MACH64PACKCOLOR565( src[1], src[1], src[1] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 2;
				}
			}
			break;
		case GL_INTENSITY:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR4444( src[0], src[0], src[0], src[0] ) |
					    ( MACH64PACKCOLOR4444( src[1], src[1], src[1], src[1] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 2;
				}
			}
			break;
		case GL_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR4444( 255, 255, 255, src[0] ) |
					    ( MACH64PACKCOLOR4444( 255, 255, 255, src[1] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 2;
				}
			}
			break;
		case GL_LUMINANCE_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 2;
				for ( j = width >> 1 ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR4444( src[0], src[0], src[0], src[1] ) |
					    ( MACH64PACKCOLOR4444( src[2], src[2], src[2], src[3] ) << 16 );
					DMAOUTHOSTDATA( pix );
					src += 4;
				}
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 4:
		/* 32 bit texels, one per dword */
		switch ( image->Format ) {
		case GL_RGB:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 3;
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = ( 255 << 24 ) | MACH64PACKCOLOR888( src[0], src[1], src[2] );
					DMAOUTHOSTDATA( pix );
					src += 3;
				}
			}
			break;
		case GL_RGBA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 4;
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR8888( src[0], src[1], src[2], src[3] );
					DMAOUTHOSTDATA( pix );
					src += 4;
				}
			}
			break;
		case GL_LUMINANCE:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = ( 255 << 24 ) | MACH64PACKCOLOR888( src[0], src[0], src[0] );
					DMAOUTHOSTDATA( pix );
					src += 1;
				}
			}
			/* without this break the image would be sent a second
			   time by the GL_INTENSITY case, overrunning the space
			   reserved above */
			break;
		case GL_INTENSITY:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR8888( src[0], src[0], src[0], src[0] );
					DMAOUTHOSTDATA( pix );
					src += 1;
				}
			}
			break;
		case GL_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR8888( 255, 255, 255, src[0] );
					DMAOUTHOSTDATA( pix );
					src += 1;
				}
			}
			break;
		case GL_LUMINANCE_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 2;
				for ( j = width ; j ; j-- ) {
					int	pix;

					pix = MACH64PACKCOLOR8888( src[0], src[0], src[0], src[1] );
					DMAOUTHOSTDATA( pix );
					src += 2;
				}
			}
			/* a supported format: must not drop into the error path */
			break;
		default:
			goto format_error;
		}
		break;
	default:
		goto format_error;
	}

	DMAHOSTDATAEND();
	DMAADVANCE();

	return;

 format_error:
	hwError( "Unsupported texelBytes %i, image->Format %i\n", texelBytes, image->Format );
}


/*
 * mach64ConvertAGPTexture
 * Converts a block of mesa format texture to the appropriate hardware format.
 * Transfers data directly to AGP memory, as the host data interface cannot
 * be used.
 *
 * destPtr        destination; one dword is stored per inner loop iteration
 *                and the pointer simply advances, so rows are packed
 *                back to back
 * texelBytes     size of one hardware texel in bytes (1, 2 or 4)
 * image          mesa texture image to read from (image->Data, image->Format)
 * x, y           first source texel of the block
 * width, height  size of the block in texels
 * pitch          length of one source row in texels
 *
 * An unsupported texelBytes / image->Format combination is reported with
 * hwError() and nothing further is written.
 */
static void mach64ConvertAGPTexture( hwUI32 *destPtr, int texelBytes,
				     struct gl_texture_image *image,
				     int x, int y, int width, int height, int pitch )
{
	int		i, j;
	hwUI8		*src;

	switch ( texelBytes ) {
	case 1:
		switch ( image->Format ) {
		case GL_COLOR_INDEX:
		case GL_INTENSITY:
		case GL_LUMINANCE:
		case GL_ALPHA:
			/* single byte source, copied unchanged, four texels per dword */
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 2 ; j ; j-- ) {

					*destPtr++ = src[0] | ( src[1] << 8 ) | ( src[2] << 16 ) | ( src[3] << 24 );
					src += 4;
				}
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 2:
		/* 16 bit texels, two per dword, second texel in the upper half */
		switch ( image->Format ) {
		case GL_RGB:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 3;
				for ( j = width >> 1 ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR565( src[0], src[1], src[2] ) |
						   ( MACH64PACKCOLOR565( src[3], src[4], src[5] ) << 16 );
					src += 6;
				}
			}
			break;
		case GL_RGBA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 4;
				for ( j = width >> 1 ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR4444( src[0], src[1], src[2], src[3] ) |
						   ( MACH64PACKCOLOR4444( src[4], src[5], src[6], src[7] ) << 16 );
					src += 8;
				}
			}
			break;
		case GL_LUMINANCE:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {

					/* FIXME: should probably use 555 texture to get true grey */
					*destPtr++ = MACH64PACKCOLOR565( src[0], src[0], src[0] ) |
						   ( MACH64PACKCOLOR565( src[1], src[1], src[1] ) << 16 );
					src += 2;
				}
			}
			break;
		case GL_INTENSITY:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR4444( src[0], src[0], src[0], src[0] ) |
						   ( MACH64PACKCOLOR4444( src[1], src[1], src[1], src[1] ) << 16 );
					src += 2;
				}
			}
			break;
		case GL_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width >> 1 ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR4444( 255, 255, 255, src[0] ) |
						   ( MACH64PACKCOLOR4444( 255, 255, 255, src[1] ) << 16 );
					src += 2;
				}
			}
			break;
		case GL_LUMINANCE_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 2;
				for ( j = width >> 1 ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR4444( src[0], src[0], src[0], src[1] ) |
						   ( MACH64PACKCOLOR4444( src[2], src[2], src[2], src[3] ) << 16 );
					src += 4;
				}
			}
			break;
		default:
			goto format_error;
		}
		break;
	case 4:
		/* 32 bit texels, one per dword */
		switch ( image->Format ) {
		case GL_RGB:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 3;
				for ( j = width ; j ; j-- ) {

					*destPtr++ = ( 255 << 24 ) | MACH64PACKCOLOR888( src[0], src[1], src[2] );
					src += 3;
				}
			}
			break;
		case GL_RGBA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 4;
				for ( j = width ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR8888( src[0], src[1], src[2], src[3] );
					src += 4;
				}
			}
			break;
		case GL_LUMINANCE:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {

					*destPtr++ = ( 255 << 24 ) | MACH64PACKCOLOR888( src[0], src[0], src[0] );
					src += 1;
				}
			}
			/* without this break the GL_INTENSITY case would write
			   the image a second time, past the end of the texels
			   belonging to this block */
			break;
		case GL_INTENSITY:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR8888( src[0], src[0], src[0], src[0] );
					src += 1;
				}
			}
			break;
		case GL_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x );
				for ( j = width ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR8888( 255, 255, 255, src[0] );
					src += 1;
				}
			}
			break;
		case GL_LUMINANCE_ALPHA:
			for ( i = 0 ; i < height ; i++ ) {
				src = (hwUI8 *)image->Data + ( ( y + i ) * pitch + x ) * 2;
				for ( j = width ; j ; j-- ) {

					*destPtr++ = MACH64PACKCOLOR8888( src[0], src[0], src[0], src[1] );
					src += 2;
				}
			}
			/* a supported format: must not drop into the error path */
			break;
		default:
			goto format_error;
		}
		break;
	default:
		goto format_error;
	}

	return;

 format_error:
	hwError( "Unsupported texelBytes %i, image->Format %i\n", texelBytes, image->Format );
}


/*=============================================*/


/*
 * mach64UploadLocalSubImage
 *
 * Perform a host blit based update of a resident buffer.  This is used for
 * both initial loading of the entire image, and texSubImage updates.
 *
 * This is complicated by a few factors:
 *
 * mach64ConvertLocalTexture() must upload at least 32 bits for each row,
 * making problems for 16 bit 1 texel wide and 8 bit 3 texel or less wide
 * images.
 *
 * The host blit hardware has a minimum pitch of 64 texels, causing problems
 * for all images less than that wide.
 *
 * Very large images may not be able to be converted into a single dma
 * buffer, forcing a subdivision into multiple parts.
 *
 * t              driver texture object; t->hasAlpha and t->textureFormat
 *                are (re)computed here from the image format
 * level          mip level, only 0 is accepted
 * x, y           origin of the region to update, in texels
 * width, height  size of the region to update, in texels
 *
 * Two sets of coordinates are maintained: x/y/width/height describe what
 * the texel converter reads from the mesa image, blitX/blitY/blitWidth/
 * blitHeight describe the destination rectangle programmed into the blitter.
 */
static void mach64UploadLocalSubImage( mach64TextureObjectPtr t, int level,
				       int x, int y, int width, int height )
{
	int		x2;
	int		dwords;
	struct gl_texture_image *image;
	int		texelBytes, texelsPerDword;
	int		pitch, ofs;
	int		blitX, blitY, blitWidth, blitHeight;
	int		imageWidth, imageHeight;
	DMALOCALS;

	/* the ragePro only handles a single level properly */
	if ( level != 0 ) {
		hwMsg( 1, "mach64UploadLocalSubImage: bad level: %i\n", level );
 	 	return;
	}

	image = t->tObj->Image[0];
	if ( !image ) {
		hwError( "mach64UploadLocalSubImage: NULL image\n" );
		return;
	}

	/* hasAlpha is needed for programming SCALE_3D_CNTL */
	if ( ( image->Format == GL_RGBA ) || ( image->Format == GL_ALPHA ) || ( image->Format == GL_LUMINANCE_ALPHA ) ) {
		t->hasAlpha = 1;
	} else {
		t->hasAlpha = 0;
	}

	/* textureFormat is used for programming DP_PIX_WIDTH */
	/* FIXME: 32 bit and 8 bit intensity */
	if ( t->hasAlpha ) {
		t->textureFormat = 15;	/* 16 bpp ARGB 4444 */
	} else {
		t->textureFormat = 4;	/* 16 bpp RGB 565 */
	}

	texelBytes = t->texelBytes;
	/* hard coded for 16 bit texels; presumably must change together with
	 * t->texelBytes - TODO confirm */
	texelsPerDword = 2;

	imageWidth = image->Width;
	imageHeight = image->Height;

	/* the texel upload routines have a minimum width, so
	 * force the size if needed
	 */
	/* NOTE(review): imageHeight is adjusted below but never read again
	 * in this function; an image->Width of 0 would divide by zero here */
	if ( imageWidth < texelsPerDword ) {
		int		factor;

		factor = texelsPerDword / imageWidth;
		imageWidth = texelsPerDword;
		imageHeight /= factor;
		if ( imageHeight == 0 ) {
			/* in this case, the texel converter will actually
			 * walk a texel or two off the end of the image, but
			 * normal malloc alignment should prevent it from
			 * ever causing a fault.
			 */
			imageHeight = 1;
		}
	}

	/* we can't upload to a pitch less than 64 texels
	 * so we will need to linearly upload all modified rows
	 * this makes the x/y/width/height different for the
	 * blitter and the texture walker
	 */
	if ( imageWidth < 64 )
	{
		int		factor;
		int		y2;
		int		start, end;

		/* linear texel range covered by the modified rows, with the
		 * start rounded down to a multiple of 64 texels */
		start = ( y * imageWidth ) & ~63;
		end = ( y + height ) * imageWidth;

	 	if ( end - start < 64 ) {
			/* handle the case where the total number of texels
			 * uploaded is < 64
		 	 */

			blitX = 0;
			blitY = start / 64;
			blitWidth = end - start;
			blitHeight = 1;

			x = 0;
			y = start / imageWidth;
			width = imageWidth;
			height = ( end / imageWidth ) - y;
		} else {
			/* upload some number of full 64 texel blit rows */
			/* factor = image rows that fit in one 64 texel blit row */
			factor = 64 / imageWidth;

			y2 = y + height - 1;
			y /= factor;
			y2 /= factor;

			blitX = 0;
			blitY = y;
			blitWidth = 64;
			blitHeight = y2 - y + 1;

			x = 0;
			y = y * factor;
			width = imageWidth;
			height = blitHeight * factor;
		}

		/* fixed pitch of 64 */
		/* (the pitch value is in units of 8 texels, compare the
		 * imageWidth >> 3 used for wide images below) */
		pitch = 8;
	}
	else
	{
		/* pad the size out to dwords.  The image is a pointer to the entire image,
		   so we can safely reference outside the x,y,width,height bounds if we need to */
		x2 = x + width;
		x2 = ( x2 + ( texelsPerDword - 1 ) ) & ~(texelsPerDword-1);

		/* NOTE(review): this rounds x UP, so for an odd x the first
		   requested column is not uploaded - confirm whether rounding
		   down was intended */
		x = ( x + ( texelsPerDword - 1 ) ) & ~(texelsPerDword-1);
		width = x2 - x;

		/* the texture walker and the blitter look identical */
		blitX = x;
		blitY = y;
		blitWidth = width;
		blitHeight = height;
		pitch = imageWidth >> 3;
	}

	/* we may not be able to upload the entire texture in one batch due
	   to register limits or dma buffer limits.  Recursively split it up. */
	/* NOTE(review): the recursive call receives the already adjusted
	   x/y/width/height and runs them through the remapping above a second
	   time; also, once height reaches 1 the call is made with a height of
	   0 and this loop makes no further progress - confirm neither can
	   happen with real buffer sizes */
	while ( 1 ) {
		/* account for the actual space needed by the hostdata stream */
		/* this must be the same as in the DMAGETPTR when uploading */
		dwords =  16 * ( ( width * height * texelBytes + 59 ) / 60 );

		/* 32 extra dwords are reserved together with the hostdata
		   stream in the DMAGETPTR below, to hold the register setup */
		if ( dwords + 32 <= mach64glx.dma_buffer->overflowBufferDwords ) {
			break;
		}
		hwMsg( 10, "mach64UploadLocalSubImage: recursively subdividing\n" );

		/* upload the top half now, keep the bottom half for this call */
		mach64UploadLocalSubImage( t, level, x, y, width, height >> 1 );

		/* update actual and blit dimensions */
		y += ( height >> 1 );
		height -= ( height >> 1 );

		blitY += ( blitHeight >> 1 );
		blitHeight -= ( blitHeight >> 1 );
	}
	ofs = t->memBlock->ofs;

	/* bump the performance counter */
	mach64glx.c_textureDwords += ( dwords << 2 );

	/* make sure we overflow here instead of in convertTexture */
	DMAGETPTR( 32 + dwords );

	hwMsg( 10, "mach64UploadLocalSubImage: %i,%i of %i,%i at %i,%i\n",
		   width, height, image->Width, image->Height, x, y );
	hwMsg( 10, "                blit size: %i,%i at %i,%i\n",
		   blitWidth, blitHeight, blitX, blitY );

	/* blit via the host data registers */
	/* make sure EVERYTHING that is needed is set, because this can be called */
	/* as the very first thing in a buffer, before any state updates are set */

	DMAOUTREG( MACH64_Z_CNTL, 0 );
	DMAOUTREG( MACH64_SCALE_3D_CNTL, 0 );

	DMAOUTREG( MACH64_SC_LEFT_RIGHT, 0 | ( 8191 << 16 ) );	/* no scissor */
	DMAOUTREG( MACH64_SC_TOP_BOTTOM, 0 | ( 16383 << 16 )  );

	DMAOUTREG( MACH64_CLR_CMP_CNTL, 0 );			/* disable */
	DMAOUTREG( MACH64_GUI_TRAJ_CNTL, DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM );

	DMAOUTREG( MACH64_DP_PIX_WIDTH,
		   ( t->textureFormat << 0 )			/* dst pix width */
		   | ( t->textureFormat << 4 )			/* composite pix width */
		   | ( t->textureFormat << 8 )			/* src pix width */
		   | ( t->textureFormat << 16 )			/* host data pix width */
		   | ( t->textureFormat << 28 )			/* scaler/3D pix width */
		   );

	DMAOUTREG( MACH64_DP_WRITE_MASK, 0xffffffff );		/* enable all planes */
	DMAOUTREG( MACH64_DP_MIX, BKGD_MIX_D | FRGD_MIX_S );
	DMAOUTREG( MACH64_DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_HOST | MONO_SRC_ONE );

	/* destination: the texture's memory block, addressed with the blit
	   rectangle computed above */
	DMAOUTREG( MACH64_DST_OFF_PITCH, (pitch << 22) | (ofs>>3) );
	DMAOUTREG( MACH64_DST_X_Y, (blitY << 16) | blitX );
	DMAOUTREG( MACH64_DST_WIDTH_HEIGHT, (blitHeight << 16) | blitWidth );

	DMAADVANCE();

	/* blit via the host data registers the properly converted texels from the mesa buffer */
	/* note that the source pitch passed is imageWidth, which may have
	   been forced up to texelsPerDword above */
	mach64ConvertLocalTexture( texelBytes, image, x, y, width, height, imageWidth );

	/* the normal state registers must be reprogrammed before drawing */
}


/*
 * mach64UploadAGPSubImage
 *
 * Perform an agp memory blit update of a resident buffer.  This is used for
 * both initial loading of the entire image, and texSubImage updates.
 *
 * This is much simpler than the host blit based update, but we have to
 * upload the entire image due to the double texture buffers.  Otherwise,
 * any updates to the other buffer could be lost when we upload this round
 * of updates.
 *
 * t              driver texture object; its active AGP block is swapped and
 *                t->hasAlpha / t->textureFormat are recomputed
 * level          mip level, only 0 is accepted
 * x, y, width, height
 *                region reported by the caller; not used, because the whole
 *                image is always converted
 *
 * FIXME: Save updated region, so we can get away with only updating the
 * union of the previous and current regions?
 */
static void mach64UploadAGPSubImage( mach64TextureObjectPtr t, int level,
				     int x, int y, int width, int height )
{
	int		bytes;
	struct gl_texture_image *image;
	int		texelBytes;
	hwUI32		*dest;
	int		wait;

	/* the ragePro only handles a single level properly */
	if ( level != 0 ) {
		hwMsg( 1, "mach64UploadAGPSubImage: bad level: %i\n", level );
		return;
	}

	image = t->tObj->Image[0];
	if ( !image ) {
		hwError( "mach64UploadAGPSubImage: NULL image\n" );
		return;
	}

	/* mark the old buffer as dirty for this frame */
	t->agpBlocks[ t->activeBlock ].age = mach64glx.swapBuffersCount;

	/* flush dma command stream to kick off some rendering with the old buffer */
	mach64DmaFlush();

	/* swap to using the other buffer */
	t->activeBlock ^= 1;
	t->memBlock = t->agpBlocks[ t->activeBlock ].memBlock;

	/* if the other buffer has been used this frame, we must wait */
	if ( t->agpBlocks[ t->activeBlock ].age == mach64glx.swapBuffersCount ) {
		wait = mach64WaitForDmaCompletion();
		if ( wait ) {
			hwMsg( 10, "mach64UploadAGPSubImage: waited %d usec for dirty buffer\n", wait );
		}
	}

	/* hasAlpha is needed for programming SCALE_3D_CNTL */
	if ( ( image->Format == GL_RGBA ) || ( image->Format == GL_ALPHA ) || ( image->Format == GL_LUMINANCE_ALPHA ) ) {
		t->hasAlpha = 1;
	} else {
		t->hasAlpha = 0;
	}

	/* textureFormat is used for programming DP_PIX_WIDTH */
	/* FIXME: 32 bit and 8 bit intensity */
	if ( t->hasAlpha ) {
		t->textureFormat = 15;	/* 16 bpp ARGB 4444 */
	} else {
		t->textureFormat = 4;	/* 16 bpp RGB 565 */
	}

	texelBytes = t->texelBytes;

	/* bump the performance counter.  The host blit path adds
	   ( dwords << 2 ), i.e. a byte count, so add the number of bytes
	   actually written here: the complete image, not the sub rectangle */
	bytes = image->Width * image->Height * texelBytes;
	mach64glx.c_textureDwords += bytes;

	/* find the proper destination offset */
	dest = (hwUI32 *)(t->agpBlocks[ t->activeBlock ].buffer);

	hwMsg( 10, "mach64UploadAGPSubImage: %i,%i of %i,%i at %i,%i\n",
	       image->Width, image->Height, image->Width, image->Height, 0, 0 );

	/* write directly to agp memory the properly converted texels from the mesa buffer */
	mach64ConvertAGPTexture( dest, texelBytes, image, 0, 0, image->Width, image->Height, image->Width );

	/* the normal state registers must be reprogrammed before drawing */
}


/*=============================================*/


/*
 * Log2
 * Returns the smallest i in 0..31 for which ( 1 << i ) >= a, i.e. the
 * base 2 logarithm of a rounded up.  Both 0 and 1 give 0; values larger
 * than 1 << 31 give 31.
 */
static int Log2( unsigned a )
{
	unsigned	i;

	for ( i = 0 ; i < 32 ; i++ ) {
		/* unsigned shift: 1 << 31 would overflow a signed int */
		if ( ( 1u << i ) >= a ) {
			return (int)i;
		}
	}
	return 31;
}

/*
 * mach64CreateLocalTexObj
 * Build the driver side object for a mesa texture: reserve a block in the
 * card heap, evicting other textures while the reservation fails, link the
 * new object into mach64glx.textureList and tObj->DriverData, and upload
 * the level 0 image.
 * This will happen before drawing with a new texture, or drawing with a
 * texture after it was swapped out or teximaged again.
 *
 * Nothing is created when tObj has no level 0 image or when no memory can
 * be freed up for it.
 */
static void mach64CreateLocalTexObj( mach64ContextPtr ctx, struct gl_texture_object *tObj )
{
	struct gl_texture_image *base;
	mach64TextureObjectPtr	tex;
	PMemBlock		block;
	int			bytes;

	hwMsg( 10,"mach64CreateLocalTexObj( %p )\n", tObj );

	base = tObj->Image[0];
	if ( base == NULL ) {
		return;
	}

	tex = malloc( sizeof( *tex ) );
	if ( tex == NULL ) {
		FatalError( "mach64CreateLocalTexObj: Failed to malloc textureObject\n" );
	}
	memset( tex, 0, sizeof( *tex ) );

	/* only one mip level is stored, because the RagePro has
	   busted mipmap hardware */

	/* texture format options */
	tex->texelBytes = 2;

	/* image size in bytes, rounded up to a multiple of 32 */
	bytes = base->Width * base->Height * tex->texelBytes;
	bytes = ( bytes + 31 ) & ~31;
	tex->totalSize = bytes;

	tex->widthLog2 = Log2( base->Width );
	tex->heightLog2 = Log2( base->Height );
	tex->maxLog2 = tex->widthLog2;
	if ( tex->heightLog2 > tex->maxLog2 ) {
		tex->maxLog2 = tex->heightLog2;
	}

	/* reserve the texture memory (64 byte alignment), throwing out
	   other textures until the request succeeds */
	for ( ;; ) {
		block = mmAllocMem( cardHeap, bytes, 6, 0 );
		if ( block != 0 ) {
			break;
		}
		if ( mach64DestroyOldestTexObj() ) {
			/* can't hold this texture at all */
			hwMsg( 10, "mach64CreateTexObj: Couldn't allocate buffer\n" );
			free( tex );
			return;
		}
	}

	/* dump the heap contents if loglevel is high enough */
	if ( hwGetLogLevel() >= 15 ) {
		mmDumpMemInfo( cardHeap );
	}

	/* fill in the object and push it on the front of the driver list */
	tex->magic = MACH64_TEXTURE_OBJECT_MAGIC;
	tex->tObj = tObj;
	tex->ctx = ctx;
	tex->memBlock = block;
	tex->next = mach64glx.textureList;
	mach64glx.textureList = tex;

	/* let mesa find the driver object again */
	tObj->DriverData = tex;

	/* load the texels of the base image */
	base = tObj->Image[0];
	mach64UploadLocalSubImage( tex, 0, 0, 0, base->Width, base->Height );
}

/*
 * mach64CreateAGPTexObj
 * Allocate space for and load the mesa images into the texture memory block.
 * This will happen before drawing with a new texture, or drawing with a
 * texture after it was swapped out or teximaged again.
 *
 * Two AGP blocks of identical size are allocated per texture (see
 * t->agpBlocks and t->activeBlock); other textures are evicted while an
 * allocation reports "no room" (return value 0).  A negative return value
 * from hwAllocAGPMem() is treated as fatal.
 *
 * Nothing is created when tObj has no level 0 image or when no memory can
 * be freed up for it; in the latter case any block already obtained is
 * released again.
 */
static void mach64CreateAGPTexObj( mach64ContextPtr ctx, struct gl_texture_object *tObj )
{
	mach64TextureObjectPtr	t;
	int			size;
	int			ret;
	int			i;
	struct gl_texture_image *image;

	hwMsg( 10,"mach64CreateAGPTexObj( %p )\n", tObj );

	image = tObj->Image[0];
	if ( !image ) {
		return;
	}

	t = malloc( sizeof( *t ) );
	if ( !t ) {
		FatalError( "mach64CreateTexObj: Failed to malloc textureObject\n" );
	}
	memset( t, 0, sizeof( *t ) );

	/* we only need one mip level, because the RagePro has busted mipmap hardware */

	/* texture format options */
	t->texelBytes = 2;

	size = image->Width * image->Height * t->texelBytes;
	size = ( size + 31 ) & ~31;	/* 32 byte aligned */

	t->widthLog2 = Log2( image->Width );
	t->heightLog2 = Log2( image->Height );
	t->maxLog2 = ( t->widthLog2 > t->heightLog2 ) ? t->widthLog2 : t->heightLog2;

	t->totalSize = size;

	/* allocate both buffers, swapping out stuff if needed */
	for ( i = 0 ; i < 2 ; i++ ) {
		while ( ( ret = hwAllocAGPMem( &(t->agpBlocks[i]), size ) ) == 0 ) {
			if ( mach64DestroyOldestTexObj() ) {
				/* can't hold this texture at all */
				hwMsg( 10, "mach64CreateTexObj: Couldn't allocate buffer\n" );

				/* give back the blocks obtained so far, they
				   would otherwise be lost together with t */
				while ( i-- > 0 ) {
					hwFreeAGPMem( &t->agpBlocks[i] );
				}
				free( t );
				return;
			}
		}
		if ( ret < 0 ) {
			FatalError( "mach64CreateAGPTexObj: AGP allocation failed\n" );
		}
	}

	/* dump the heap contents if loglevel is high enough */
	if ( hwGetLogLevel() >= 15 ) {
		hwDumpAGPMemInfo();
	}

	/* fill in our texture object */
	t->magic = MACH64_TEXTURE_OBJECT_MAGIC;
	t->tObj = tObj;
	t->ctx = ctx;
	t->next = mach64glx.textureList;
	mach64glx.textureList = t;

	/* init agp texture double buffer */
	t->activeBlock = 0;
	t->memBlock = t->agpBlocks[ t->activeBlock ].memBlock;

	/* base image */
	image = tObj->Image[0];

	tObj->DriverData = t;

	/* load the texels */
	mach64UploadAGPSubImage( t, 0, 0, 0, image->Width, image->Height );
}

/*
 * mach64CreateTexObj
 * Local and AGP textures have two completely separate code paths.  The
 * paths are still quite similar, but enough is different to warrant the
 * split here instead of further down.  AGP textures are used when
 * mach64glx.dmaDriver is 3 or higher, local card memory otherwise.
 */
void mach64CreateTexObj( mach64ContextPtr ctx, struct gl_texture_object *tObj )
{
	if ( mach64glx.dmaDriver < 3 ) {
		mach64CreateLocalTexObj( ctx, tObj );
		return;
	}

	mach64CreateAGPTexObj( ctx, tObj );
}


/*
============================================================================

Driver functions called directly from mesa

============================================================================
*/


/*
 * mach64TexImage
 * Called when a texture image has been (re)specified.  Any existing driver
 * object for tObj is destroyed and a new one is created from the current
 * level 0 image; the result becomes the current texture of the active
 * texture unit (NULL when creation failed).
 *
 * Only level 0 is handled; calls for other levels return immediately.
 * target, internalFormat and image are not used.
 */
void mach64TexImage( GLcontext *ctx, GLenum target,
		     struct gl_texture_object *tObj, GLint level,
		     GLint internalFormat,
		     const struct gl_texture_image *image )
{
	mach64TextureObject_t *t;

	hwMsg( 10,"mach64TexImage( %p, level %i )\n", tObj, level );

	/* only level 0 will matter on rage pro */
	if ( level != 0 ) {
		return;
	}

	/* free the driver texture if it exists */
	t = (mach64TextureObjectPtr) tObj->DriverData;
	if ( t ) {
		mach64DestroyTexObj( t );
	}

	/* create it */
	mach64CreateTexObj( mach64Ctx, tObj );

	/* t now points at freed memory (or was NULL all along), so pick up
	   the object that was just created; DriverData is NULL when the
	   creation did not succeed */
	t = (mach64TextureObjectPtr) tObj->DriverData;

	/* make the texture current */
	mach64glx.currentTexture[ctx->Texture.CurrentUnit] = t;
}

/*
 * mach64TexSubImage
 * Called when part of a texture image has been replaced.  If the texture
 * has a driver object the change is uploaded right away, through the AGP
 * or the local path depending on mach64glx.dmaDriver.  The driver object
 * (possibly NULL) then becomes the current texture of the active unit.
 *
 * Only level 0 is handled; calls for other levels return immediately.
 */
void mach64TexSubImage( GLcontext *ctx, GLenum target,
			struct gl_texture_object *tObj, GLint level,
			GLint xoffset, GLint yoffset,
			GLsizei width, GLsizei height,
			GLint internalFormat,
			const struct gl_texture_image *image )
{
	mach64TextureObject_t *resident;

	hwMsg( 10, "mach64TexSubImage( %p ) size: %d,%d of %d,%d; level %d\n",
		   tObj, width, height, image->Width,image->Height, level );

	/* the rage pro path only ever looks at level 0 */
	if ( level != 0 ) {
		return;
	}

	resident = (mach64TextureObject_t *) tObj->DriverData;

	/* nothing to upload unless the texture is resident */
	if ( resident != NULL ) {
		/* local and agp textures require different paths */
		if ( mach64glx.dmaDriver < 3 ) {
			mach64UploadLocalSubImage( resident, 0, xoffset, yoffset, width, height );
		} else {
			mach64UploadAGPSubImage( resident, 0, xoffset, yoffset, width, height );
		}
	}

	/* make the texture current */
	mach64glx.currentTexture[ctx->Texture.CurrentUnit] = resident;
}

/*
 * mach64DeleteTexture
 * Called when mesa deletes a texture object; destroys the driver object
 * attached to it, if there is one.
 */
void mach64DeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
{
	hwMsg( 10, "mach64DeleteTexture( %p )\n", tObj );

	/* no driver data, nothing to delete */
	if ( !tObj->DriverData ) {
		return;
	}

	mach64DestroyTexObj( (mach64TextureObject_t *)(tObj->DriverData) );
}

/*
 * mach64IsTextureResident
 * Reports whether the texture currently has a driver object attached,
 * i.e. whether tObj->DriverData is non-NULL.
 */
GLboolean mach64IsTextureResident( GLcontext *ctx, struct gl_texture_object *tObj )
{
	GLboolean	resident = 0;

	if ( tObj->DriverData != NULL ) {
		resident = 1;
	}

	hwMsg( 10, "mach64IsTextureResident( %p ) == %i\n", tObj, resident );

	return resident;
}


/*
 * Local Variables:
 * mode: c
 * tab-width: 8
 * c-basic-offset: 8
 * End:
 */
