/* -*- mode: C; c-basic-offset:8 -*- */
/*
 * GLX Hardware Device Driver for Intel i810
 * Copyright (C) 1999 Keith Whitwell
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * based on the original mgadma.c by Jeff Hartmann <slicer@ionet.net>
 * as rewritten by John Carmack <johnc@idsoftware.com>
 */

#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#include "mm.h"
#include "i810lib.h"
#include "i810buf.h"
#include "i810dd.h"
#include "i810log.h"
#include "i810tris.h"
#include "i810direct.h"
#include "i810state.h"

#include "pb.h"

/* X server include:
 */
#include "i810.h"



/* Flush hook called by i810DmaFlush() and i810DmaFinish().  It starts
 * out pointing at the in-server implementation, i810ServerDmaFlush();
 * this default will be overwritten when glx.so is loaded on the client.
 */
int i810ServerDmaFlush( int wait, int *retry_timeout );
int (*i810DoDmaFlush)( int, int * ) = i810ServerDmaFlush;

/* Index into dmaBuffers[] of the batch buffer currently in use; it is
 * toggled with "^= 1" by i810ServerDmaFlush() after a buffer is fired.
 */
GLuint i810ActiveDmaBuffer = 0;	



/* Debug flag word tested by the routines in this file (for example
 * against DEBUG_VERBOSE_RING and DEBUG_VERBOSE_OUTREG).  It is only
 * defined here when I810_DEBUG is not already a preprocessor macro.
 * Uncomment individual flags in the list to OR them in; with every
 * entry commented out, as below, the value is 0.
 */
#ifndef I810_DEBUG
int I810_DEBUG = (0
/*     		  | DEBUG_ALWAYS_SYNC  */
/*    		  | DEBUG_VERBOSE_ACCEL  */
/*  		  | DEBUG_VERBOSE_SYNC */
/*  		  | DEBUG_VERBOSE_VGA */
/*  		  | DEBUG_VERBOSE_RING    */
/*  		  | DEBUG_VERBOSE_OUTREG  */
/*  		  | DEBUG_VERBOSE_MEMORY */
/*  		  | DEBUG_ALWAYS_SYNC */
/*  		  | DEBUG_VERBOSE_MSG */
		  );
#endif



/* Empty function; the ring-wait loops in this file call it repeatedly
 * as their back-off step once they have polled many times.
 */
static void delay( void )
{
	return;
}


/* Spin until the low-priority ring's space field is at least n.
 *
 * Every pass re-reads the ring head register, masks it with HEAD_ADDR
 * and recomputes space as head - (tail + 8), adding the ring size when
 * that comes out negative.  From the 501st pass onwards each pass also
 * runs a 10000-call delay() loop.  There is no timeout: the lockup
 * check further down is permanently disabled.
 *
 * Duplicates code in i810_accel.c, where it was declared static.
 */
void I810WaitLpRing( int n )
{
	int polls = 0;

	if (I810_DEBUG & DEBUG_VERBOSE_RING) {
		fprintf(stderr, "I810WaitLpRing %d\n", n);
	}

	for (;;) {
		int k;

		if (GLXSYM(I810LpRing).space >= n) {
			break;
		}

		/* Refresh our copy of the hardware head pointer. */
		GLXSYM(I810LpRing).head = INREG(GLXSYM(I810LpRing).base_reg + RING_HEAD);
		GLXSYM(I810LpRing).head &= HEAD_ADDR;

		/* Free space runs from tail (+8) round to head. */
		GLXSYM(I810LpRing).space = GLXSYM(I810LpRing).head - (GLXSYM(I810LpRing).tail + 8);
		if (GLXSYM(I810LpRing).space < 0) {
			GLXSYM(I810LpRing).space += GLXSYM(I810LpRing).mem.Size;
		}

		polls++;
		if (polls > 500) {
			for (k = 0 ; k < 10000 ; k++) {
				delay();
			}
		}

		/* This seems to be too aggressive a test for lockups,
		 * so it is switched off with the constant 0.
		 */
		if (0 && polls > 5000) {
			GLXSYM(I810PrintErrorState)(); 
			fprintf(stderr, "space: %d wanted %d\n", 
				GLXSYM(I810LpRing).space, n );

			GLXSYM(I810FrameBufferLocked) = 0;
			FatalError("lockup in i810dma.c\n"); 
		}
	}
}


/* Wait for the low-priority ring to drain completely.
 *
 * Returns immediately when head equals tail and the cached space count
 * is non-zero.  Otherwise a flush instruction is queued (unless
 * I810_NO_FLUSH is set) and the routine blocks in I810WaitLpRing()
 * until the whole ring, less 8, is free.
 *
 * Duplicates code in i810_accel.c, where it was declared static.
 */
void I810Sync( void ) 
{
	if (I810_DEBUG) {
		fprintf(stderr, "I810Sync\n");
	}

	if (GLXSYM(I810LpRing).head != GLXSYM(I810LpRing).tail ||
	    !GLXSYM(I810LpRing).space) {

		if (!I810_NO_FLUSH) {
			BEGIN_LP_RING(2);   
			OUT_RING( INST_PARSER_CLIENT | INST_OP_FLUSH );
			OUT_RING( 0 );		/* pad to quadword */
			ADVANCE_LP_RING();
		}

		I810WaitLpRing( GLXSYM(I810LpRing).mem.Size - 8 );

		/* The wait only returns once this much is free. */
		GLXSYM(I810LpRing).space = GLXSYM(I810LpRing).mem.Size - 8;
	}
}




/* Like I810WaitLpRing(), but gives up once a time budget is spent.
 *
 * n          - value the ring's space field must reach.
 * avail_usec - in/out time budget (presumably microseconds, going by
 *              the names; the clock is whatever usec() returns).  When
 *              NULL the call is forwarded to the untimed
 *              I810WaitLpRing().  Otherwise the loop stops as soon as
 *              the elapsed time reaches *avail_usec, and on return
 *              *avail_usec has been reduced by the elapsed time, so it
 *              may end up zero or negative.
 *
 * Timing starts at the end of the first poll; if usec() is ever seen
 * to go backwards the start mark is reset to the current reading.
 */
void I810TimeoutWaitLpRing( int n, int *avail_usec )
{
	int j = 0;
	int end = 0, start = 0;

	/* Report under this function's own name so that verbose ring
	 * traces can tell timed waits from untimed ones.
	 */
	if (I810_DEBUG & DEBUG_VERBOSE_RING)
		fprintf(stderr, "I810TimeoutWaitLpRing %d\n", n);

	if (!avail_usec) {
		I810WaitLpRing( n );
		return;
	}

	while (GLXSYM(I810LpRing).space < n && *avail_usec - (end-start) > 0) 
	{
		/* Refresh head from the hardware and recompute free space. */
		GLXSYM(I810LpRing).head = INREG(GLXSYM(I810LpRing).base_reg + RING_HEAD);
		GLXSYM(I810LpRing).head &= HEAD_ADDR;

		GLXSYM(I810LpRing).space = GLXSYM(I810LpRing).head - (GLXSYM(I810LpRing).tail + 8);
		if (GLXSYM(I810LpRing).space < 0) 
			GLXSYM(I810LpRing).space += GLXSYM(I810LpRing).mem.Size;
		
		/* Back off once we have polled more than 500 times. */
		if (++j > 500) {
			int i;
			for (i = 0 ; i<10000 ; i++)
				delay();
		}
		
		end = usec();

		/* First sample, or the clock went backwards: restart timing. */
		if (start == 0 || start > end)
			start = end;
	}

	*avail_usec -= (end-start);
}


/* Wait for the ring to drain, optionally bounded by a time budget.
 *
 * avail_usec - passed straight through to I810TimeoutWaitLpRing();
 *              NULL means wait without a timeout.
 *
 * Returns 0 when the ring was already idle (head == tail), judged
 * first from the cached values and then from a fresh read of the head
 * register.  Returns 1 when it had to call the wait routine; in that
 * case the caller must inspect *avail_usec to see whether the wait
 * ran out of time.
 */
static int i810WaitForDmaCompletion( int *avail_usec ) 
{
	int idle;

	/* Cheap test on the cached ring state. */
	idle = (GLXSYM(I810LpRing).head == GLXSYM(I810LpRing).tail &&
		GLXSYM(I810LpRing).space);

	if (!idle) {
		/* Re-read the hardware head and recompute free space. */
		GLXSYM(I810LpRing).head = INREG(GLXSYM(I810LpRing).base_reg + RING_HEAD);
		GLXSYM(I810LpRing).head &= HEAD_ADDR;
		GLXSYM(I810LpRing).space = GLXSYM(I810LpRing).head - (GLXSYM(I810LpRing).tail + 8);

		if (GLXSYM(I810LpRing).space < 0) {
			GLXSYM(I810LpRing).space += GLXSYM(I810LpRing).mem.Size;
		}

		idle = (GLXSYM(I810LpRing).head == GLXSYM(I810LpRing).tail);

		if (!idle) {
			I810TimeoutWaitLpRing( GLXSYM(I810LpRing).mem.Size - 8, avail_usec );
		}
	}

	return idle ? 0 : 1;
}



/*
 * i810DmaResetBuffer
 * Make dmaBuffers[i810ActiveDmaBuffer] the current batch buffer and
 * mark it empty.  All of the buffer except its last 256 bytes is put
 * into additional_space, and i810DmaOverflow(0) is then called to move
 * the first portion of that into space.  Finally dma_buffer_age is
 * raised to the buffer's texture_age if it was lower.
 */
void i810DmaResetBuffer( void ) 
{
	i810glx.dma_buffer = dmaBuffers[ i810ActiveDmaBuffer ];

	i810glx.dma_buffer->space = 0;
	i810glx.dma_buffer->head = 0;
	i810glx.dma_buffer->additional_space = i810glx.dma_buffer->mem.Size - 256;

	/* With additional_space set this only claims room; it does not flush. */
	i810DmaOverflow( 0 );

	if (i810glx.dma_buffer->texture_age > i810glx.dma_buffer_age) {
		i810glx.dma_buffer_age = i810glx.dma_buffer->texture_age;
	}
}



/*
 * i810FlushRealDma
 * Hand the filled part of the current batch buffer to the hardware.
 *
 * Does nothing when i810glx.skipDma is set or I810_USE_BATCH is false.
 * Otherwise the range 0 .. head of the buffer is queued on the
 * low-priority ring as a series of CMD_OP_BATCH_BUFFER commands, each
 * covering at most MAX_BATCH bytes, followed (unless I810_NO_FLUSH is
 * set) by a flush instruction with INST_FLUSH_MAP_CACHE.
 *
 * Calls FatalError when head has bit 2 set ("Misaligned batch buffer").
 * This routine does not wait for the hardware; callers do that.
 */
void i810FlushRealDma( void ) {

	GLuint start;

	if (MESA_VERBOSE&VERBOSE_DRIVER)
		fprintf(stderr, "i810FlushRealDma()\n" );


	if ( i810glx.skipDma || !I810_USE_BATCH )
		return;
	

	if (i810glx.dma_buffer->head & 0x4) {
		FatalError( "Misaligned batch buffer\n" );
	}

	/* Buffer dump, disabled by the trailing "&& 0". */
	if ( (I810_DEBUG&DEBUG_VERBOSE_OUTREG) && 0)
	{
		int i;
		for (i = 0 ; i <= i810glx.dma_buffer->head ; i+=4) 
			fprintf(stderr, 
				" 0x%05x : 0x%08x\n", 
				i/4, 
				*(GLuint *)(i810glx.dma_buffer->virtual_start +
					    i));
	}


	if (0)
		fprintf(stderr, "firing buffer %d 0..%x\n", 
			i810ActiveDmaBuffer, i810glx.dma_buffer->head);

	/* fire the batch buffer, one MAX_BATCH-sized piece at a time */
	for (start = 0 ; start < i810glx.dma_buffer->head ; start += MAX_BATCH)
	{
		GLuint ofs = i810glx.dma_buffer->mem.Start;
		GLuint end = MIN2(start + MAX_BATCH, i810glx.dma_buffer->head);
		
		BEGIN_LP_RING(4);

/*  		if (I810_DEBUG & DEBUG_VERBOSE_RING) */
		if (0)
			fprintf(stderr, "Fire: start: %x end: %x\n", start,end);

		OUT_RING( CMD_OP_BATCH_BUFFER );
		OUT_RING( (ofs + start) | BB1_PROTECTED );
		OUT_RING( (ofs + end) - 4 );	/* address of the last dword */
		OUT_RING( 0 );
		ADVANCE_LP_RING();
	}

	if (!I810_NO_FLUSH)
	{
		BEGIN_LP_RING(2);
		OUT_RING( INST_PARSER_CLIENT | INST_OP_FLUSH | 
			  INST_FLUSH_MAP_CACHE );
		OUT_RING( 0 );		/* pad to quadword */

		/* Every BEGIN_LP_RING group has to be closed with
		 * ADVANCE_LP_RING, as in the batch loop above and in
		 * I810Sync(); presumably this is the step that commits
		 * the queued words to the hardware tail (see i810.h).
		 */
		ADVANCE_LP_RING();
	}
}

/*
 * i810ServerDmaFlush
 * In-server implementation behind the i810DoDmaFlush hook: send all
 * pending commands off to the hardware and switch to the other batch
 * buffer.  If we are running async, the hardware will be drawing while
 * we return to do other things.
 *
 * wait       - non-zero: also wait for the ring to drain before
 *              returning.
 * retry_usec - optional (may be NULL) time budget handed to the
 *              completion wait that precedes firing.
 *
 * Returns non-zero when a budget was supplied and *retry_usec is <= 0
 * either on the empty-buffer path or after the pre-fire wait; in the
 * latter case the buffer has not been fired.  Returns 0 once the
 * buffer has been fired and the buffers swapped, or on the
 * empty-buffer path when the budget is absent or not exhausted.
 */
int i810ServerDmaFlush( int wait, int *retry_usec ) {
	int		t_fire = 0;
	int		t_done = 0;

	if (I810_DEBUG) {
		fprintf(stderr, 
			"i810ServerDmaFlush, buffer %d, head %x space %x\n",
			i810ActiveDmaBuffer, 
			i810glx.dma_buffer->head,
			i810glx.dma_buffer->space);
	}

	/* Nothing queued in the buffer: at most wait for earlier work. */
	if ( i810glx.dma_buffer->head == 0 ) {
		if (wait) {
			if (!i810WaitForDmaCompletion( retry_usec )) {
				i810glx.hardwareWentIdle = 1;
			}
		}

		if (!retry_usec) {
			return 0;
		}
		return *retry_usec <= 0;
	}

	i810glx.c_dmaFlush++;
	
	/* wait for the last buffer to complete - timeout if necessary.
	 */
	if ( i810WaitForDmaCompletion( retry_usec ) ) {
		if (retry_usec && *retry_usec <= 0) {
			return 1;
		}
	} else {
		i810glx.hardwareWentIdle = 1;
	}

	/* collect timing information unless dmaDriver is 3 */
	if ( i810glx.dmaDriver != 3 ) {
		t_fire = usec();
	}

	i810FlushRealDma();

	if ( i810glx.dmaDriver == 2 ) {
		/* wait until the dma completes, don't try to timeout.
		 */
		i810WaitForDmaCompletion( 0 );
	}

	if ( i810glx.dmaDriver != 3 ) {
		t_done = usec();
	}

	/* Disabled timing report. */
	if (0) {
		fprintf(stderr, "flushmode %i, buffer %i: prim dwords:%i usec:%i\n", 
			i810glx.dmaDriver,  i810ActiveDmaBuffer,
			i810glx.dma_buffer->head / 4,
			t_done - t_fire );
	}

	/* swap to using the other buffer */
	i810ActiveDmaBuffer = i810ActiveDmaBuffer ^ 1;
	i810DmaResetBuffer();
	
	/* Again - it's too difficult to try to timeout here
	 * without some management to check if the buffer has been
	 * flushed...
	 */
	if ( wait ) {
		i810WaitForDmaCompletion( 0 );
	}

	return 0;
}


/*
 * i810DmaFlush
 * Finish the current primitive, stamp the batch buffer and the
 * currently bound texture objects with fresh ages, then start a flush
 * through the i810DoDmaFlush hook without waiting (wait = 0, no time
 * budget).
 */
void i810DmaFlush( void ) {
	i810FinishPrimitive();

	/* Each stamp below takes the next value of the age counter. */
	i810glx.current_texture_age++;
	i810glx.dma_buffer->texture_age = i810glx.current_texture_age;

	if ( i810Ctx ) {
		if ( i810Ctx->CurrentTex0Obj ) {
			i810glx.current_texture_age++;
			i810Ctx->CurrentTex0Obj->age = i810glx.current_texture_age;
		}

		if ( i810Ctx->CurrentTex1Obj ) {
			i810glx.current_texture_age++;
			i810Ctx->CurrentTex1Obj->age = i810glx.current_texture_age;
		}
	}

	i810DoDmaFlush( 0, 0 );
}


/*
 * i810DmaFinish
 * Finish the current primitive and flush through the i810DoDmaFlush
 * hook with wait = 1 and no time budget.  Afterwards dma_buffer_age
 * and the currently bound texture objects are stamped with fresh
 * values of the age counter.
 */
void i810DmaFinish( void ) {
	i810FinishPrimitive();
	i810DoDmaFlush( 1, 0 );

	i810glx.current_texture_age++;
	i810glx.dma_buffer_age = i810glx.current_texture_age;

	if ( i810Ctx ) {
		if ( i810Ctx->CurrentTex0Obj ) {
			i810glx.current_texture_age++;
			i810Ctx->CurrentTex0Obj->age = i810glx.current_texture_age;
		}

		if ( i810Ctx->CurrentTex1Obj ) {
			i810glx.current_texture_age++;
			i810Ctx->CurrentTex1Obj->age = i810glx.current_texture_age;
		}
	}
}


/*
 * i810DmaOverflow
 * This is called when I810DMAGETPTR is at the end of the buffer, and
 * by i810DmaResetBuffer() to open the first window of a fresh buffer.
 *
 * While the buffer still has unclaimed room (additional_space is
 * non-zero): zero-fill from head up to the next MAX_BATCH boundary,
 * open a new window of at most MAX_BATCH bytes and return.
 *
 * Otherwise: flush through i810DmaFlush(), count the overflow in
 * c_overflows, and call FatalError if the request is still larger
 * than the available space.
 *
 * newDwords - size of the request that did not fit; used only for the
 *             log message and the final fit check.
 */
void i810DmaOverflow( int newDwords ) {

	if (i810glx.dma_buffer->additional_space) {
		GLuint incr = MAX_BATCH;

		if (i810glx.dma_buffer->additional_space < incr) 
			incr = i810glx.dma_buffer->additional_space;

		/* Pad with zero dwords up to the next MAX_BATCH boundary. */
		while (i810glx.dma_buffer->head & (MAX_BATCH - 1)) {
			GLuint outbatch = i810glx.dma_buffer->head; 
			GLubyte *virt = i810glx.dma_buffer->virtual_start; 

			*(volatile unsigned int *)(virt + outbatch) = 0; 
			i810glx.dma_buffer->head += 4;
		}

		/* head now sits at the start of a fresh window, so the
		 * room available is exactly the window just claimed.
		 * Whatever was left of the previous window has been used
		 * up by the padding above and must not be carried over,
		 * otherwise space over-reports the room and later writes
		 * can run across the next MAX_BATCH boundary.
		 */
		i810glx.dma_buffer->space = incr;
		i810glx.dma_buffer->additional_space -= incr;

		if (0)
			fprintf(stderr, "overflow, head %x space %x\n",
				i810glx.dma_buffer->head,
				i810glx.dma_buffer->space);
		return;
	}
	
	i810Msg( 9, "i810DmaOverflow(%i)\n", newDwords );

	/* flush all the current commands so we will have another
           empty buffer */
	i810DmaFlush();

	i810glx.c_overflows++;

	/* NOTE(review): newDwords is compared directly with space, which
	 * is sized from MAX_BATCH in the same unit as the byte offset
	 * head - confirm that the units really match.
	 */
	if ( newDwords > i810glx.dma_buffer->space ) 
 		FatalError("i810DmaOverflow > maxPrimaryDwords");
}


/*
 * i810WaitDrawingEngine
 * This will not return until the drawing engine has completed
 * drawing pixels and it is safe to read or write the framebuffer
 * for software rendering.  It bumps the c_drawWaits counter (kept for
 * the performance block display) and calls i810DmaFinish().
 * Always returns 0.
 */
int i810WaitDrawingEngine( void ) {
	/* note this for the performance block display */
	++i810glx.c_drawWaits;

	/* make sure all pending dma has completed */
	i810DmaFinish();

	return 0;
}


/*
 * i810DmaExecute
 * Add a block of data to the dma buffer.
 *
 * code   - the words to copy, read as code[0] .. code[dwords-1].
 * dwords - number of words; an odd count triggers FatalError.
 *
 * The words are emitted one by one with OUT_BATCH between BEGIN_BATCH
 * and ADVANCE_BATCH.
 */
void i810DmaExecute( GLuint *code, int dwords ) 
{	
	int idx = 0;
	BEGIN_BATCH(dwords);

	if (dwords & 1) {
		FatalError( "Misaligned buffer in i810DmaExecute\n" );
	}

	while ( idx < dwords ) {
		OUT_BATCH( code[idx] );
		idx++;
	}

	ADVANCE_BATCH();
}



