
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>

#include "glx_symbols.h"
#include "hwlog.h"


#ifndef NO_MTRR

#define MTRR_NEED_STRINGS
#include <errno.h>
#include <asm/mtrr.h>
#include <sys/ioctl.h>

static int mtrr;

#if defined(USE_X86_ASM)
#include "X86/common_x86asm.h"
#endif

/*
 * IsPowerOfTwo
 *
 * Returns 1 when exactly one bit of val is set, 0 otherwise
 * (so 0 is rejected).
 *
 * The test is done on the unsigned bit pattern: the previous loop
 * compared against ( 1 << i ) for i up to 31, and shifting a 1 into
 * the sign bit of a signed int is undefined behaviour.  Working in
 * unsigned keeps the same set of accepted values without that shift.
 */
static int IsPowerOfTwo( int val )
{
	unsigned int	bits = (unsigned int)val;

	/* clearing the lowest set bit leaves zero only for a single-bit value */
	return ( bits != 0 && ( bits & ( bits - 1 ) ) == 0 ) ? 1 : 0;
}

/*
 * CloseMTRR
 *
 * Exit handler (registered with atexit() by OpenMTRR): closes the
 * descriptor stored in the file-scope `mtrr` variable.  The result
 * of close() is not examined.
 */
static void CloseMTRR( void )
{
	(void)close( mtrr );
}

static void OpenMTRR( void )
{
	if ( ( mtrr = open( "/proc/mtrr", O_WRONLY, 0 ) ) == -1 )
	{
		if ( errno == ENOENT ) {
			hwError( "/proc/mtrr not found: MTRR not enabled\n" );
		}  else {
			hwError( "Error opening /proc/mtrr: %s\n", strerror( errno ) );
			hwError( "MTRR not enabled\n" );
		}
		return;
	}
	atexit( CloseMTRR );
}


/*
 * CoverRangeWithMTRR
 *
 * Splits [base, base + range) into fragments whose size is a power of
 * two and whose start address is a multiple of that size, and returns
 * the number of fragments.  When type is -1 the fragments are only
 * counted; otherwise each one is installed with MTRRIOC_SET_ENTRY on
 * the `mtrr` descriptor using the given type.  A failed ioctl is
 * reported through hwError() and the walk continues.
 *
 * Original author's note: this function was probably a mistake.
 * Splitting a region up like this results in the main memory MTRR
 * being lost after X server exit, so the intent was to call it only
 * when the result would be a single MTRR anyway.
 *
 * Fixes relative to the earlier version:
 *  - the walk stops once the range is used up ( range > 0 ) instead of
 *    looping on a negative remainder when the last fragment was larger
 *    than what was left;
 *  - the fragment size is grown in unsigned arithmetic, so doubling it
 *    can no longer overflow a signed int;
 *  - the address is carried as unsigned, so a base at or above 2GB is
 *    not sign-extended when stored into the wider sentry.base field.
 */
static int CoverRangeWithMTRR( int base, int range, int type )
{
	unsigned int	addr = (unsigned int)base;
	int		count = 0;

	hwMsg( 1,"CoverRangeWithMTRR( 0x%x, 0x%x, %i )\n", base, range, type );

	while ( range > 0 )
	{
		/* 2048 is what the old loop fell back to when even 4096 didn't fit */
		unsigned int	size = 2048;

		/*
		 * See how big of an mtrr we can make: keep doubling while
		 * the doubled size still divides the address and still fits
		 * in what is left.  range is a positive int here, so the
		 * second test also bounds size.
		 */
		while ( ( addr & ( size * 2 - 1 ) ) == 0
			&& size * 2 <= (unsigned int)range ) {
			size *= 2;
		}

		/* set it if we aren't just checking the number */
		if ( type != -1 ) {
			struct mtrr_sentry sentry;

			sentry.base = addr;
			sentry.size = size;
			sentry.type = type;

			hwMsg( 1, "MTRR fragment added: addr=0x%x size=0x%x type=%i\n",
			       addr, size, type );
			if ( ioctl( mtrr, MTRRIOC_SET_ENTRY, &sentry ) == -1 ) {
				hwError( "Error doing ioctl(2) on /proc/mtrr: %s\n",
					 strerror( errno ) );
			}
		}

		addr += size;
		range -= (int)size;
		count++;
	}

	hwMsg( 1, "------\n" );

	return count;
}

/*
 * SetWriteCombining
 *
 * Tries to cover a range with a write-combining MTRR if possible, but
 * has no harmful effects if it fails for any reason.
 *
 *   physical - start address of the region
 *   bytes    - length of the region in bytes
 *
 * The request is silently dropped (with a level-1 log message) unless
 * the region is 4k aligned, a power of two in size, and based on a
 * multiple of its size.  Existing MTRR entries that overlap the region
 * are deleted; the parts of them that lie outside the region are
 * re-created with their old type via CoverRangeWithMTRR.
 *
 * /proc/mtrr is opened on first use.  `mtrr` == 0 means "not opened
 * yet" and a negative value means an earlier open failed.
 */
void SetWriteCombining( long physical, int bytes ) {
	struct mtrr_sentry sentry;
	struct mtrr_gentry gentry;
	int i;

	if ( !mtrr ) {
		OpenMTRR();
		if ( !mtrr ) {
			return;
		}
	}

	if ( mtrr < 0 ) {
		return;
	}

	/* all processors must use 4k blocks */
	if ( ( physical & 0xfff ) || ( bytes & 0xfff ) ) {
		hwMsg( 1, "Can't set write combining on 0x%lx / 0x%x, not in 4k blocks.\n",
		       (unsigned long)physical, bytes );
		return;
	}

	/*
	 * all must be a power of two bytes in size; a non-positive count
	 * can never describe a region, so it is rejected here as well
	 */
	if ( bytes <= 0 || !IsPowerOfTwo( bytes ) ) {
		hwMsg( 1, "Can't set write combining on 0x%lx / 0x%x, not power of 2 bytes.\n",
		       (unsigned long)physical, bytes );
		return;
	}

	/* all must have base a multiple of block size */
	if ( physical % bytes ) {
		hwMsg( 1, "Can't set write combining on 0x%lx / 0x%x, base not multiple of size.\n",
		       (unsigned long)physical, bytes );
		return;
	}

#ifdef USE_X86_ASM
	/* intel has additional restrictive rules */
	if ( !(gl_identify_x86_cpu_features() & GL_CPU_3Dnow) )
#endif
	{
		/* unsigned so that shifting an address >= 2GB is well defined */
		unsigned long	last, lbase;

		/*  Check upper bits of base and last are equal and lower bits are 0
		    for base and 1 for last  */
		last = (unsigned long)physical + bytes - 1;
		for ( lbase = (unsigned long)physical ;
		      !( lbase & 1 ) && ( last & 1 ) ;
		      lbase >>= 1, last >>= 1 )
			;
		if ( lbase != last ) {
			hwMsg( 1, "Can't set write combining on 0x%lx / 0x%x, intel alignment rules.\n",
			       (unsigned long)physical, bytes );
			return;
		}
	}

	/* remove any MTRR that cover the range */
	for ( i = 0 ; i < 128 ; i++ ) {
		gentry.regnum = i;
		if ( ioctl( mtrr, MTRRIOC_GET_ENTRY, &gentry ) == -1 ) {
			/* ran past the last register (or the query failed) */
			break;
		}
		hwMsg( 1, "MTRR reg %i: addr=0x%lx size=0x%x type=%i\n",
		       i, (unsigned long)gentry.base, gentry.size, gentry.type );

		/* entries entirely above or entirely below the region are left alone */
		if ( gentry.base >= physical + bytes ) {
			continue;
		}
		if ( gentry.base + gentry.size <= physical ) {
			continue;
		}

		/* we must delete this entry */
		sentry.base = gentry.base;
		sentry.size = gentry.size;
		sentry.type = gentry.type;	/* don't hand an uninitialised field to the ioctl */
		if ( ioctl( mtrr, MTRRIOC_DEL_ENTRY, &sentry ) == -1 ) {
			hwError( "Error doing MTRRIOC_DEL_ENTRY on /proc/mtrr: %s\n",
				     strerror( errno ) );
			/*
			 * The entry is still in place.  Re-checking this index
			 * would find it again and loop forever, and re-creating
			 * fragments would overlap it, so just move on.
			 */
			continue;
		}
		hwMsg( 1, "MTRRIOC_DEL_ENTRY succeeded\n" );

		/* recreate fragments around the new region if necessary */
		if ( gentry.base < physical ) {
			/* part of the old entry below the new region */
			CoverRangeWithMTRR( (int)gentry.base,
					    (int)( physical - gentry.base ),
					    gentry.type );
		}
		if ( gentry.base + gentry.size > physical + bytes ) {
			/* part of the old entry above the new region: old end minus new end */
			CoverRangeWithMTRR( (int)( physical + bytes ),
					    (int)( gentry.base + gentry.size
						   - ( physical + bytes ) ),
					    gentry.type );
		}

		/* because we deleted an entry, we need to check this index again */
		i--;
	}

	/* set this range to write combining */
	sentry.base = physical;
	sentry.size = bytes;
	sentry.type = MTRR_TYPE_WRCOMB; /* write-combining */

	if ( ioctl( mtrr, MTRRIOC_SET_ENTRY, &sentry ) == -1 ) {
		hwError( "Error doing ioctl(2) on /proc/mtrr: %s\n",
			 strerror( errno ) );
		hwError( "MTRR not enabled\n" );
	} else {
		hwMsg( 1, "MTRR enabled: write-combining, addr=0x%lx size=0x%x\n",
		       (unsigned long)sentry.base, sentry.size );
	}
}

/*
 * FlushWriteCombining
 *
 * Make sure all writes have actually completed before
 * starting a DMA operation.  This only matters in rare cases
 * of very small buffers, because write combining buffers are
 * automatically flushed some number of cycles after they idle (I think)
 *
 * Takes no arguments and returns nothing.  What is emitted depends on
 * the build: two inline-asm statements when USE_X86_ASM is defined, a
 * "sync" instruction when __PPC__ is defined, and an empty body when
 * neither macro is defined.
 */
/* Memory operand for the xchg below.  Not static, so it is visible to other files. */
int	xchangeDummy;
void FlushWriteCombining( void ) 
{
// Use the Intel Way of things...
#ifdef USE_X86_ASM
	/*
	 * Exchange %eax with xchangeDummy; %eax is pushed before and popped
	 * after so its value is preserved across the statement.
	 * NOTE(review): presumably chosen because xchg with a memory operand
	 * is implicitly locked on x86 -- confirm against the CPU manual.
	 * NOTE(review): the asm stores into xchangeDummy but lists it only as
	 * an "m" input and declares no "memory" clobber -- confirm the
	 * compiler cannot reorder or cache around it.
	 */
	__asm__ volatile( " push %%eax ; xchg %%eax, %0 ; pop %%eax" : : "m" (xchangeDummy) );
	/*
	 * Run cpuid with %eax = 0.  eax/ebx/ecx/edx are pushed first and
	 * popped afterwards in reverse order, so no clobber list is given.
	 * NOTE(review): presumably used for cpuid's serializing effect -- confirm.
	 */
	__asm__ volatile( " push %%eax ; push %%ebx ; push %%ecx ; push %%edx ; movl $0,%%eax ; cpuid ; pop %%edx ; pop %%ecx ; pop %%ebx ; pop %%eax" : /* no outputs */ :  /* no inputs */ );
#endif

// Even though they don't call it this, it seems there's something _very_ 
// analogous to this in the PPC world and you're supposed to be using eieio or sync 
// to do the same thing.  We're going to use sync here...
#ifdef __PPC__
	__asm__ __volatile__ ("sync");
#endif
}

#else

// if we aren't configured for MTRR, these are no-ops

/*
 * SetWriteCombining (NO_MTRR build)
 *
 * Stand-in used when MTRR support is compiled out: both arguments
 * are accepted and ignored.
 */
void SetWriteCombining( long physical, int bytes )
{
	(void)physical;
	(void)bytes;
}

/*
 * FlushWriteCombining (NO_MTRR build)
 *
 * Stand-in used when MTRR support is compiled out: does nothing.
 */
void FlushWriteCombining( void )
{
	return;
}

#endif

/*
 * MemoryBenchmark
 *
 * Print the speed that a memory buffer is filled at.
 *
 *   buffer - start of the memory to fill; must hold at least
 *            `dwords` ints
 *   dwords - number of ints to write
 *
 * Every int in the buffer is set to 0x15151515, the fill is timed with
 * usec(), and the rate is logged at level 1 in megabytes per second
 * ( dwords / 0x40000 is the buffer size in megabytes for 4-byte ints ).
 *
 * Fixes relative to the earlier version:
 *  - the 8-way unrolled loop only runs over whole groups of eight and
 *    the remainder is filled one int at a time, so a count that is not
 *    a multiple of 8 no longer writes past the end of the buffer;
 *  - an elapsed time of zero (or less) is clamped to one microsecond
 *    so the rate computation never divides by zero.
 */
void MemoryBenchmark( void *buffer, int dwords ) {
	int		i;
	int		start, end, elapsed;
	int		unrolled;
	int		mb;
	int 	*base;

	base = (int *)buffer;

	/* largest multiple of 8 that does not exceed dwords */
	unrolled = dwords - ( dwords % 8 );

	start = usec();
	for ( i = 0 ; i < unrolled ; i += 8 ) {
		base[i] =
		base[i+1] =
		base[i+2] =
		base[i+3] =
		base[i+4] =
		base[i+5] =
		base[i+6] =
		base[i+7] = 0x15151515;
	}
	/* leftover ints when dwords is not a multiple of 8 */
	for ( ; i < dwords ; i++ ) {
		base[i] = 0x15151515;
	}
	end = usec();

	elapsed = end - start;
	if ( elapsed <= 0 ) {
		/* too fast for the timer to resolve; report an upper bound */
		elapsed = 1;
	}

	mb = ( (float)dwords / 0x40000 ) * 1000000 / elapsed;

	hwMsg( 1, "MemoryBenchmark: %i mb/s\n", mb );
}


