/*
 * pthreadtest.c: a stupid nop loop with pthreads
 *
 * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 */
#include <sys/types.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

/*
 * clear_psr_ac(): issue the IA-64 "rum psr.ac" instruction (or the
 * equivalent Intel compiler intrinsic) to clear the psr.ac bit of the
 * user mask.  The caller in this file does this so that the hardware
 * performs unaligned accesses itself.
 *
 * The GNU variant is declared "static inline" rather than "extern inline":
 * "extern inline" means opposite things under GNU89 and C99 inline rules
 * (under GNU89 no out-of-line body is emitted, so a call that is not
 * inlined, e.g. at -O0, fails to link).  "static inline" behaves the same
 * under both sets of rules.
 */
#ifdef __GNUC__
static inline void
clear_psr_ac(void)
{
	/* "memory" clobber: keep the compiler from moving accesses across it */
	__asm__ __volatile__("rum psr.ac;;" ::: "memory" );
}
#elif defined(__ECC) && defined(__INTEL_COMPILER)
#include <ia64intrin.h>
#define clear_psr_ac()	__rum(1<<3)
#else
#error "You need to define clear_psr_ac() for your compiler"
#endif


/*
 * Result codes (not referenced by the code visible in this file).
 * The negative value is parenthesized so that the macro always expands
 * to a single operand inside a larger expression.
 */
#define PFM_TEST_INVALID	(-1)
#define PFM_TEST_VALID		0


/*
 * Scratch buffer used by do_two_una(), which loads and stores an
 * unsigned int at c_tab+1.  The union exposes the same bytes through
 * several element widths; only c_tab is referenced by the visible code.
 * Because the object is 32-byte aligned, the address c_tab+1 is always
 * odd, i.e. misaligned for an unsigned int.  As a static object it starts
 * out zero-filled.
 */
static union {
	unsigned long   l_tab[2];
	unsigned int    i_tab[4];
	unsigned short  s_tab[8];
	unsigned char   c_tab[16];
} __attribute__((__aligned__(32))) messy;


/*
 * do_two_una(): one misaligned load followed by one misaligned store.
 *
 * Reads the unsigned int located one byte into messy.c_tab, spins
 * pace_count times to separate the two accesses, then writes the
 * incremented value back to the same misaligned address.
 *
 * Returns 0 when the value written equals the number of calls made so
 * far, -1 otherwise (also -1, with a message, if the target address
 * unexpectedly turns out to be even).
 *
 * NOTE(review): both the call counter and messy are shared and updated
 * without synchronization, so the value check presumably only holds
 * reliably when a single thread is calling -- confirm.
 */
int
do_two_una(unsigned long pace_count)
{
	static unsigned int called;
	unsigned long remaining;
	unsigned int *target;
	unsigned int value;

	called += 1;

	/* odd address inside the 32-byte aligned buffer */
	target = (unsigned int *)&messy.c_tab[1];
	if (!((unsigned long)target & 0x1)) {
		printf("Data is not unaligned, can't run test\n");
		return -1;
	}

	/* misaligned load */
	value = *target;

	/* space the accesses */
	for (remaining = pace_count; remaining != 0; remaining--)
		;

	/* misaligned store of the incremented value */
	value += 1;
	*target = value;

	if (value != called)
		return -1;

	if (remaining != 0)
		return -1;

	return 0;
}

/*
 * do_una_test(): run do_two_una() up to `count` times.
 *
 * count: maximum number of iterations
 * pace:  spacing count handed to do_two_una() on every iteration
 *
 * After each iteration the process id and the number of remaining
 * iterations are printed.  The loop stops early as soon as an iteration
 * returns non-zero.  Returns the status of the last iteration executed,
 * or 0 if none was executed.
 */
int
do_una_test(unsigned long count, unsigned long pace)
{
	int status = 0;

	/* let the hardware do the unaligned access */
	clear_psr_ac();

	for (;;) {
		/* the counter is consumed before the status is examined */
		if (count-- == 0)
			break;
		if (status != 0)
			break;

		status = do_two_una(pace);
		printf("pid=%d count=%lu\n", getpid(), count);
	}

	return status;
}

/*
 * do_test1(): run `loop` iterations of the unaligned-access test with no
 * pacing between the load and the store.  Returns do_una_test()'s status.
 */
int
do_test1(unsigned long loop)
{
	const unsigned long no_pacing = 0;
	int status;

	status = do_una_test(loop, no_pacing);
	return status;
}

/*
 * do_test2(): same workload as do_test1() -- `loop` iterations of the
 * unaligned-access test with a pace of 0.  launch_test2() calls it from
 * the thread start routine.  Returns do_una_test()'s status.
 */
int
do_test2(unsigned long loop)
{
	const unsigned long no_pacing = 0;
	int status;

	status = do_una_test(loop, no_pacing);
	return status;
}



/*
 * launch_test2(): thread start routine.
 *
 * data: points to an unsigned long holding the iteration count; it is
 *       only read, once, on entry.
 *
 * Returns NULL as the thread's exit value.  The routine has the
 * void *(*)(void *) signature that pthread_create() expects, so calling
 * it through that pointer type is well defined.  The previous version
 * handed the address of an automatic variable to pthread_exit(); that
 * storage is gone once the thread terminates, so any joiner reading the
 * exit value would have dereferenced a dangling pointer.
 */
void *
launch_test2(void *data)
{
	unsigned long loop = *(unsigned long *)data;

	/* the test status is deliberately not reported, as before */
	(void)do_test2(loop);

	return NULL;
}


/*
 * Usage: <program> [loop [nthreads]]
 *
 *   loop      iterations of the unaligned-access test run by the main
 *             thread and by each created thread (default 1000)
 *   nthreads  number of additional threads to create (default 0)
 *
 * Creates the requested threads, runs the test in the main thread, then
 * joins every thread that was actually created.  Exits with 0 on
 * success and 1 if the arguments are invalid, the thread table cannot be
 * allocated, or a thread cannot be created.
 */
int
main(int argc, char **argv)
{
	pthread_t	*thread_list = NULL;
	unsigned long 	loop;
	int 		nt, nthreads = 0, ret, status = 0;

	loop = argc > 1 ? strtoul(argv[1], NULL, 0) : 1000;
	nt   = argc > 2 ? atoi(argv[2]) : 0;

	/* a negative count would yield a bogus allocation size and loop bound */
	if (nt < 0) {
		fprintf(stderr, "invalid number of threads: %d\n", nt);
		exit(1);
	}

	printf("creating %d thread(s)\n", nt);
	printf("expecting %lu misaligned loads for parent\n", loop);
	printf("expecting %lu misaligned loads for each thread (except manager)\n", loop);

	/*
	 * Allocate only when there is something to store: a zero-sized
	 * allocation may legitimately return NULL, which must not be
	 * mistaken for an out-of-memory condition.
	 */
	if (nt > 0) {
		thread_list = calloc((size_t)nt, sizeof(*thread_list));
		if (thread_list == NULL) {
			fprintf(stderr, "cannot malloc thread table for %d threads\n", nt);
			exit(1);
		}
	}

	/*
	 * Fill the table from index 0 upwards so that the first `nthreads`
	 * slots are exactly the threads that exist, even if creation stops
	 * half way.  &loop stays valid because every thread is joined
	 * before main leaves.
	 */
	while (nthreads < nt) {
		ret = pthread_create(&thread_list[nthreads], NULL, (void *(*)(void *))launch_test2, &loop);
		/* pthread_create() reports failure as a non-zero error number */
		if (ret != 0) {
			fprintf(stderr, "cannot create thread %d (error %d)\n", nthreads, ret);
			status = 1;
			goto cleanup;
		}
		nthreads++;
	}
	do_test1(loop);
cleanup:
	while (nthreads--) {
		pthread_join(thread_list[nthreads], NULL);
	}
	free(thread_list);
	exit(status);
}
