#include <stdio.h>
#include <time.h>
#include <mmintrin.h>

/* Number of times exec_func() calls the routine under test per measurement. */
#define LOOPS 500

/*
 * 64-bit unsigned integer type used by bits_t.
 *
 * Take the definition from <stdint.h> wherever it exists.  Declaring
 * uint64_t by hand clashes with the standard header on targets where it is
 * "unsigned long" rather than "unsigned long long".  Only Microsoft
 * compilers older than Visual Studio 2010 (_MSC_VER < 1600) lack the
 * header, so the __int64 fallback is kept for those alone.
 */
#if defined(_MSC_VER) && _MSC_VER < 1600
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/*
 * One benchmark element, exposing the same storage through three views so
 * that each kernel (u32, u64, mmx) can XOR it with its own operand type.
 */
typedef union {
	unsigned int u32[2];	/* two-word view; assumes unsigned int is 32 bits wide (not checked here) */
	uint64_t u64;		/* single 64-bit integer view */
	__m64 mm;		/* MMX register view, operand type of _mm_xor_si64() */
} bits_t;

/*
 * Benchmark operands, 65536*16 (1,048,576) elements each.
 * array1 is both the left operand and the destination of every kernel;
 * array2 is only read.  Neither has an initializer, so both start out
 * zero-filled like any object with static storage duration.
 */
bits_t array1[65536*16];
bits_t array2[65536*16];

void u32()
{
	int i;
	const int n = sizeof(array1) / sizeof(array1[0]);
	for (i = 0; i < n; i++) {
		array1[i].u32[0] ^= array2[i].u32[0];
		array1[i].u32[1] ^= array2[i].u32[1];
	}
}

void u64()
{
	int i;
	const int n = sizeof(array1) / sizeof(array1[0]);
	for (i = 0; i < n; i++) {
		array1[i].u64 ^= array2[i].u64;
	}
}

/*
 * XOR array2 into array1 element by element through the MMX intrinsic
 * _mm_xor_si64().  This routine does not call _mm_empty() itself.
 */
void mmx()
{
	enum { COUNT = sizeof(array1) / sizeof(array1[0]) };
	int idx;

	for (idx = 0; idx < COUNT; ++idx) {
		const __m64 lhs = array1[idx].mm;
		const __m64 rhs = array2[idx].mm;

		array1[idx].mm = _mm_xor_si64(lhs, rhs);
	}
}

/*
 * Time LOOPS consecutive calls of func and report the result.
 *
 * func: routine to measure; it is called with no arguments.
 * name: label printed in front of the measurement.
 *
 * On success prints "<name> : <elapsed> ms" to stdout, where <elapsed> is
 * the clock() difference across the whole loop converted to milliseconds.
 * If clock() reports that processor time is unavailable, a diagnostic is
 * written to stderr instead of a meaningless number.
 */
void exec_func(void (*func)(void), const char *name)
{
	const clock_t start = clock();
	int pass;

	for (pass = 0; pass < LOOPS; pass++) {
		func();
	}

	/*
	 * func may have left MMX state live.  The MMX registers alias the x87
	 * register stack, so clear that state before calling back into the C
	 * library (clock, printf) and before the floating-point arithmetic
	 * below.  One extra instruction per measurement is negligible next to
	 * LOOPS passes over the arrays.
	 */
	_mm_empty();

	const clock_t end = clock();

	/* clock() returns (clock_t)-1 when processor time cannot be obtained. */
	if (start == (clock_t)-1 || end == (clock_t)-1) {
		fprintf(stderr, "%s : clock() failed\n", name);
		return;
	}

	printf("%s : %f ms\n", name, (double)(end - start) * 1000.0 / CLOCKS_PER_SEC);
}

/*
 * Benchmark driver: time each XOR kernel in turn (u32, u64, mmx) and let
 * exec_func() print one result line per kernel.  Always returns 0.
 */
int main()
{
	/* Kernels to time, each paired with the label printed for it. */
	static const struct {
		void (*kernel)();
		const char *label;
	} runs[] = {
		{ u32, "u32" },
		{ u64, "u64" },
		{ mmx, "mmx" },
	};
	const int run_count = (int)(sizeof(runs) / sizeof(runs[0]));
	int r;

	for (r = 0; r < run_count; ++r) {
		exec_func(runs[r].kernel, runs[r].label);
	}
	return 0;
}
