// find_max() with OpenMP parallel for. This code is not efficient but still shows something ;-)  
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define VERBOSE 1

// Return the largest element of A[0..N-1], scanning the array with an
// OpenMP parallel for loop.
//
//   A  array of at least N doubles; it is only read
//   N  number of elements to scan
//
// Returns -HUGE_VAL when A is NULL or N <= 0 (nothing to scan).
// When VERBOSE is non-zero, the size of the thread team is printed first.
double find_max(double *A, int N)
{
#if VERBOSE
	// omp_get_num_threads() reports the team size only when called inside a
	// parallel region (outside of one it returns 1); the master construct
	// lets a single thread do the printing.
	#pragma omp parallel
	#pragma omp master
	printf("Running with %d threads\n", omp_get_num_threads());
#endif

	if (A == NULL || N <= 0)
		return -HUGE_VAL;

	double mx = A[0];

	// The max reduction gives every thread its own private running maximum
	// and merges them (together with the initial value of mx) when the loop
	// ends, so no shared per-thread array has to be sized or indexed.
	#pragma omp parallel for reduction(max:mx)
	for (int i = 1; i < N; i++) {
		if (A[i] > mx) mx = A[i];
	}

	return mx;
}


// Demo driver: fill an array with pseudo-random values, time find_max() on
// it, and print the maximum together with the elapsed wall-clock time.
// Returns 0 on success, or EXIT_FAILURE if the array cannot be allocated.
int main(int argc, char *argv[])
{
	(void)argc;	// command-line arguments are not used
	(void)argv;

	int N = 32*1024*1024;

	double *a = malloc((size_t)N * sizeof *a);
	if (a == NULL) {
		// Without this check the fill loop below would write through NULL.
		fprintf(stderr, "failed to allocate %d doubles\n", N);
		return EXIT_FAILURE;
	}

	srand48(1);	// fixed seed, so every run scans the same data
	for (int i = 0; i < N; i++) a[i] = drand48();

	// Only the find_max() call is timed; allocation and fill are excluded.
	double t0 = omp_get_wtime();
	double mx = find_max(a, N);
	double t1 = omp_get_wtime();

	printf("max value = %.16f, elapsed time = %lf seconds\n", mx, t1-t0);

	free(a);

	return 0;
}
