// Nested loops executed by a single parallel region: the collapse(2) clause
// merges both loops into one iteration space that is divided among the threads.
#include <stdio.h>
#include <omp.h>

#define N 8

/*
 * Computes the matrix-vector product y = A*x for an N x N matrix A and a
 * vector x that are both filled with ones. For every inner iteration it
 * prints the row index and the number of the executing OpenMP thread.
 *
 * collapse(2) fuses the i and j loops into one iteration space of N*N
 * iterations, so iterations that share the same i may be handed to different
 * threads. The accumulation into y[i] is therefore a concurrent update of a
 * shared element and has to be atomic to avoid a data race.
 *
 * Returns 0.
 */
int main(void)
{
	double y[N];
	double x[N];
	double A[N*N];

	for (int i=0; i<N; i++) {
		y[i] = 0;
	}
	for (int i=0; i<N; i++) {
		x[i] = 1;
	}
	for (int i=0; i<N; i++) {
		for (int j=0; j<N; j++) {
			A[i*N+j] = 1;
		}
	}

	#pragma omp parallel for collapse(2)
	for (int i=0; i<N; i++) {
		for (int j=0; j<N; j++) {
			printf("loop %d on thread %d\n", i, omp_get_thread_num());
			/* Several threads can hold iterations with the same i. */
			#pragma omp atomic
			y[i] += A[i*N+j]*x[j];
		}
	}

	return 0;
}

