#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "papi.h" /* This needs to be included every time you use PAPI */

/* Report an error code together with the source location on stderr, then
 * terminate the process using that code as the exit status.
 * Wrapped in do { } while (0) so it behaves as a single statement (safe in an
 * unbraced if/else); the argument is evaluated exactly once. */
#define ERROR_RETURN(retval) \
  do { \
    const int error_return_code_ = (retval); \
    fprintf(stderr, "Error %d %s:line %d: \n", error_return_code_, __FILE__, __LINE__); \
    exit(error_return_code_); \
  } while (0)

/* Allocate a size x size matrix of doubles as an array of row pointers. */
double ** alloc_matrix(int size);
/* Free each of the size rows of M, then the row-pointer array itself. */
void free_matrix(double ** M, int size);
/* Fill every element of the size x size matrix M with rand()/123.0. */
void matrix_rand_init(double ** M, int size);
/*void print_matrix(double **M, int size);*/


/* Print a PAPI failure on stderr: the call that failed, the numeric return
 * code, and the library's description of that code when it has one. */
static void report_papi_failure(const char *call, int code) {
  const char *msg = PAPI_strerror(code);
  fprintf(stderr, "Error: %s failed (%d): %s\n",
          call, code, msg != NULL ? msg : "unknown PAPI error");
}

/*
 * Multiply two randomly initialized size x size matrices (C = A * B) inside a
 * PAPI high-level region named "computation".
 *
 * Usage: <program> [size]
 *   size  positive integer matrix dimension; 1024 is used when the program is
 *         not given exactly one argument.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on an invalid size, an
 * allocation failure, or a PAPI error.
 */
int main(int argc, char** argv){
  int retval;
  int status = EXIT_SUCCESS;
  int size = 1024;

  if (argc != 2) {
    fprintf(stdout,"[WARNING] Usage: %s #size\n",argv[0]);
    fprintf(stdout,"[WARNING] Using default size = 1024\n");
  }
  else {
    /* strtol (unlike atoi) lets us reject trailing garbage and out-of-range
     * values instead of silently running with a meaningless size. */
    char *end = NULL;
    long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed > INT_MAX) {
      fprintf(stderr, "Error: invalid matrix size '%s' (expected a positive integer)\n", argv[1]);
      return EXIT_FAILURE;
    }
    size = (int)parsed;
  }

  /* allocate matrices spaces */
  double ** A = alloc_matrix(size);
  double ** B = alloc_matrix(size);
  double ** C = alloc_matrix(size);
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "Error: could not allocate %dx%d matrices\n", size, size);
    status = EXIT_FAILURE;
    goto cleanup;
  }

  /* initialize matrices */
  matrix_rand_init(A, size);
  matrix_rand_init(B, size);

  /* Open the PAPI high-level region "computation"; the work between this call
   * and the matching PAPI_hl_region_end() below is what gets measured. */
  retval = PAPI_hl_region_begin("computation");
  if ( retval != PAPI_OK ) {
    report_papi_failure("PAPI_hl_region_begin", retval);
    status = EXIT_FAILURE;
    goto cleanup;
  }

  /****** this is where your computation goes *********/
  for (int i=0; i<size; i++) {
    for (int j=0; j<size; j++) {
      double tmp = 0;
      for (int k=0; k<size; k++) {
          tmp += A[i][k]*B[k][j];
      }
      C[i][j] = tmp;
    }
  }

  /* Close the "computation" region. */
  retval = PAPI_hl_region_end("computation");
  if ( retval != PAPI_OK ) {
    report_papi_failure("PAPI_hl_region_end", retval);
    status = EXIT_FAILURE;
  }

cleanup:
  /* free matrices; only those that were actually allocated are released */
  if (A != NULL) {
    free_matrix(A,size);
  }
  if (B != NULL) {
    free_matrix(B,size);
  }
  if (C != NULL) {
    free_matrix(C,size);
  }

  return status;
}

/*
 * Allocate a size x size matrix of doubles as an array of `size` row
 * pointers, each pointing to a separately allocated row of `size` doubles.
 * All elements start out as zero.
 *
 * Returns the matrix, or NULL when size is not positive or when any
 * allocation fails (nothing is leaked in that case). The caller owns the
 * result and must release every row and then the row-pointer array.
 */
double ** alloc_matrix(int size) {
  if (size <= 0) {
    return NULL;
  }

  /* calloc checks the count * element-size product for overflow and leaves
   * every row pointer NULL, which keeps the failure path below simple. */
  double ** M = calloc((size_t)size, sizeof *M);
  if (M == NULL) {
    return NULL;
  }

  for (int i = 0; i < size; i++) {
    M[i] = calloc((size_t)size, sizeof *M[i]);
    if (M[i] == NULL) {
      /* Roll back the rows allocated so far so the failure does not leak. */
      while (i-- > 0) {
        free(M[i]);
      }
      free(M);
      return NULL;
    }
  }
  return M;
}

/*
 * Release a matrix made of `size` separately allocated rows: each row is
 * freed first, then the row-pointer array itself.
 * Passing M == NULL is a no-op, mirroring free(NULL); individual rows may
 * also be NULL.
 */
void free_matrix(double ** M, int size) {
  if (M == NULL) {
    return;
  }
  for (int i = 0; i < size; i++) {
    free(M[i]);
  }
  free(M);
}

/*
 * Fill the size x size matrix M, row by row, with values drawn from rand()
 * and divided by 123.0. The generator is not seeded here, so the sequence
 * depends on whatever srand() state the caller has established.
 */
void matrix_rand_init(double ** M, int size) {
  for (int row = 0; row < size; row++) {
    double *cells = M[row];
    for (int col = 0; col < size; col++) {
      cells[col] = rand() / 123.0;
    }
  }
}

/*
 * Write the size x size matrix M to stdout: one line per row, every element
 * printed with "%f" and followed by a tab.
 */
void print_matrix(double **M, int size) {
  for (int row = 0; row < size; row++) {
    const double *cells = M[row];
    for (int col = 0; col < size; col++) {
      printf("%f\t", cells[col]);
    }
    printf("\n");
  }
}
