#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#define NRA 4 /* number of rows in matrix A */
#define NCA 4 /* number of columns in matrix A = rows of B*/
#define CHUNK 2 /*chunk size */
int main (int argc, char *argv[])
{
int tid,i, j;
int a[NRA][NCA], /* matrix A to be multiplied */
b[NCA], /* matrix B to be multiplied */
c[NRA], /* result matrix C */
x[NRA][NCA/CHUNK]; /* Individual Thread results */
for (i=0; i<NRA; i++) {
for (j=0; j<NCA; j++) {
a[i][j]= i+j;
printf(" a[%d,%d]=%i ",i,j,a[i][j]);
}
printf("\n");
}
for (i=0; i<NCA; i++) {
b[i]= i;
printf("b[%i]=%i ",i, b[i]);
}
printf("\nChecking\n");
/*Verify result*/
for (i=0; i<NRA; i++) {
c[i]=0;
for (j=0; j<NCA; j++) {
c[i] =c[i]+a[i][j]*b[j];
}
printf("c[%i]= %i ",i,c[i]);
}
printf("\n\nEntering Parallel Region\n");
#pragma omp parallel num_threads(2) shared(a,b,c,x) private(tid,i,j)
{
tid = omp_get_thread_num();
printf("Thread %d starting matrix multiplication\n",tid);
for (i=0; i<NRA; i++) {
x[i][tid]=0;
#pragma omp for schedule(static,CHUNK) /*nowait*/
for (j=0; j<4; j++) {
x[i][tid] =x[i][tid]+a[i][j]*b[j];
}
printf("Thread %i x[%i][%i] = %i\n",tid,i,tid, x[i][tid]);
}
}
printf("\nLeft Parallel Region\n");
for (i=0; i<NRA; i++) {
c[i]=0;
for (j=0;j<CHUNK;j++) {
c[i]=c[i]+x[i][j];
}
printf("c[%i]=%i ",i,c[i]);
}
printf("\n");
return 0;
}