Commit a2afe297 authored by Hendryk Bockelmann
Browse files

rewrite papi test to check for relative change in counter value

parent 88826106
......@@ -30,11 +30,18 @@
/* Project header; presumably declares the sct_* calls used by this test
 * (sct_init, sct_new_timer, sct_start, sct_stop, sct_event, sct_report). */
#include "sct.h"
/* Base matrix dimension; main() scales it by (pid+1), and by an extra
 * factor of 2 for the second round. */
#define INDEX 100
/* Threshold, in percent, applied to the relative deviation of the measured
 * PAPI_TOT_INS count from its reference value. */
#define INS_COUNT_TOLERANCE 5.
/* NOTE(review): MY_TIMER_MAX is defined twice in this view (2, then 4). This
 * looks like the pre- and post-change lines of a diff shown together; only
 * one definition should remain in the real file -- confirm. */
#define MY_TIMER_MAX 2
#define MY_TIMER_MAX 4
/* Number of timers handed to sct_init(); also the bound used when zeroing timer[]. */
static const int timer_max = MY_TIMER_MAX;
/* Timer handles as returned by sct_new_timer(). */
static int timer[MY_TIMER_MAX];
/* pid: MPI rank (0 when built without MPI).
 * tid / nbt: presumably the OpenMP thread id and thread count -- their
 * assignment is not visible here.
 * istart / iend: half-open row range [istart, iend) a thread works on. */
static int pid, tid, nbt, istart, iend;
/* Row-block size used by initMatrix() to derive the per-thread row ranges. */
static double blocksize;
/* Operand matrices of the matrix-matrix multiply; allocated in initMatrix(). */
double **matrixa;
double **matrixb;
/* Result rows of the multiply; holds (iend - istart) row pointers. */
static double **mresult;
/* Give every OpenMP thread its own copy of the listed file-scope variables. */
#pragma omp threadprivate(istart, iend, blocksize, tid, nbt, mresult)
void
dummy( void *array )
......@@ -145,44 +152,18 @@ double determineFrequency(void) {
}
int main( int argc, char **argv ) {
int i, j, k;
static int pid, tid, nbt, istart, iend;
static double blocksize, freq;
int myINDEX;
double **matrixa;
double **matrixb;
static double **mresult;
#pragma omp threadprivate(istart, iend, blocksize, tid, nbt, mresult)
#ifdef HAVE_MPI
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
#else
pid = 0;
#endif
/* init SCT*/
sct_init(timer_max, "test_papi", SCT_COMM_WORLD);
for (i=0; i < timer_max; i++) timer[i] = 0;
char label0[] = "sct-timer";
char label1[] = "seq-part";
timer[0] = sct_new_timer(label0);
timer[1] = sct_new_timer(label1);
/* Initialize the Matrix arrays */
myINDEX = (pid+1)*INDEX;
void initMatrix(int mysize) {
int i,j,k;
matrixa = (double **) malloc(myINDEX*sizeof(double *));
matrixb = (double **) malloc(myINDEX*sizeof(double *));
for (i=0; i<myINDEX; i++) {
matrixa[i] = (double *) malloc(myINDEX*sizeof(double));
matrixb[i] = (double *) malloc(myINDEX*sizeof(double));
matrixa = (double **) malloc(mysize*sizeof(double *));
matrixb = (double **) malloc(mysize*sizeof(double *));
for (i=0; i<mysize; i++) {
matrixa[i] = (double *) malloc(mysize*sizeof(double));
matrixb[i] = (double *) malloc(mysize*sizeof(double));
}
for ( i = 0; i < myINDEX; i++ ) {
for ( j = 0; j < myINDEX; j++) {
for ( i = 0; i < mysize; i++ ) {
for ( j = 0; j < mysize; j++) {
matrixa[i][j] = ( float ) rand( ) * ( float ) 1.1;
matrixb[i][j] = ( float ) rand( ) * ( float ) 1.1;
}
......@@ -201,26 +182,70 @@ int main( int argc, char **argv ) {
blocksize = 0.0;
/* find some blocksize which produces inbalance in threaded region */
for (i=0; i<nbt; i++) blocksize += 1./pow(1.5,i);
blocksize = myINDEX / blocksize;
blocksize = mysize / blocksize;
for (i=0; i<=tid; i++) {
istart = iend;
iend += (int)(blocksize/pow(1.5,i));
}
if (tid == nbt-1) iend = myINDEX;
if (tid == nbt-1) iend = mysize;
mresult = (double **) malloc((iend-istart)*sizeof(double *));
for (i=istart; i<iend; i++)
mresult[i-istart] = (double *) malloc(myINDEX*sizeof(double));
mresult[i-istart] = (double *) malloc(mysize*sizeof(double));
for (int i=istart; i<iend; i++) {
for (int j=0; j<myINDEX; j++) {
for (int j=0; j<mysize; j++) {
mresult[i-istart][j] = 0.0;
}
}
}
}
/**
 * Release the matrices allocated by initMatrix().
 *
 * Frees the mysize rows of matrixa and matrixb together with their
 * row-pointer arrays. Then, inside an OpenMP parallel region, each thread
 * frees rows [istart, iend) of its threadprivate mresult and the mresult
 * row-pointer array itself.
 *
 * Every pointer is reset to NULL once released and checked before use.
 * The test calls this function once per round, so a stale pointer must
 * never be freed a second time, and an unallocated matrix must not be
 * dereferenced.
 *
 * @param mysize number of rows in matrixa and matrixb; expected to be the
 *               same value that was passed to initMatrix().
 */
void freeMatrix(int mysize) {
  int i;

  if (matrixa != NULL) {
    for (i = 0; i < mysize; i++) {
      free(matrixa[i]);
    }
    free(matrixa);
    matrixa = NULL;
  }

  if (matrixb != NULL) {
    for (i = 0; i < mysize; i++) {
      free(matrixb[i]);
    }
    free(matrixb);
    matrixb = NULL;
  }

  /* mresult, istart and iend are threadprivate: each thread releases only
   * the rows it owns and clears its own copy of the pointer. */
#pragma omp parallel private(i)
  {
    if (mresult != NULL) {
      for (i = istart; i < iend; i++) {
        free(mresult[i - istart]);
      }
      free(mresult);
      mresult = NULL;
    }
  }
}
int main( int argc, char **argv ) {
int i, j, k;
int myINDEX;
#ifdef HAVE_MPI
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &pid);
#else
pid = 0;
#endif
/* init SCT*/
sct_init(timer_max, "test_papi", SCT_COMM_WORLD);
for (i=0; i < timer_max; i++) timer[i] = 0;
char label0[] = "sct-timer 0";
char label1[] = "sct-timer 1";
char label2[] = "seq-part";
timer[0] = sct_new_timer(label0);
timer[1] = sct_new_timer(label1);
timer[2] = sct_new_timer(label2);
/* Initialize the Matrix arrays for round 0*/
myINDEX = (pid+1)*INDEX;
initMatrix(myINDEX);
#if _OPENMP
sct_start(timer[1]);
sct_start(timer[2]);
#endif
#pragma omp parallel private(i,j,k)
......@@ -239,22 +264,42 @@ int main( int argc, char **argv ) {
}
#if _OPENMP
sct_stop(timer[1]);
sct_stop(timer[2]);
#endif
/* free memory */
for (i=0; i<myINDEX; i++) {
free(matrixa[i]);
free(matrixb[i]);
}
free(matrixa);
free(matrixb);
#pragma omp parallel private(i)
freeMatrix(myINDEX);
/* Initialize the Matrix arrays for round 1*/
myINDEX = (pid+1)*INDEX*2;
initMatrix(myINDEX);
#if _OPENMP
sct_start(timer[2]);
#endif
#pragma omp parallel private(i,j,k)
{
for (i=istart; i<iend; i++) free(mresult[i-istart]);
free(mresult);
sct_start(timer[1]);
/* Matrix-Matrix multiply */
for (i=istart; i<iend; i++)
for (j=0; j<myINDEX; j++)
for (k=0; k<myINDEX; k++)
mresult[i-istart][j] = mresult[i-istart][j] + matrixa[i][k] * matrixb[k][j];
dummy( ( void * ) mresult );
sct_stop(timer[1]);
}
#if _OPENMP
sct_stop(timer[2]);
#endif
/* free memory */
freeMatrix(myINDEX);
sct_report(SCT_GETENV, SCT_GETENV, SCT_GETENV);
#ifdef HAVE_MPI
MPI_Barrier(MPI_COMM_WORLD);
......@@ -278,10 +323,10 @@ int main( int argc, char **argv ) {
// check for tests that might have failed
int err = 0;
double ref, act, diff;
double act0, act1, ratio;
if (evc == 1) {
#if _OPENMP
#pragma omp parallel private(act, ref)
#pragma omp parallel private(act0, act1, ratio)
{
#pragma omp critical
{
......@@ -294,14 +339,14 @@ int main( int argc, char **argv ) {
/* printf(" criterion (diff < 1.) ... is allowed to fail if fma is used !\n"); */
/* if (diff > 1.) err = 1; */
/* else use 'Instructions completed' */
ref = (double)3*(iend-istart)*myINDEX*myINDEX;
act = sct_event(timer[0], "PAPI_TOT_INS");
diff = fabs(ref-act)/ref*100.;
printf("outputcheck: PAPI_TOT_INS for proc %i, thread %i: ref %10.2e act %10.2e diff %5.2f %%\n",
pid, tid, ref, act, diff);
printf(" criterion (diff < %5.2f)\n", INS_COUNT_TOLERANCE);
if (diff > INS_COUNT_TOLERANCE) err = 1;
/* else compare 'Load instructions' for both runs of matmult*/
act0 = sct_event(timer[0], "PAPI_LD_INS");
act1 = sct_event(timer[1], "PAPI_LD_INS");
ratio = act1/act0;
printf("outputcheck: PAPI_LD_INS ratio for proc %i, thread %i: act0 %10.2e act1 %10.2e ratio %5.2f\n",
pid, tid, act0, act1, ratio);
printf(" criterion (7 <= ratio <= 8)\n");
if ((ratio > 8.) || (ratio < 7.)) err = 1;
}
}
#else
......@@ -313,14 +358,14 @@ int main( int argc, char **argv ) {
/* printf(" criterion (diff < 1.) ... is allowed to fail if fma is used !\n"); */
/* if ( diff > 1. ) err = 1; */
/* else use 'Instructions completed' */
ref = (double)3*(iend-istart)*myINDEX*myINDEX;
act = sct_event(timer[0], "PAPI_TOT_INS");
diff = fabs(ref-act)/ref*100.;
printf("outputcheck: PAPI_TOT_INS for proc %i: ref %10.2e act %10.2e diff %5.2f %%\n",
pid, ref, act, diff);
printf(" criterion (diff < %5.2f)\n", INS_COUNT_TOLERANCE);
if (diff > INS_COUNT_TOLERANCE) err = 1;
/* else compare 'Load instructions' for both runs of matmult*/
act0 = sct_event(timer[0], "PAPI_LD_INS");
act1 = sct_event(timer[1], "PAPI_LD_INS");
ratio = act1/act0;
printf("outputcheck: PAPI_LD_INS ratio for proc %i: act0 %10.2e act1 %10.2e ratio %5.2f\n",
pid, act0, act1, ratio);
printf(" criterion (7 <= ratio <= 8)\n");
if ((ratio > 8.) || (ratio < 7.)) err = 1;
#endif
}
/* skip check for clock rate ... not accurate enough on HSW and BDW ...
......
......@@ -13,12 +13,13 @@ export SCT_EVENTCOUNTERS=1
retval0=$?
export SCT_CALLSTATS=0
# test for counter rate
# test for counter rate ... skipped due to inexact results on HSW/BDW
retval1=0
export SCT_EVENTCOUNTERS=2
@MPI_TRUE@@MPI_LAUNCH@ -n 2 @abs_top_srcdir@/tests/test_papi
@MPI_FALSE@@abs_top_srcdir@/tests/test_papi
retval1=$?
#@MPI_TRUE@@MPI_LAUNCH@ -n 2 @abs_top_srcdir@/tests/test_papi
#@MPI_FALSE@@abs_top_srcdir@/tests/test_papi
#retval1=$?
if (test $retval0 != 0) || (test $retval1 != 0); then
exit 1
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment