Commit e653a050 authored by Uwe Schulzweida
Browse files

cgribexlib update

parent 18134373
/* Automatically generated by m214003 at 2013-07-11, do not edit */
/* Automatically generated by m214003 at 2013-07-16, do not edit */
/* CGRIBEXLIB_VERSION="1.6.2" */
......@@ -634,6 +634,30 @@ double intpow2(int x)
else
return (pow(2.0, (double) x));
}
/*
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
result on bailung (gcc 4.7):
orig : fmin: -500000 fmax: 499999 time: 4.84193s
sse2 : fmin: -500000 fmax: 499999 time: 4.82625s
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
result on thunder5 (gcc 4.7):
orig : fmin: -500000 fmax: 499999 time: 3.35603s
avx : fmin: -500000 fmax: 499999 time: 3.13276s
icc -g -Wall -O3 -march=native -std=c99 -vec-report=1 -DTEST_MINMAXVAL minmax_val.c
result on thunder5 (icc 13.2):
orig : fmin: -500000 fmax: 499999 time: 1.57076s
avx : fmin: -500000 fmax: 499999 time: 3.11323s
xlc_r -g -O3 -q64 -qhot -qstrict -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax_val.c
result on blizzard (xlc 12):
orig : fmin: -500000 fmax: 499999 time: 69.3418s
pwr6u6 : fmin: -500000 fmax: 499999 time: 5.9067s
*/
#include <stdlib.h>
//#undef _GET_X86_COUNTER
//#undef _GET_IBM_COUNTER
//#undef _GET_MACH_COUNTER
......@@ -654,7 +678,9 @@ double intpow2(int x)
#define DISABLE_SIMD
#endif
#ifndef TEST_MINMAXVAL
#define DISABLE_SIMD
#endif
#ifdef DISABLE_SIMD
# ifdef ENABLE_AVX
......@@ -1020,9 +1046,112 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
return;
}
#ifdef TEST_MINMAXVAL
#include <stdio.h>
#include <sys/time.h>
/*
 * Wall-clock timestamp used by the benchmark driver.
 *
 * Queries gettimeofday() and returns the reported time as a double:
 * whole seconds plus the microsecond part scaled to seconds.
 */
static
double dtime()
{
  struct timeval now;

  gettimeofday(&now, NULL);

  /* tv_usec counts microseconds, hence the 1.0e-6 scale factor. */
  const double fraction = (double) now.tv_usec * 1.0e-6;

  return (double) (now.tv_sec + fraction);
}
#define NRUN 10000
/*
 * Benchmark driver for the min/max search routines (built only with
 * -DTEST_MINMAXVAL).
 *
 * Fills an array of 1000000 doubles with the ramp -datasize/2 + i, then
 * times NRUN calls of the reference routine and of whichever optimized
 * variants are enabled at compile time (_ENABLE_AVX or _ENABLE_SSE2, and
 * independently _ARCH_PWR6).  For each variant it prints the found
 * minimum and maximum and the elapsed wall-clock time.
 *
 * Returns 0 on success and EXIT_FAILURE if the data array cannot be
 * allocated.
 */
int main(void)
{
  long datasize = 1000000;
  double fmin, fmax;
  double t_begin, t_end;

  double *data = malloc((size_t) datasize * sizeof *data);
  if ( data == NULL )
    {
      /* Without this check the fill loop below would write through NULL. */
      fprintf(stderr, "minmax_val test: cannot allocate %ld doubles\n", datasize);
      return (EXIT_FAILURE);
    }

  /* Monotonic ramp: minimum is at index 0, maximum at the last index. */
  for ( long i = datasize-1; i >= 0; i-- ) data[i] = (double) (-datasize/2 + i);

  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      /* Re-seed the result before every call so each run starts from the same state. */
      fmin = fmax = data[0];
      minmax_val_orig(data, datasize, &fmin, &fmax);
    }
  t_end = dtime();
  printf("orig : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);

#if defined _ENABLE_AVX
  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      fmin = fmax = data[0];
      avx_minmax_val(data, datasize, &fmin, &fmax);
    }
  t_end = dtime();
  printf("avx : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
#elif defined _ENABLE_SSE2
  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      fmin = fmax = data[0];
      sse2_minmax_val(data, datasize, &fmin, &fmax);
    }
  t_end = dtime();
  printf("sse2 : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
#endif

#ifdef _ARCH_PWR6
  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      fmin = fmax = data[0];
      pwr6_minmax_val_unrolled6(data, datasize, &fmin, &fmax);
    }
  t_end = dtime();
  printf("pwr6u6 : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
#endif

  free(data);

  return (0);
}
#endif // TEST_MINMAXVAL
#undef DISABLE_SIMD
#undef _ENABLE_AVX
#undef _ENABLE_SSE2
/*
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL encode_double_array.c
result on bailung (gcc 4.7):
orig : val1: 1 val2: 1 val3: 2 valn: 66 time: 8.4166s
sse41 : val1: 1 val2: 1 val3: 2 valn: 66 time: 7.1522s
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL encode_double_array.c
result on thunder5 (gcc 4.7):
orig : val1: 1 val2: 1 val3: 2 valn: 66 time: 6.21976s
avx : val1: 1 val2: 1 val3: 2 valn: 66 time: 4.54485s
icc -g -Wall -O3 -march=native -std=c99 -vec-report=1 -DTEST_MINMAXVAL encode_double_array.c
result on thunder5 (icc 13.2):
orig : val1: 1 val2: 1 val3: 2 valn: 66 time: 14.6279s
avx : val1: 1 val2: 1 val3: 2 valn: 66 time: 4.9776s
xlc_r -g -O3 -q64 -qhot -qstrict -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL encode_double_array.c
result on blizzard (xlc 12):
orig : val1: 1 val2: 1 val3: 2 valn: 66 time: 199.627s
orig : val1: 1 val2: 1 val3: 2 valn: 66 time: 132.259s (nostrict)
unrolled: val1: 1 val2: 1 val3: 2 valn: 66 time: 27.372s
*/
/* The definitions below exist only in the stand-alone benchmark build
   (-DTEST_MINMAXVAL). */
#ifdef TEST_MINMAXVAL
#include <stdio.h>
#include <stdlib.h>
/* Element type of the packed output buffer: one byte. */
#define GRIBPACK unsigned char
/* Non-zero when the last byte of u_byteorder holds the 1 stored through
   member l, i.e. when the least significant byte of a long comes last. */
#define IS_BIGENDIAN() (u_byteorder.c[sizeof(long) - 1])
/* Declares the union inspected by IS_BIGENDIAN(); the initializer {1}
   sets its first member, l, to 1. */
#define U_BYTEORDER static union {unsigned long l; unsigned char c[sizeof(long)];} u_byteorder = {1}
/* Error reporting expands to nothing in the benchmark build. */
#define Error(x,y)
#endif
//#undef _GET_X86_COUNTER
//#undef _GET_MACH_COUNTER
//#undef _GET_IBM_COUNTER
......@@ -1764,6 +1893,83 @@ void encode_double_array_unrolled(int numBits, size_t packStart, size_t datasize
#undef __UNROLL_DEPTH_2
}
#ifdef TEST_MINMAXVAL
#include <sys/time.h>
/*
 * Wall-clock timestamp used by the benchmark driver.
 *
 * Queries gettimeofday() and returns the reported time as a double:
 * whole seconds plus the microsecond part scaled to seconds.
 */
static
double dtime()
{
  struct timeval now;

  gettimeofday(&now, NULL);

  /* tv_usec counts microseconds, hence the 1.0e-6 scale factor. */
  const double fraction = (double) now.tv_usec * 1.0e-6;

  return (double) (now.tv_sec + fraction);
}
#define NRUN 10000
/*
 * Benchmark driver for the double-array encoders (built only with
 * -DTEST_MINMAXVAL).
 *
 * Fills an array of 1000000 doubles with the ramp -datasize/2 + i and
 * times NRUN calls of encode_double_array_2byte, of
 * encode_double_array_unrolled, and of the AVX or SSE4.1 variant when
 * _ENABLE_AVX or _ENABLE_SSE4_1 is defined.  After each variant it prints
 * four bytes of the output buffer, so the variants can be compared
 * against each other, together with the elapsed wall-clock time.
 *
 * Returns 0 on success and EXIT_FAILURE if either buffer cannot be
 * allocated.
 */
int main(void)
{
  long datasize = 1000000;
  double t_begin, t_end;

  double *data = malloc((size_t) datasize * sizeof *data);
  /* Output buffer holds two bytes per input value (nbpv = 16 bits). */
  unsigned char *lgrib = malloc(2 * (size_t) datasize * sizeof *lgrib);
  if ( data == NULL || lgrib == NULL )
    {
      /* Without this check the code below would read/write through NULL. */
      fprintf(stderr, "encode_double_array test: cannot allocate buffers for %ld values\n", datasize);
      free(lgrib);
      free(data);
      return EXIT_FAILURE;
    }

  for ( long i = 0; i < datasize; ++i ) data[i] = (double) (-datasize/2 + i);

  int PackStart = 0;
  int nbpv = 16;
  double zref = data[0];
  size_t z;
  double factor = 0.00390625;  /* 1/256 */
  int s = 256;                 /* stride between the sampled output bytes */

  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      /* z is passed by address and reset before every call
         (presumably the encoder's output position -- confirm against the encoder). */
      z = 0;
      encode_double_array_2byte (datasize, lgrib, data, zref, factor, &z);
    }
  t_end = dtime();
  printf("orig : val1: %d val2: %d val3: %d valn: %d time: %gs\n", (int) lgrib[s*1+1], (int) lgrib[s*2+1], (int) lgrib[s*3+1], (int) lgrib[2*datasize-1], t_end-t_begin);

  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      z = 0;
      encode_double_array_unrolled (nbpv, PackStart, datasize, lgrib, data, zref, factor, &z);
    }
  t_end = dtime();
  printf("unrolled: val1: %d val2: %d val3: %d valn: %d time: %gs\n", (int) lgrib[s*1+1], (int) lgrib[s*2+1], (int) lgrib[s*3+1], (int) lgrib[2*datasize-1], t_end-t_begin);

#if defined _ENABLE_AVX
  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      z = 0;
      avx_encode_double_array_2byte (datasize, lgrib, data, zref, factor, &z);
    }
  t_end = dtime();
  printf("avx : val1: %d val2: %d val3: %d valn: %d time: %gs\n", (int) lgrib[s*1+1], (int) lgrib[s*2+1], (int) lgrib[s*3+1], (int) lgrib[2*datasize-1], t_end-t_begin);
#elif defined _ENABLE_SSE4_1
  t_begin = dtime();
  for ( int i = 0; i < NRUN; ++i )
    {
      z = 0;
      sse41_encode_double_array_2byte (datasize, lgrib, data, zref, factor, &z);
    }
  t_end = dtime();
  printf("sse41 : val1: %d val2: %d val3: %d valn: %d time: %gs\n", (int) lgrib[s*1+1], (int) lgrib[s*2+1], (int) lgrib[s*3+1], (int) lgrib[2*datasize-1], t_end-t_begin);
#endif

  free(lgrib);
  free(data);

  return 0;
}
#endif // TEST_MINMAXVAL
#undef DISABLE_SIMD
#undef _ENABLE_AVX
#undef _ENABLE_SSE4_1
......@@ -10604,7 +10810,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return (gribLen);
}
static const char grb_libvers[] = "1.6.2" " of ""Jul 11 2013"" ""11:42:31";
static const char grb_libvers[] = "1.6.2" " of ""Jul 16 2013"" ""15:13:17";
const char *
cgribexLibraryVersion(void)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment