Commit c5edb29e authored by Uwe Schulzweida
Browse files

cgribexlib: update

parent d47a4bc4
/* Automatically generated by m214003 at 2013-07-18, do not edit */
/* Automatically generated by m214003 at 2013-10-09, do not edit */
/* CGRIBEXLIB_VERSION="1.6.2" */
......@@ -637,25 +637,25 @@ double intpow2(int x)
/*
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
result on bailung (gcc 4.7):
orig : fmin: -500000 fmax: 499999 time: 4.84193s
sse2 : fmin: -500000 fmax: 499999 time: 4.82625s
orig : fmin: -500000 fmax: 499999 time: 4.84s
sse2 : fmin: -500000 fmax: 499999 time: 4.82s
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
result on thunder5 (gcc 4.7):
orig : fmin: -500000 fmax: 499999 time: 3.35603s
avx : fmin: -500000 fmax: 499999 time: 3.13276s
result on thunder5 (gcc 4.7.2):
orig : fmin: -500000 fmax: 499999 time: 3.10s
avx : fmin: -500000 fmax: 499999 time: 2.84s
icc -g -Wall -O3 -march=native -std=c99 -vec-report=1 -DTEST_MINMAXVAL minmax_val.c
result on thunder5 (icc 13.2):
orig : fmin: -500000 fmax: 499999 time: 1.57076s
avx : fmin: -500000 fmax: 499999 time: 3.11323s
result on thunder5 (icc 13.1.2):
orig : fmin: -500000 fmax: 499999 time: 2.83s
avx : fmin: -500000 fmax: 499999 time: 2.92s
xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax_val.c
result on blizzard (xlc 12):
orig : fmin: -500000 fmax: 499999 time: 7.26469s
pwr6u6 : fmin: -500000 fmax: 499999 time: 5.92341s
orig : fmin: -500000 fmax: 499999 time: 7.26s
pwr6u6 : fmin: -500000 fmax: 499999 time: 5.92s
*/
#ifdef _ARCH_PWR6
#if defined(_ARCH_PWR6)
#pragma options nostrict
#endif
......@@ -666,14 +666,20 @@ xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax
//#undef _GET_MACH_COUNTER
//#undef _ARCH_PWR6
#if defined _GET_IBM_COUNTER
#if defined(_GET_IBM_COUNTER)
#include <libhpc.h>
#elif defined _GET_X86_COUNTER
#elif defined(_GET_X86_COUNTER)
#include <x86intrin.h>
#elif defined _GET_MACH_COUNTER
#elif defined(_GET_MACH_COUNTER)
#include <mach/mach_time.h>
#endif
/*
 * GNUC_PUSH_POP is defined when the compiler is genuine GCC (icc and clang
 * also define __GNUC__, so they are excluded explicitly) at version 4.4 or
 * newer. It guards the "#pragma GCC push_options" / "pop_options" pair used
 * further down in this file.
 *
 * The major and minor version numbers are compared as a pair: testing
 * (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4) independently would reject every
 * later major release whose minor number is below 4 (e.g. GCC 5.1 or 6.2).
 */
#if defined(__GNUC__) && !defined(__ICC) && !defined(__clang__)
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4))
#define GNUC_PUSH_POP
#endif
#endif
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
......@@ -681,24 +687,24 @@ xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax
#define DISABLE_SIMD
#endif
#ifndef TEST_MINMAXVAL
#if !defined(TEST_MINMAXVAL)
#define DISABLE_SIMD
#endif
#ifdef DISABLE_SIMD
# ifdef ENABLE_AVX
#if defined(DISABLE_SIMD)
# if defined(ENABLE_AVX)
# define _ENABLE_AVX
# endif
# ifdef ENABLE_SSE2
# if defined(ENABLE_SSE2)
# define _ENABLE_SSE2
# endif
#endif
#ifndef DISABLE_SIMD
# ifdef __AVX__
#if !defined(DISABLE_SIMD)
# if defined(__AVX__)
# define _ENABLE_AVX
# endif
# ifdef __SSE2__
# if defined(__SSE2__)
# define _ENABLE_SSE2
# endif
#endif
......@@ -707,14 +713,14 @@ xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax
#include <stdint.h>
#include <inttypes.h>
#if defined _ENABLE_AVX
#if defined(_ENABLE_AVX)
#include <immintrin.h>
#elif defined _ENABLE_SSE2
#elif defined(_ENABLE_SSE2)
#include <emmintrin.h>
#endif
#if defined _ENABLE_AVX
#if defined(_ENABLE_AVX)
static
void avx_minmax_val(const double *restrict buf, size_t nframes, double *min, double *max)
......@@ -808,7 +814,7 @@ void avx_minmax_val(const double *restrict buf, size_t nframes, double *min, dou
return;
}
#elif defined _ENABLE_SSE2
#elif defined(_ENABLE_SSE2)
static
void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, double *max)
......@@ -887,7 +893,7 @@ void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, do
#endif // SIMD
#ifdef _ARCH_PWR6
#if defined(_ARCH_PWR6)
static
void pwr6_minmax_val_unrolled6(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
......@@ -934,8 +940,12 @@ void pwr6_minmax_val_unrolled6(const double *restrict data, long idatasize, doub
}
#endif
#if defined(TEST_MINMAXVAL) && defined(__GNUC__)
static
void minmax_val_orig(const double *restrict data, long idatasize, double *fmin, double *fmax) __attribute__ ((noinline));
#endif
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#if defined(GNUC_PUSH_POP)
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math")
#endif
......@@ -946,11 +956,11 @@ void minmax_val_orig(const double *restrict data, long idatasize, double *fmin,
size_t datasize = idatasize;
double dmin = *fmin, dmax = *fmax;
#if defined (CRAY)
#if defined(CRAY)
#pragma _CRI ivdep
#elif defined (SX)
#elif defined(SX)
#pragma vdir nodep
#elif defined (__uxp__)
#elif defined(__uxp__)
#pragma loop novrec
#endif
for ( i = 0; i < datasize; ++i )
......@@ -964,49 +974,49 @@ void minmax_val_orig(const double *restrict data, long idatasize, double *fmin,
*fmin = dmin;
*fmax = dmax;
}
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#if defined(GNUC_PUSH_POP)
#pragma GCC pop_options
#endif
static
void minmax_val(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
uint64_t start_minmax, end_minmax;
#endif
size_t datasize = idatasize;
if ( idatasize < 1 ) return;
#ifdef _GET_X86_COUNTER
#if defined(_GET_X86_COUNTER)
start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
#if defined(_GET_MACH_COUNTER)
start_minmax = mach_absolute_time();
#endif
#if defined _ENABLE_AVX
#if defined(_ENABLE_AVX)
avx_minmax_val(data, datasize, fmin, fmax);
#elif defined _ENABLE_SSE2
#elif defined(_ENABLE_SSE2)
sse2_minmax_val(data, datasize, fmin, fmax);
#else
#ifdef _ARCH_PWR6
#if defined(_ARCH_PWR6)
#define __UNROLL_DEPTH_1 6
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
#if defined(_GET_IBM_COUNTER)
hpmStart(1, "minmax fsel");
#endif
pwr6_minmax_val_unrolled6(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
#if defined(_GET_IBM_COUNTER)
hpmStop(1);
#endif
......@@ -1014,30 +1024,30 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#else // original loop
#ifdef _GET_IBM_COUNTER
#if defined(_GET_IBM_COUNTER)
hpmStart(1, "minmax base");
#endif
minmax_val_orig(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
#if defined(_GET_IBM_COUNTER)
hpmStop(1);
#endif
#endif // _ARCH_PWR6 && original loop
#endif // SIMD
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
#ifdef _GET_X86_COUNTER
#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
#if defined(_GET_X86_COUNTER)
end_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
#if defined(_GET_MACH_COUNTER)
end_minmax = mach_absolute_time();
#endif
#if defined _ENABLE_AVX
#if defined(_ENABLE_AVX)
printf("AVX minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
fprintf (stderr, "AVX min: %lf max: %lf\n", *fmin, *fmax);
#elif defined _ENABLE_SSE2
#elif defined(_ENABLE_SSE2)
printf("SSE2 minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
fprintf (stderr, "SSE2 min: %lf max: %lf\n", *fmin, *fmax);
#else
......@@ -1049,7 +1059,7 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
return;
}
#ifdef TEST_MINMAXVAL
#if defined(TEST_MINMAXVAL)
#include <stdio.h>
#include <sys/time.h>
......@@ -1073,10 +1083,27 @@ int main(void)
double fmin, fmax;
double t_begin, t_end;
#if defined(__ICC)
printf("icc\n");
#elif defined(__clang__)
printf("clang\n");
#elif defined(__GNUC__)
printf("gcc\n");
#endif
data = (double *) malloc(datasize*sizeof(double));
for ( long i = datasize-1; i >= 0; i-- ) data[i] = (double) (-datasize/2 + i);
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
fmin = fmax = data[0];
minmax_val(data, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("minmax_val: fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
......@@ -1084,9 +1111,9 @@ int main(void)
minmax_val_orig(data, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("orig : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
printf("orig : fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
#if defined _ENABLE_AVX
#if defined(_ENABLE_AVX)
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
......@@ -1094,8 +1121,8 @@ int main(void)
avx_minmax_val(data, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("avx : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
#elif defined _ENABLE_SSE2
printf("avx : fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
#elif defined(_ENABLE_SSE2)
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
......@@ -1103,9 +1130,9 @@ int main(void)
sse2_minmax_val(data, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("sse2 : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
printf("sse2 : fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
#endif
#ifdef _ARCH_PWR6
#if defined(_ARCH_PWR6)
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
......@@ -1113,7 +1140,7 @@ int main(void)
pwr6_minmax_val_unrolled6(data, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("pwr6u6 : fmin: %ld fmax: %ld time: %gs\n", (long)fmin, (long) fmax, t_end-t_begin);
printf("pwr6u6 : fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
#endif
return (0);
......@@ -1123,6 +1150,7 @@ int main(void)
#undef DISABLE_SIMD
#undef _ENABLE_AVX
#undef _ENABLE_SSE2
#undef GNUC_PUSH_POP
/*
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL encode_double_array.c
result on bailung (gcc 4.7):
......@@ -3321,8 +3349,8 @@ void gribPrintSec1(int *isec0, int *isec1)
Section 1 . Print local DWD information.
-----------------------------------------------------------------
*/
if ( (isec1[ 1] == 78 && isec1[36] == 253) ||
(isec1[ 1] == 78 && isec1[36] == 254) )
if ( (ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250) &&
(isec1[36] == 253 || isec1[36] == 254) )
{
fprintf(grprsm, " DWD local usage identifier. %9d\n", isec1[36]);
if ( isec1[36] == 253 )
......@@ -4741,7 +4769,6 @@ void encodeES(GRIBPACK *lGrib, long *gribLen, long bdsstart)
#define DWD_extension_253_len 38
#define DWD_extension_254_len 26
#define ECMWF_extension_1_len 24
#define MCH_extension_254_len 19
#define MPIM_extension_1_len 18
static
......@@ -4751,7 +4778,7 @@ long getLocalExtLen(int *isec1)
if ( ISEC1_LocalFLag )
{
if ( ISEC1_CenterID == 78 )
if ( ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250 )
{
if ( isec1[36] == 254 ) extlen = DWD_extension_254_len;
else if ( isec1[36] == 253 ) extlen = DWD_extension_253_len;
......@@ -4760,10 +4787,6 @@ long getLocalExtLen(int *isec1)
{
if ( isec1[36] == 1 ) extlen = ECMWF_extension_1_len;
}
else if ( ISEC1_CenterID == 215 )
{
if ( isec1[36] == 254 ) extlen = MCH_extension_254_len;
}
else if ( ISEC1_CenterID == 252 )
{
if ( isec1[36] == 1 ) extlen = MPIM_extension_1_len;
......@@ -4970,30 +4993,18 @@ void encodePDS(GRIBPACK *lpds, long pdsLen, int *isec1)
if ( ISEC1_LocalFLag )
{
if ( ISEC1_CenterID == 78 )
if ( ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250 )
{
if ( isec1[36] == 254 )
{
encodePDS_DWD_local_Extension_254(lGrib, &z, isec1);
}
else if ( isec1[36] == 253 )
{
encodePDS_DWD_local_Extension_253(lGrib, &z, isec1);
}
if ( isec1[36] == 254 ) encodePDS_DWD_local_Extension_254(lGrib, &z, isec1);
else if ( isec1[36] == 253 ) encodePDS_DWD_local_Extension_253(lGrib, &z, isec1);
}
else if ( ISEC1_CenterID == 98 )
{
if ( isec1[36] == 1 )
{
encodePDS_ECMWF_local_Extension_1(lGrib, &z, isec1);
}
if ( isec1[36] == 1 ) encodePDS_ECMWF_local_Extension_1(lGrib, &z, isec1);
}
else if ( ISEC1_CenterID == 252 )
{
if ( isec1[36] == 1 )
{
encodePDS_MPIM_local_Extension_1(lGrib, &z, isec1);
}
if ( isec1[36] == 1 ) encodePDS_MPIM_local_Extension_1(lGrib, &z, isec1);
}
else
{
......@@ -5863,7 +5874,7 @@ int decodePDS(unsigned char *pds, int *isec0, int *isec1)
{
ISEC1_LocalFLag = 1;
if ( ISEC1_CenterID == 78 )
if ( ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250 )
{
if ( pds[40] == 254 )
{
......@@ -10828,7 +10839,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return (gribLen);
}
static const char grb_libvers[] = "1.6.2" " of ""Jul 18 2013"" ""09:34:47";
static const char grb_libvers[] = "1.6.2" " of ""Oct 9 2013"" ""11:03:55";
const char *
cgribexLibraryVersion(void)
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment