Commit dc1f7d7c authored by Uwe Schulzweida
Browse files

cgribexlib: update

parent 8703965a
/* Automatically generated by m214003 at 2013-06-27, do not edit */
/* Automatically generated by m214003 at 2013-07-10, do not edit */
/* CGRIBEXLIB_VERSION="1.6.1" */
/* CGRIBEXLIB_VERSION="1.6.2" */
#ifdef _ARCH_PWR6
#pragma options nostrict
......@@ -642,6 +642,7 @@ double intpow2(int x)
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#define ENABLE_SIMD
#else
#define DISABLE_SIMD
#endif
......@@ -654,17 +655,24 @@ double intpow2(int x)
#include <mach/mach_time.h>
#endif
//#define DISABLE_SIMD
#define DISABLE_SIMD
#undef ENABLE_SIMD
#ifdef DISABLE_SIMD
#ifndef ENABLE_AVX
#ifdef ENABLE_AVX
#define ENABLE_SIMD
#else
#undef __AVX__
#endif
#ifndef ENABLE_SSE2
#ifdef ENABLE_SSE2
#define ENABLE_SIMD
#else
#undef __SSE2__
#endif
#endif
#if defined (ENABLE_SIMD)
#if defined __AVX__
#include <float.h>
#include <stdint.h>
......@@ -677,6 +685,7 @@ double intpow2(int x)
#include <emmintrin.h>
#endif
#if defined __AVX__
static
......@@ -850,41 +859,17 @@ void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, do
#endif
static
void minmax_val(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t start_minmax, end_minmax;
#endif
size_t datasize = idatasize;
if ( idatasize < 1 ) return;
#ifdef _GET_X86_COUNTER
start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax = mach_absolute_time();
#endif
#if defined __AVX__
avx_minmax_val(data, datasize, fmin, fmax);
#elif defined __SSE2__
sse2_minmax_val(data, datasize, fmin, fmax);
#else
#endif // ENABLE_SIMD
#ifdef _ARCH_PWR6
static
void pwr6_minmax_val_unrolled6(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#define __UNROLL_DEPTH_1 6
size_t datasize = idatasize;
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax fsel");
#endif
{
size_t i, j;
size_t residual = datasize % __UNROLL_DEPTH_1;
......@@ -919,19 +904,21 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
*fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax);
}
}
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
#undef __UNROLL_DEPTH_1
}
#endif
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax base");
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math", "tree-vectorizer-verbose=1")
#endif
{
size_t i;
static
void minmax_val_orig(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
size_t i;
size_t datasize = idatasize;
double dmin = *fmin, dmax = *fmax;
#if defined (CRAY)
#pragma _CRI ivdep
......@@ -940,16 +927,73 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#elif defined (__uxp__)
#pragma loop novrec
#endif
for ( i = 0; i < datasize; ++i )
{
if ( *fmin > data[i] ) *fmin = data[i];
if ( *fmax < data[i] ) *fmax = data[i];
/*
*fmin = *fmin < data[i] ? *fmin : data[i];
*fmax = *fmax > data[i] ? *fmax : data[i];
*/
}
}
for ( i = 0; i < datasize; ++i )
{
dmin = dmin < data[i] ? dmin : data[i];
dmax = dmax > data[i] ? dmax : data[i];
// if ( dmin > data[i] ) dmin = data[i];
// if ( dmax < data[i] ) dmax = data[i];
}
*fmin = dmin;
*fmax = dmax;
}
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC pop_options
#endif
static
void minmax_val(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t start_minmax, end_minmax;
#endif
size_t datasize = idatasize;
if ( idatasize < 1 ) return;
#ifdef _GET_X86_COUNTER
start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax = mach_absolute_time();
#endif
#if defined __AVX__ && defined (ENABLE_SIMD)
avx_minmax_val(data, datasize, fmin, fmax);
#elif defined __SSE2__ && defined (ENABLE_SIMD)
sse2_minmax_val(data, datasize, fmin, fmax);
#else
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_1 6
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax fsel");
#endif
pwr6_minmax_val_unrolled6(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
#undef __UNROLL_DEPTH_1
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax base");
#endif
minmax_val_orig(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
......@@ -10522,7 +10566,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return (gribLen);
}
static const char grb_libvers[] = "1.6.1" " of ""Jun 27 2013"" ""15:38:33";
static const char grb_libvers[] = "1.6.2" " of ""Jul 10 2013"" ""09:45:08";
const char *
cgribexLibraryVersion(void)
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment