Commit 7c115897 authored by Uwe Schulzweida
Browse files

cgribexlib update (encode: sse_minmaxval())

parent 6d704421
2012-05-?? Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* Version 1.5.5 released
* using CGRIBEX library version 1.5.2
2012-02-15 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* stream_cdf::cdfDefDatatype: bug fix for DATATYPE_UINT8
......
/* Automatically generated by m214003 at 2011-08-29, do not edit */
/* Automatically generated by m214003 at 2012-02-27, do not edit */
/* CGRIBEXLIB_VERSION="1.5.1" */
/* CGRIBEXLIB_VERSION="1.5.2" */
#if defined (HAVE_CONFIG_H)
# include "config.h"
......@@ -12,6 +12,14 @@
#include <stdarg.h>
#include <sys/types.h>
#ifdef __SSE2__
#include <float.h>
#include <pmmintrin.h>
#include <stdint.h>
#include <inttypes.h>
#include <x86intrin.h>
#endif
#include "file.h"
#include "dmemory.h"
#include "dtypes.h"
......@@ -2796,6 +2804,118 @@ void gribPrintSec4Wave(int *isec4)
}
}
#ifdef __SSE2__
#endif
#ifdef __SSE2__
/*
 * sse_minmaxval - fold an array of doubles into a running minimum and
 * maximum using SSE2 packed-double instructions.
 *
 *   buf      input values; may start at any address, the unaligned head is
 *            processed one element at a time until buf is 16-byte aligned
 *   nframes  number of doubles in buf; for nframes <= 0 nothing is read and
 *            *min / *max are written back unchanged
 *   min      in: starting minimum, out: min(*min, all buf[i])
 *   max      in: starting maximum, out: max(*max, all buf[i])
 *
 * NOTE(review): NaN inputs are not treated specially; the result then
 * depends on the operand-order semantics of _mm_min_pd/_mm_max_pd and may
 * differ from the scalar comparison loop - confirm if NaNs can occur here.
 */
static
void sse_minmaxval(const double *restrict buf, long nframes, double *min, double *max)
{
  __m128d current_max, current_min, work;

  // load starting max and min values into all slots of the XMM registers
  current_min = _mm_set1_pd(*min);
  current_max = _mm_set1_pd(*max);

  // work on input until buf reaches 16 byte alignment
  // (uintptr_t is the integer type guaranteed to hold a pointer value)
  while ( ((uintptr_t) buf) % 16 != 0 && nframes > 0 )
    {
      // load one double and replicate
      work = _mm_set1_pd(*buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf++;
      nframes--;
    }

  // main loop: 8 doubles (64 bytes, four aligned loads) per iteration
  while ( nframes >= 8 )
    {
      // read-prefetch 64 doubles (512 bytes) ahead, no temporal locality
      __builtin_prefetch(buf+64,0,0); // for GCC 4.3.2 +

      work = _mm_load_pd(buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf += 2;
      work = _mm_load_pd(buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf += 2;
      work = _mm_load_pd(buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf += 2;
      work = _mm_load_pd(buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf += 2;
      nframes -= 8;
    }

  // work through smaller chunks of aligned buffers without prefetching
  while ( nframes >= 2 )
    {
      work = _mm_load_pd(buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf += 2;
      nframes -= 2;
    }

  // work through the remaining value
  while ( nframes > 0 )
    {
      // load the last double and replicate
      work = _mm_set1_pd(*buf);
      current_min = _mm_min_pd(current_min, work);
      current_max = _mm_max_pd(current_max, work);
      buf++;
      nframes--;
    }

  // horizontal reduction: swap the two lanes and combine with the original,
  // so the low lane holds the result over both lanes
  work = current_min;
  work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
  work = _mm_min_pd(work, current_min);
  _mm_store_sd(min, work);

  work = current_max;
  work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
  work = _mm_max_pd(work, current_max);
  _mm_store_sd(max, work);

  return;
}
#endif
/*
 * minmaxval - update a running minimum and maximum with the values in data.
 *
 *   data      input values
 *   datasize  number of values; for datasize < 1 the function returns
 *             without touching *fmin / *fmax
 *   fmin      in: starting minimum, out: lowered to the smallest data[i]
 *             if that is below the starting value
 *   fmax      in: starting maximum, out: raised to the largest data[i]
 *             if that is above the starting value
 *
 * When the compiler defines __SSE2__ the work is delegated to
 * sse_minmaxval(); otherwise a plain comparison loop is used, preceded by
 * the vendor-specific vectorization pragmas for CRAY, SX and __uxp__.
 */
static
void minmaxval(const double *restrict data, long datasize, double *fmin, double *fmax)
{
  if ( datasize < 1 ) return;

#ifdef __SSE2__
  sse_minmaxval(data, datasize, fmin, fmax);
#else
  {
    /* the loop index lives in this branch only, so SSE2 builds do not
       carry an unused variable */
    long i;

#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
#pragma vdir nodep
#elif defined (__uxp__)
#pragma loop novrec
#endif
    for ( i = 0; i < datasize; ++i )
      {
        if ( *fmin > data[i] ) *fmin = data[i];
        if ( *fmax < data[i] ) *fmax = data[i];
      }
  }
#endif
}
/* Number of bits in an int, computed as sizeof(int) * 8 (i.e. assuming 8-bit bytes). */
int BitsPerInt = (int) (sizeof(int) * 8);
......@@ -3387,7 +3507,7 @@ void encode_double_array_common(int numBits, long packStart, long datasize, GRIB
cbits -= jbits;
}
}
if (cbits != 8) lGrib[z++] = c << cbits;
if ( cbits != 8 ) lGrib[z++] = c << cbits;
*gz = z;
}
......@@ -3625,22 +3745,7 @@ int encodeBDS(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *ise
fmin = fmax = data[PackStart];
#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
#pragma vdir nodep
#elif defined (__uxp__)
#pragma loop novrec
#endif
for ( i = PackStart+1; i < datasize; ++i )
{
if ( fmin > data[i] ) fmin = data[i];
if ( fmax < data[i] ) fmax = data[i];
/*
fmin = fmin < data[i] ? fmin : data[i];
fmax = fmax > data[i] ? fmax : data[i];
*/
}
minmaxval(data+PackStart+1, datasize-PackStart-1, &fmin, &fmax);
zref = fmin;
......@@ -3750,7 +3855,7 @@ int encodeBDS(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *ise
Put3Byte(blockLength); /* 0-2 Length of Block 4 */
Put1Byte(Flag); /* 3 Flag & Unused bits */
if (binscale < 0) binscale = 32768 - binscale;
if ( binscale < 0 ) binscale = 32768 - binscale;
Put2Byte(binscale); /* 4-5 Scale factor */
Put1Real(zref); /* 6-9 Reference value */
Put1Byte(nbpv); /* 10 Packing size */
......@@ -3777,11 +3882,9 @@ int encodeBDS(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *ise
*datsize = ((datasize-PackStart)*nbpv + 7)/8;
encode_double_array(nbpv, PackStart, datasize, lGrib,
data, zref, factor, &z);
encode_double_array(nbpv, PackStart, datasize, lGrib, data, zref, factor, &z);
if ( unused_bits >= 8 )
Put1Byte(0); /* Fillbyte */
if ( unused_bits >= 8 ) Put1Byte(0); /* Fillbyte */
*gribLen = z;
......@@ -4596,7 +4699,7 @@ void decode_double_array(unsigned char *igrib, long jlend, int numBits,
}
else
{
fprintf(stderr," Unimplemented packing factor %d!\n", numBits);
fprintf(stderr, "Unimplemented packing factor %d!\n", numBits);
exit(EXIT_FAILURE);
}
#endif
......@@ -4902,14 +5005,6 @@ void gribDecode(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
grsdef();
if ( dfunc != 'L' && dfunc != 'J' )
if ( DBL_IS_NAN(FSEC3_MissVal) && lmissvalinfo )
{
lmissvalinfo = 0;
FSEC3_MissVal = GRIB_MISSVAL;
Message("Missing value = NaN is unsupported, set to %g!", GRIB_MISSVAL);
}
ISEC2_Reduced = FALSE;
/*
......@@ -5080,6 +5175,14 @@ void gribDecode(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
if ( bitmapSize > 0 )
{
if ( dfunc != 'L' && dfunc != 'J' )
if ( DBL_IS_NAN(FSEC3_MissVal) && lmissvalinfo )
{
lmissvalinfo = 0;
FSEC3_MissVal = GRIB_MISSVAL;
Message("Missing value = NaN is unsupported, set to %g!", GRIB_MISSVAL);
}
/* ISEC4_NumNonMissValues = ISEC4_NumValues; */
ISEC4_NumValues = bitmapSize;
......@@ -9167,7 +9270,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return (gribLen);
}
static const char grb_libvers[] = "1.5.1" " of ""Aug 29 2011"" ""20:30:27";
static const char grb_libvers[] = "1.5.2" " of ""Feb 27 2012"" ""13:52:12";
const char *
cgribexLibraryVersion(void)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment