Commit 525e5392 authored by Uwe Schulzweida
Browse files

cgribexlib update.

parent b2cbbb47
2019-10-15 Uwe Schulzweida
* using CGRIBEX library version 1.9.4
* Version 1.9.8 released
2019-07-11 Uwe Schulzweida
......
This diff is collapsed.
/* Automatically generated by m214003 at 2019-04-15, do not edit */
/* Automatically generated by m214003 at 2019-07-12, do not edit */
/* CGRIBEXLIB_VERSION="1.9.3" */
/* CGRIBEXLIB_VERSION="1.9.4" */
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5) || defined (__clang__)
#pragma GCC diagnostic push
......@@ -453,6 +453,10 @@ xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax
#pragma options nostrict
#endif
#if defined(OMP_SIMD)
#include <omp.h>
#endif
#include <stdlib.h>
//#undef _GET_X86_COUNTER
......@@ -745,6 +749,8 @@ void minmax_val_double_orig(const double *restrict data, size_t datasize, double
/* Forward declarations of the min/max scan variants.  Every variant is tagged
   noinline (presumably so the timing loops in main() measure a real call to
   each one -- TODO confirm). */
static
void minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__ ((noinline));
/* Double-precision variant whose loop carries an OpenMP pragma when _OPENMP is defined. */
static
void minmax_val_double_omp(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__ ((noinline));
/* NOTE(review): this variant takes its element count as a signed long, while
   the other variants declared here use size_t. */
static
void minmax_val_float(const float *restrict data, long datasize, float *fmin, float *fmax) __attribute__ ((noinline));
static
void minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax) __attribute__ ((noinline));
......@@ -813,6 +819,24 @@ void minmax_val_float(const float *restrict data, long idatasize, float *fmin, f
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math")
#endif
/*
 * Fold data[0 .. datasize-1] into a running minimum and maximum.
 *
 *   data     - input values (read only)
 *   datasize - number of elements to scan
 *   fmin     - in: starting minimum; out: min of the start value and all elements
 *   fmax     - in: starting maximum; out: max of the start value and all elements
 *
 * The caller's *fmin / *fmax seed the scan, so with datasize == 0 they are
 * written back unchanged.  When _OPENMP is defined the loop is annotated as a
 * parallel SIMD loop with min/max reductions on the two running values.
 */
static
void minmax_val_double_omp(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  double lo = *fmin;
  double hi = *fmax;

#if defined(_OPENMP)
#pragma omp parallel for simd reduction(min:lo) reduction(max:hi)
#endif
  for ( size_t k = 0; k < datasize; k++ )
    {
      const double v = data[k];
      /* Keep the current extreme only while it strictly beats v. */
      if ( !(lo < v) ) lo = v;
      if ( !(hi > v) ) hi = v;
    }

  *fmin = lo;
  *fmax = hi;
}
static
void minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
......@@ -830,6 +854,7 @@ void minmax_val_double_simd(const double *restrict data, size_t datasize, double
*fmin = dmin;
*fmax = dmax;
}
static
void minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax)
{
......@@ -955,6 +980,7 @@ int main(void)
long datasize = 1000000;
double t_begin, t_end;
printf("datasize %ld\n", datasize);
#if defined(_OPENMP)
printf("_OPENMP=%d\n", _OPENMP);
#endif
......@@ -1036,6 +1062,15 @@ int main(void)
}
t_end = dtime();
printf("simd : fmin: %ld fmax: %ld time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
t_begin = dtime();
for ( int i = 0; i < NRUN; ++i )
{
fmin = fmax = data_dp[0];
minmax_val_double_omp(data_dp, datasize, &fmin, &fmax);
}
t_end = dtime();
printf("openmp %d : fmin: %ld fmax: %ld time: %6.2fs\n", omp_get_max_threads(), (long)fmin, (long) fmax, t_end-t_begin);
#endif
#if defined(_ENABLE_AVX)
......@@ -8912,8 +8947,8 @@ int TEMPLATE(decodeGDS,T)(unsigned char *gds, int *isec0, int *isec2, T *fsec2,
ISEC2_Reduced = true;
for ( unsigned i = 0; i < jlenl; i++ )
{
ISEC2_RowLon(i) = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
*numGridVals += ISEC2_RowLon(i);
ISEC2_ReducedPoints(i) = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
*numGridVals += ISEC2_ReducedPoints(i);
}
}
else
......@@ -9588,10 +9623,10 @@ void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *
{
int nvalues = 0;
int nlat = ISEC2_NumLat;
int nlon = ISEC2_RowLonPtr[0];
for ( int ilat = 0; ilat < nlat; ++ilat ) nvalues += ISEC2_RowLon(ilat);
int nlon = ISEC2_ReducedPointsPtr[0];
for ( int ilat = 0; ilat < nlat; ++ilat ) nvalues += ISEC2_ReducedPoints(ilat);
for ( int ilat = 1; ilat < nlat; ++ilat )
if ( ISEC2_RowLon(ilat) > nlon ) nlon = ISEC2_RowLon(ilat);
if ( ISEC2_ReducedPoints(ilat) > nlon ) nlon = ISEC2_ReducedPoints(ilat);
// int dlon = ISEC2_LastLon-ISEC2_FirstLon;
// if ( dlon < 0 ) dlon += 360000;
......@@ -9620,7 +9655,7 @@ void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *
(ISEC1_Parameter == 41) || (ISEC1_Parameter == 42) ||
(ISEC1_Parameter == 43));
(void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_RowLonPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);
(void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_ReducedPointsPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);
if ( bitmapSize > 0 )
{
......@@ -9739,8 +9774,8 @@ int TEMPLATE(decodeGDS,T)(unsigned char *gds, int *isec0, int *isec2, T *fsec2,
ISEC2_Reduced = true;
for ( unsigned i = 0; i < jlenl; i++ )
{
ISEC2_RowLon(i) = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
*numGridVals += ISEC2_RowLon(i);
ISEC2_ReducedPoints(i) = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
*numGridVals += ISEC2_ReducedPoints(i);
}
}
else
......@@ -10415,10 +10450,10 @@ void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *
{
int nvalues = 0;
int nlat = ISEC2_NumLat;
int nlon = ISEC2_RowLonPtr[0];
for ( int ilat = 0; ilat < nlat; ++ilat ) nvalues += ISEC2_RowLon(ilat);
int nlon = ISEC2_ReducedPointsPtr[0];
for ( int ilat = 0; ilat < nlat; ++ilat ) nvalues += ISEC2_ReducedPoints(ilat);
for ( int ilat = 1; ilat < nlat; ++ilat )
if ( ISEC2_RowLon(ilat) > nlon ) nlon = ISEC2_RowLon(ilat);
if ( ISEC2_ReducedPoints(ilat) > nlon ) nlon = ISEC2_ReducedPoints(ilat);
// int dlon = ISEC2_LastLon-ISEC2_FirstLon;
// if ( dlon < 0 ) dlon += 360000;
......@@ -10447,7 +10482,7 @@ void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *
(ISEC1_Parameter == 41) || (ISEC1_Parameter == 42) ||
(ISEC1_Parameter == 43));
(void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_RowLonPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);
(void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_ReducedPointsPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);
if ( bitmapSize > 0 )
{
......@@ -12068,7 +12103,7 @@ void TEMPLATE(encodeGDS,T)(GRIBPACK *lGrib, long *gribLen, int *isec2, T *fsec2)
}
if ( ISEC2_Reduced )
for ( long i = 0; i < ISEC2_NumLat; i++ ) Put2Byte(ISEC2_RowLon(i));
for ( long i = 0; i < ISEC2_NumLat; i++ ) Put2Byte(ISEC2_ReducedPoints(i));
*gribLen = z;
}
......@@ -12643,7 +12678,7 @@ void TEMPLATE(encodeGDS,T)(GRIBPACK *lGrib, long *gribLen, int *isec2, T *fsec2)
}
if ( ISEC2_Reduced )
for ( long i = 0; i < ISEC2_NumLat; i++ ) Put2Byte(ISEC2_RowLon(i));
for ( long i = 0; i < ISEC2_NumLat; i++ ) Put2Byte(ISEC2_ReducedPoints(i));
*gribLen = z;
}
......@@ -13107,7 +13142,7 @@ void encode_dummy(void)
(void) encode_array_unrolled_double(0, 0, 0, NULL, NULL, 0, 0, NULL);
(void) encode_array_unrolled_float(0, 0, 0, NULL, NULL, 0, 0, NULL);
}
static const char grb_libvers[] = "1.9.3";
static const char grb_libvers[] = "1.9.4";
const char *
cgribexLibraryVersion(void)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment