Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpim-sw
libcdi
Commits
948280e6
Commit
948280e6
authored
Dec 07, 2012
by
Uwe Schulzweida
Browse files
cgribexlib update
parent
90d917da
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/cgribexlib.c
View file @
948280e6
/* Automatically generated by m214003 at 2012-12-0
4
, do not edit */
/* Automatically generated by m214003 at 2012-12-0
7
, do not edit */
/* CGRIBEXLIB_VERSION="1.5.6" */
...
...
@@ -672,9 +672,9 @@ double intpow2(int x)
else
return
(
pow
(
2
.
0
,
(
double
)
x
));
}
//#def
ine
_GET_X86_COUNTER
//#
un
def _GET_X86_COUNTER
//#undef _GET_IBM_COUNTER
//#def
ine
_GET_MACH_COUNTER
//#
un
def _GET_MACH_COUNTER
//#undef _ARCH_PWR6
#if defined(__GNUC__) && (__GNUC__ >= 4)
...
...
@@ -684,16 +684,12 @@ double intpow2(int x)
#define DISABLE_SIMD
#endif
#ifdef _GET_IBM_COUNTER
#endif
#ifdef _GET_X86_COUNTER
#endif
#ifdef _GET_MACH_COUNTER
#if defined _GET_IBM_COUNTER
#elif defined _GET_X86_COUNTER
#elif defined _GET_MACH_COUNTER
#endif
#if defined __AVX__
#elif defined __SSE2__
#endif
//#define DISABLE_SIMD
#ifdef DISABLE_SIMD
#ifndef ENABLE_AVX
...
...
@@ -705,12 +701,18 @@ double intpow2(int x)
#endif
#if defined __AVX__
#elif defined __SSE2__
#endif
#if defined __AVX__
static
void
avx_minmax_val
(
const
double
*
restrict
buf
,
long
nframes
,
double
*
min
,
double
*
max
)
void
avx_minmax_val
(
const
double
*
restrict
buf
,
size_t
nframes
,
double
*
min
,
double
*
max
)
{
double
fmin
[
4
],
fmax
[
4
];
__m256d
current_max
,
current_min
,
work
;
// load max and min values into all four slots of the
X
MM registers
// load max and min values into all four slots of the
Y
MM registers
current_min
=
_mm256_set1_pd
(
*
min
);
current_max
=
_mm256_set1_pd
(
*
max
);
...
...
@@ -726,25 +728,27 @@ void avx_minmax_val(const double *restrict buf, long nframes, double *min, doubl
}
while
(
nframes
>=
16
)
{
// use 64 byte prefetch?
__builtin_prefetch
(
buf
+
64
,
0
,
0
);
// for GCC 4.3.2+
work
=
_mm256_loadu_pd
(
buf
);
(
void
)
_mm_prefetch
(
buf
+
8
,
_MM_HINT_NTA
);
work
=
_mm256_load_pd
(
buf
);
current_min
=
_mm256_min_pd
(
current_min
,
work
);
current_max
=
_mm256_max_pd
(
current_max
,
work
);
buf
+=
4
;
__builtin_prefetch
(
buf
+
64
,
0
,
0
);
// for GCC 4.3.2+
work
=
_mm256_load
u
_pd
(
buf
);
work
=
_mm256_load_pd
(
buf
);
current_min
=
_mm256_min_pd
(
current_min
,
work
);
current_max
=
_mm256_max_pd
(
current_max
,
work
);
buf
+=
4
;
__builtin_prefetch
(
buf
+
64
,
0
,
0
);
// for GCC 4.3.2+
work
=
_mm256_loadu_pd
(
buf
);
(
void
)
_mm_prefetch
(
buf
+
8
,
_MM_HINT_NTA
);
work
=
_mm256_load_pd
(
buf
);
current_min
=
_mm256_min_pd
(
current_min
,
work
);
current_max
=
_mm256_max_pd
(
current_max
,
work
);
buf
+=
4
;
__builtin_prefetch
(
buf
+
64
,
0
,
0
);
// for GCC 4.3.2+
work
=
_mm256_load
u
_pd
(
buf
);
work
=
_mm256_load_pd
(
buf
);
current_min
=
_mm256_min_pd
(
current_min
,
work
);
current_max
=
_mm256_max_pd
(
current_max
,
work
);
buf
+=
4
;
...
...
@@ -772,19 +776,24 @@ void avx_minmax_val(const double *restrict buf, long nframes, double *min, doubl
// find min & max value through shuffle tricks
work
=
current_min
;
work
=
_mm256_shuffle_pd
(
work
,
work
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
)
);
work
=
_mm256_shuffle_pd
(
work
,
work
,
5
);
work
=
_mm256_min_pd
(
work
,
current_min
);
current_min
=
work
;
work
=
_mm256_
shuffle
_pd
(
work
,
work
,
_MM_SHUFFLE
(
1
,
0
,
3
,
2
)
);
work
=
_mm256_
permute2f128
_pd
(
work
,
work
,
1
);
work
=
_mm256_min_pd
(
work
,
current_min
);
_mm256_storeu_pd
(
min
,
work
);
_mm256_storeu_pd
(
fmin
,
work
);
work
=
current_max
;
work
=
_mm256_shuffle_pd
(
work
,
work
,
_MM_SHUFFLE
(
2
,
3
,
0
,
1
));
work
=
current_max
;
work
=
_mm256_shuffle_pd
(
work
,
work
,
5
);
work
=
_mm256_max_pd
(
work
,
current_max
);
current_max
=
work
;
work
=
_mm256_
shuffle
_pd
(
work
,
work
,
_MM_SHUFFLE
(
1
,
0
,
3
,
2
)
);
work
=
_mm256_
permute2f128
_pd
(
work
,
work
,
1
);
work
=
_mm256_max_pd
(
work
,
current_max
);
_mm256_storeu_pd
(
max
,
work
);
_mm256_storeu_pd
(
fmax
,
work
);
*
min
=
fmin
[
0
];
*
max
=
fmax
[
0
];
return
;
}
...
...
@@ -792,7 +801,7 @@ void avx_minmax_val(const double *restrict buf, long nframes, double *min, doubl
#elif defined __SSE2__
static
void
sse2_minmax_val
(
const
double
*
restrict
buf
,
long
nframes
,
double
*
min
,
double
*
max
)
void
sse2_minmax_val
(
const
double
*
restrict
buf
,
size_t
nframes
,
double
*
min
,
double
*
max
)
{
__m128d
current_max
,
current_min
,
work
;
...
...
@@ -813,7 +822,7 @@ void sse2_minmax_val(const double *restrict buf, long nframes, double *min, doub
while
(
nframes
>=
8
)
{
// use 64 byte prefetch for double octetts
__builtin_prefetch
(
buf
+
64
,
0
,
0
);
// for GCC 4.3.2 +
//
__builtin_prefetch(buf+64,0,0); // for GCC 4.3.2 +
work
=
_mm_load_pd
(
buf
);
current_min
=
_mm_min_pd
(
current_min
,
work
);
...
...
@@ -869,7 +878,7 @@ void sse2_minmax_val(const double *restrict buf, long nframes, double *min, doub
#endif
static
void
minmax_val
(
const
double
*
restrict
data
,
long
datasize
,
double
*
fmin
,
double
*
fmax
)
void
minmax_val
(
const
double
*
restrict
data
,
size_t
datasize
,
double
*
fmin
,
double
*
fmax
)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t
start_minmax
,
end_minmax
;
...
...
@@ -903,9 +912,9 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
hpmStart
(
1
,
"minmax fsel"
);
#endif
{
long
i
,
j
;
long
residual
=
datasize
%
__UNROLL_DEPTH_1
;
long
ofs
=
datasize
-
residual
;
size_t
i
,
j
;
size_t
residual
=
datasize
%
__UNROLL_DEPTH_1
;
size_t
ofs
=
datasize
-
residual
;
double
register
dmin
[
__UNROLL_DEPTH_1
];
double
register
dmax
[
__UNROLL_DEPTH_1
];
...
...
@@ -948,7 +957,7 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
hpmStart
(
1
,
"minmax base"
);
#endif
{
long
i
;
size_t
i
;
#if defined (CRAY)
#pragma _CRI ivdep
...
...
@@ -984,21 +993,25 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#if defined __AVX__
printf
(
"AVX minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"AVX min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#elif defined __SSE2__
printf
(
"SSE2 minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"SSE2 min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#else
printf
(
"loop minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"loop min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#endif
#endif
return
;
}
//#define _GET_X86_COUNTER
//#define _GET_MACH_COUNTER
//#undef _GET_X86_COUNTER
//#undef _GET_MACH_COUNTER
//#undef _GET_IBM_COUNTER
//#undef _ARCH_PWR6
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
...
...
@@ -1012,8 +1025,7 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#elif defined _GET_MACH_COUNTER
#endif
#define DISABLE_SIMD
//#define ENABLE_SSE4_1
//#define DISABLE_SIMD
#ifdef DISABLE_SIMD
#ifndef ENABLE_AVX
...
...
@@ -1028,19 +1040,17 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#elif defined __SSE4_1__
#endif
#if defined __AVX__
static
void
avx_encode_double_array_2byte
(
long
datasize
,
void
avx_encode_double_array_2byte
(
size_t
datasize
,
unsigned
char
*
restrict
lGrib
,
const
double
*
restrict
data
,
double
zref
,
double
factor
,
long
*
gz
)
double
zref
,
double
factor
,
size_t
*
gz
)
{
long
i
,
j
;
unsigned
int
ival
;
size_t
i
,
j
;
const
double
*
dval
=
data
;
__m128i
*
sgrib
=
(
__m128i
*
)
lGrib
;
__m128i
*
sgrib
=
(
__m128i
*
)
(
lGrib
+
(
*
gz
))
;
const
__m128i
swap
=
_mm_set_epi8
(
14
,
15
,
12
,
13
,
10
,
11
,
8
,
9
,
6
,
7
,
4
,
5
,
2
,
3
,
0
,
1
);
...
...
@@ -1114,14 +1124,17 @@ void avx_encode_double_array_2byte(long datasize,
}
if
(
i
!=
datasize
)
i
-=
16
;
for
(
j
=
i
;
j
<
datasize
;
j
++
)
{
ival
=
(
unsigned
int
)
((
dval
[
j
]
-
zref
)
*
factor
+
0
.
5
);
lGrib
[
2
*
j
]
=
ival
>>
8
;
lGrib
[
2
*
j
+
1
]
=
ival
;
}
{
uint16_t
ui16
;
i
-=
16
;
for
(
j
=
i
;
j
<
datasize
;
j
++
)
{
ui16
=
(
uint16_t
)
((
data
[
j
]
-
zref
)
*
factor
+
0
.
5
);
lGrib
[
*
gz
+
2
*
j
]
=
ui16
>>
8
;
lGrib
[
*
gz
+
2
*
j
+
1
]
=
ui16
;
}
}
*
gz
+=
2
*
datasize
;
return
;
...
...
@@ -1130,15 +1143,14 @@ void avx_encode_double_array_2byte(long datasize,
#elif defined __SSE4_1__
static
void
sse41_encode_double_array_2byte
(
long
datasize
,
void
sse41_encode_double_array_2byte
(
size_t
datasize
,
unsigned
char
*
restrict
lGrib
,
const
double
*
restrict
data
,
double
zref
,
double
factor
,
long
*
gz
)
double
zref
,
double
factor
,
size_t
*
gz
)
{
long
i
,
j
;
unsigned
int
ival
;
size_t
i
,
j
;
const
double
*
dval
=
data
;
__m128i
*
sgrib
=
(
__m128i
*
)
lGrib
;
__m128i
*
sgrib
=
(
__m128i
*
)
(
lGrib
+
(
*
gz
))
;
const
__m128i
swap
=
_mm_set_epi8
(
14
,
15
,
12
,
13
,
10
,
11
,
8
,
9
,
6
,
7
,
4
,
5
,
2
,
3
,
0
,
1
);
...
...
@@ -1150,8 +1162,6 @@ void sse41_encode_double_array_2byte(long datasize,
__m128i
i0
,
i1
,
i2
,
i3
,
i4
;
__m128i
s0
,
s1
;
// fprintf (stderr,"z entry: %ld ", *gz);
for
(
i
=
0
;
i
<
datasize
;
i
+=
16
)
{
(
void
)
_mm_prefetch
(
dval
+
8
,
_MM_HINT_NTA
);
...
...
@@ -1170,7 +1180,7 @@ void sse41_encode_double_array_2byte(long datasize,
i0
=
_mm_cvttpd_epi32
(
d0
);
i4
=
_mm_cvttpd_epi32
(
d4
);
i0
=
_mm_unpacklo_epi64
(
i0
,
i4
);
//_____________________________________________________________________________
d1
=
_mm_loadu_pd
(
dval
+
4
);
...
...
@@ -1191,7 +1201,7 @@ void sse41_encode_double_array_2byte(long datasize,
s0
=
_mm_packus_epi32
(
i0
,
i1
);
s0
=
_mm_shuffle_epi8
(
s0
,
swap
);
//
(void) _mm_storeu_si128 (sgrib, s0);
(
void
)
_mm_storeu_si128
(
sgrib
,
s0
);
//_____________________________________________________________________________
...
...
@@ -1212,7 +1222,7 @@ void sse41_encode_double_array_2byte(long datasize,
i2
=
_mm_cvttpd_epi32
(
d2
);
i4
=
_mm_cvttpd_epi32
(
d4
);
i2
=
_mm_unpacklo_epi64
(
i2
,
i4
);
//_____________________________________________________________________________
d3
=
_mm_loadu_pd
(
dval
+
12
);
...
...
@@ -1233,7 +1243,7 @@ void sse41_encode_double_array_2byte(long datasize,
s1
=
_mm_packus_epi32
(
i2
,
i3
);
s1
=
_mm_shuffle_epi8
(
s1
,
swap
);
//
(void) _mm_storeu_si128 (sgrib+1, s1);
(
void
)
_mm_storeu_si128
(
sgrib
+
1
,
s1
);
//_____________________________________________________________________________
...
...
@@ -1241,32 +1251,30 @@ void sse41_encode_double_array_2byte(long datasize,
sgrib
+=
2
;
}
if
(
i
!=
datasize
)
{
i
-=
16
;
for
(
j
=
i
;
j
<
datasize
;
j
++
)
{
ival
=
(
unsigned
int
)
((
data
[
j
]
-
zref
)
*
factor
+
0
.
5
);
lGrib
[
2
*
j
]
=
ival
>>
8
;
lGrib
[
2
*
j
+
1
]
=
ival
;
}
}
if
(
i
!=
datasize
)
{
uint16_t
ui16
;
i
-=
16
;
for
(
j
=
i
;
j
<
datasize
;
j
++
)
{
ui16
=
(
uint16_t
)
((
data
[
j
]
-
zref
)
*
factor
+
0
.
5
);
lGrib
[
*
gz
+
2
*
j
]
=
ui16
>>
8
;
lGrib
[
*
gz
+
2
*
j
+
1
]
=
ui16
;
}
}
*
gz
+=
2
*
datasize
;
fprintf
(
stderr
,
"z exit: %ld val 1: %d val z: %d
\n
"
,
*
gz
,
256
*
lGrib
[
0
]
+
lGrib
[
1
],
256
*
lGrib
[
datasize
-
2
]
+
lGrib
[
datasize
-
1
]);
return
;
}
#endif // S
SE4.1
#endif // S
IMD variants
static
void
encode_double_array_common
(
int
numBits
,
long
packStart
,
long
datasize
,
GRIBPACK
*
lGrib
,
const
double
*
data
,
double
zref
,
double
factor
,
long
*
gz
)
void
encode_double_array_common
(
int
numBits
,
size_t
packStart
,
size_t
datasize
,
GRIBPACK
*
lGrib
,
const
double
*
data
,
double
zref
,
double
factor
,
size_t
*
gz
)
{
long
i
,
z
=
*
gz
;
size_t
i
,
z
=
*
gz
;
unsigned
int
ival
;
int
cbits
,
jbits
;
unsigned
int
c
;
...
...
@@ -1313,10 +1321,10 @@ void encode_double_array_common(int numBits, long packStart, long datasize, GRIB
}
static
void
encode_double_array_byte
(
int
numBits
,
long
packStart
,
long
datasize
,
void
encode_double_array_byte
(
int
numBits
,
size_t
packStart
,
size_t
datasize
,
GRIBPACK
*
restrict
lGrib
,
const
double
*
restrict
data
,
double
zref
,
double
factor
,
long
*
restric
t
gz
)
double
zref
,
double
factor
,
size_
t
*
gz
)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t
start_minmax
,
end_minmax
;
...
...
@@ -1324,7 +1332,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
uint16_t
ui16
;
uint32_t
ui32
;
long
i
,
z
=
*
gz
;
size_t
i
,
z
=
*
gz
;
double
tmp
;
data
+=
packStart
;
...
...
@@ -1335,6 +1343,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
#ifdef _GET_IBM_COUNTER
hpmStart
(
2
,
"pack 8 bit base"
);
#endif
#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
...
...
@@ -1348,6 +1357,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
lGrib
[
z
]
=
(
uint16_t
)
tmp
;
z
++
;
}
#ifdef _GET_IBM_COUNTER
hpmStop
(
2
);
#endif
...
...
@@ -1416,6 +1426,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
#ifdef _GET_IBM_COUNTER
hpmStart
(
4
,
"pack 24 bit base"
);
#endif
#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
...
...
@@ -1432,6 +1443,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
lGrib
[
z
+
2
]
=
ui32
;
z
+=
3
;
}
#ifdef _GET_IBM_COUNTER
hpmStop
(
4
);
#endif
...
...
@@ -1441,6 +1453,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
#ifdef _GET_IBM_COUNTER
hpmStart
(
5
,
"pack 32 bit base"
);
#endif
#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
...
...
@@ -1458,6 +1471,7 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
lGrib
[
z
+
3
]
=
ui32
;
z
+=
4
;
}
#ifdef _GET_IBM_COUNTER
hpmStop
(
5
);
#endif
...
...
@@ -1480,13 +1494,13 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
#if defined (_ARCH_PWR6)
static
void
encode_double_array_unrolled
(
int
numBits
,
long
packStart
,
long
datasize
,
void
encode_double_array_unrolled
(
int
numBits
,
size_t
packStart
,
size_t
datasize
,
GRIBPACK
*
restrict
lGrib
,
const
double
*
restrict
data
,
double
zref
,
double
factor
,
long
*
restric
t
gz
)
double
zref
,
double
factor
,
size_
t
*
gz
)
{
U_BYTEORDER
;
long
i
,
j
,
z
=
*
gz
;
size_t
i
,
j
,
z
=
*
gz
;
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_2 8
#else
...
...
@@ -4488,7 +4502,7 @@ int encodeBDS(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *ise
/* Uwe Schulzweida, 11/04/2003 : Check that number of bits per value is not exceeded */
/* Uwe Schulzweida, 6/05/2003 : Copy result to fpval to prevent integer overflow */
long
z
=
*
gribLen
;
size_t
z
=
*
gribLen
;
long
i
,
jloop
;
int
numBits
;
int
ival
;
...
...
@@ -10174,7 +10188,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return
(
gribLen
);
}
static
const
char
grb_libvers
[]
=
"1.5.6"
" of ""Dec
4
2012"" ""1
5
:1
8
:1
0
"
;
static
const
char
grb_libvers
[]
=
"1.5.6"
" of ""Dec
7
2012"" ""1
3
:1
4
:1
2
"
;
const
char
*
cgribexLibraryVersion
(
void
)
{
...
...
src/stream_gribapi.c
View file @
948280e6
...
...
@@ -2070,7 +2070,6 @@ int gribapiDefSteptype(int editionNumber, grib_handle *gh, int tsteptype, int gc
switch
(
tsteptype
)
{
case
TSTEP_INSTANT
:
strcpy
(
stepType
,
"instant"
);
proDefTempNum
=
0
;
break
;
case
TSTEP_AVG
:
strcpy
(
stepType
,
"avg"
);
proDefTempNum
=
8
;
break
;
case
TSTEP_ACCUM
:
strcpy
(
stepType
,
"accum"
);
proDefTempNum
=
8
;
break
;
case
TSTEP_MAX
:
strcpy
(
stepType
,
"max"
);
proDefTempNum
=
8
;
break
;
...
...
@@ -2080,6 +2079,7 @@ int gribapiDefSteptype(int editionNumber, grib_handle *gh, int tsteptype, int gc
case
TSTEP_SD
:
strcpy
(
stepType
,
"sd"
);
proDefTempNum
=
8
;
break
;
case
TSTEP_COV
:
strcpy
(
stepType
,
"cov"
);
proDefTempNum
=
8
;
break
;
case
TSTEP_RATIO
:
strcpy
(
stepType
,
"ratio"
);
proDefTempNum
=
8
;
break
;
case
TSTEP_INSTANT
:
strcpy
(
stepType
,
"instant"
);
proDefTempNum
=
0
;
break
;
default:
strcpy
(
stepType
,
"instant"
);
proDefTempNum
=
0
;
break
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment