Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpim-sw
libcdi
Commits
990dfde0
Commit
990dfde0
authored
Jun 14, 2012
by
Uwe Schulzweida
Browse files
cgribexlib update
parent
08a3c7af
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/cgribexlib.c
View file @
990dfde0
/* Automatically generated by m214003 at 2012-06-08, do not edit */
/* Automatically generated by m214003 at 2012-06-14, do not edit */
/* CGRIBEXLIB_VERSION="1.5.3" */
...
...
@@ -890,7 +890,6 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#else
#ifdef __SSE2__
sse2_minmax_val
(
data
,
datasize
,
fmin
,
fmax
);
...
...
@@ -944,7 +943,8 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#endif
#undef __UNROLL_DEPTH_1
#else
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart
(
1
,
"minmax base"
);
...
...
@@ -973,9 +973,9 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
hpmStop
(
1
);
#endif
#endif
#endif
#endif
#endif
// _ARCH_PWR6 && original loop
#endif
// __SSE2__
#endif
// __AVX__
#ifdef _GET_X86_COUNTER
end_minmax
=
_rdtsc
();
...
...
@@ -1001,7 +1001,136 @@ void minmax_val(const double *restrict data, long datasize, double *fmin, double
#ifdef _GET_IBM_COUNTER
#endif
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#else
#define DISABLE_SIMD
#endif
#ifdef DISABLE_SIMD
#ifndef ENABLE_SSE4_1
#undef __SSE4_1__
#endif
#endif
// SSE4.1
#if 0
#ifdef __SSE4_1__
/*
 * Pack doubles into 16-bit values with SSE intrinsics, 16 inputs per loop
 * iteration.
 *
 * For every input x the code computes (x - zref) * factor + 0.5, converts the
 * result to a 32-bit integer with truncation, narrows it to 16 bits with
 * unsigned saturation, swaps the two bytes of each 16-bit value and stores
 * the result to lGrib.
 *
 * datasize  number of doubles in data; used only as the loop bound
 * lGrib     output buffer, written from its first byte, 32 bytes per iteration
 * data      input values, read 16 per iteration
 * zref      value subtracted from every input before scaling
 * factor    multiplier applied after the subtraction
 * gz        not read or written anywhere in this function
 *
 * NOTE(review): the loop advances by 16 and has no remainder handling, so when
 * datasize is not a multiple of 16 the final iteration still loads 16 doubles
 * and stores 32 bytes, i.e. it reads and writes past datasize elements.
 * NOTE(review): _mm_load_pd and _mm_stream_si128 are the aligned variants;
 * data and lGrib are presumably expected to be 16-byte aligned -- confirm
 * against the caller.
 * NOTE(review): the caller passes the address of its output offset as gz, but
 * that offset is neither used as the start position nor advanced here --
 * verify the intended contract before enabling this code path.
 */
static
void sse41_encode_double_array_2byte(long datasize,
unsigned char * restrict lGrib,
const double * restrict data,
double zref, double factor, long * restrict gz)
{
long i;
// Cursors that walk the input and output buffers.
const double *dval = data;
__m128i *sgrib = (__m128i *) lGrib;
// Shuffle mask that exchanges the two bytes inside every 16-bit lane.
const __m128i swap = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
// Broadcast constants: reference value, scale factor and the 0.5 offset.
const __m128d c0 = _mm_set1_pd(zref);
const __m128d c1 = _mm_set1_pd(factor);
const __m128d c2 = _mm_set1_pd(0.5);
__m128d d0, d4, d3, d2, d1;
__m128i i0, i1, i2, i3, i4;
__m128i s0, s1;
for (i = 0; i < datasize; i += 16)
{
(void) _mm_prefetch(dval+8, _MM_HINT_NTA);
//_____________________________________________________________________________
// Inputs 0..3: scale two pairs, truncate to int32, merge the low halves
// of both results into one register of four int32 values.
d0 = _mm_load_pd (dval);
d0 = _mm_sub_pd (d0, c0);
d0 = _mm_mul_pd (d0, c1);
d0 = _mm_add_pd (d0, c2);
d4 = _mm_load_pd (dval+2);
d4 = _mm_sub_pd (d4, c0);
d4 = _mm_mul_pd (d4, c1);
d4 = _mm_add_pd (d4, c2);
i0 = _mm_cvttpd_epi32 (d0);
i4 = _mm_cvttpd_epi32 (d4);
i0 = _mm_unpacklo_epi64 (i0, i4);
//_____________________________________________________________________________
// Inputs 4..7, same sequence.
d1 = _mm_load_pd (dval+4);
d1 = _mm_sub_pd (d1, c0);
d1 = _mm_mul_pd (d1, c1);
d1 = _mm_add_pd (d1, c2);
d4 = _mm_load_pd (dval+6);
d4 = _mm_sub_pd (d4, c0);
d4 = _mm_mul_pd (d4, c1);
d4 = _mm_add_pd (d4, c2);
i1 = _mm_cvttpd_epi32 (d1);
i4 = _mm_cvttpd_epi32 (d4);
i1 = _mm_unpacklo_epi64 (i1, i4);
//_____________________________________________________________________________
// Narrow eight int32 values to eight 16-bit values (unsigned saturation),
// swap the bytes of each value and store the first 16 output bytes with a
// non-temporal store.
s0 = _mm_packus_epi32(i0, i1);
s0 = _mm_shuffle_epi8 (s0, swap);
(void) _mm_stream_si128 (sgrib, s0);
//_____________________________________________________________________________
(void) _mm_prefetch(dval+16, _MM_HINT_NTA);
//_____________________________________________________________________________
// Inputs 8..11, same sequence.
d2 = _mm_load_pd (dval+8);
d2 = _mm_sub_pd (d2, c0);
d2 = _mm_mul_pd (d2, c1);
d2 = _mm_add_pd (d2, c2);
d4 = _mm_load_pd (dval+10);
d4 = _mm_sub_pd (d4, c0);
d4 = _mm_mul_pd (d4, c1);
d4 = _mm_add_pd (d4, c2);
i2 = _mm_cvttpd_epi32 (d2);
i4 = _mm_cvttpd_epi32 (d4);
i2 = _mm_unpacklo_epi64 (i2, i4);
//_____________________________________________________________________________
// Inputs 12..15, same sequence.
d3 = _mm_load_pd (dval+12);
d3 = _mm_sub_pd (d3, c0);
d3 = _mm_mul_pd (d3, c1);
d3 = _mm_add_pd (d3, c2);
d4 = _mm_load_pd (dval+14);
d4 = _mm_sub_pd (d4, c0);
d4 = _mm_mul_pd (d4, c1);
d4 = _mm_add_pd (d4, c2);
i3 = _mm_cvttpd_epi32 (d3);
i4 = _mm_cvttpd_epi32 (d4);
i3 = _mm_unpacklo_epi64 (i3, i4);
//_____________________________________________________________________________
// Narrow, byte-swap and store the second 16 output bytes.
s1 = _mm_packus_epi32(i2, i3);
s1 = _mm_shuffle_epi8 (s1, swap);
(void) _mm_stream_si128 (sgrib+1, s1);
//_____________________________________________________________________________
// Advance by the 16 doubles consumed and the two 128-bit vectors written.
dval += 16;
sgrib += 2;
}
return;
}
#endif // SSE4.1
#endif
static
void
encode_double_array_common
(
int
numBits
,
long
packStart
,
long
datasize
,
GRIBPACK
*
lGrib
,
const
double
*
data
,
double
zref
,
double
factor
,
long
*
gz
)
...
...
@@ -1093,6 +1222,13 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
#ifdef _GET_IBM_COUNTER
hpmStart
(
3
,
"pack 16 bit base"
);
#endif
#if 0
sse41_encode_double_array_2byte(datasize, lGrib, data, zref, factor, &z);
#else
#if defined (CRAY)
#pragma _CRI ivdep
#elif defined (SX)
...
...
@@ -1108,6 +1244,8 @@ void encode_double_array_byte(int numBits, long packStart, long datasize,
lGrib
[
z
+
1
]
=
ival
;
z
+=
2
;
}
#endif
#ifdef _GET_IBM_COUNTER
hpmStop
(
3
);
#endif
...
...
@@ -9842,7 +9980,7 @@ int gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbu
return
(
gribLen
);
}
static
const
char
grb_libvers
[]
=
"1.5.3"
" of ""Jun
8
2012"" ""1
0
:2
0:01
"
;
static
const
char
grb_libvers
[]
=
"1.5.3"
" of ""Jun
14
2012"" ""1
3
:2
2:45
"
;
const
char
*
cgribexLibraryVersion
(
void
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment