cgribexlib.c 355 KB
Newer Older
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1

Uwe Schulzweida's avatar
Uwe Schulzweida committed
2
/* Automatically generated by m214003 at 2018-11-14, do not edit */
3

Uwe Schulzweida's avatar
Uwe Schulzweida committed
4
/* CGRIBEXLIB_VERSION="1.9.2" */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
5

6
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5) || defined (__clang__)
7
8
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
Thomas Jahns's avatar
Thomas Jahns committed
9
10
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic warning "-Wstrict-overflow"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
11
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
12

Uwe Schulzweida's avatar
Uwe Schulzweida committed
13
14
#ifdef _ARCH_PWR6
#pragma options nostrict
Uwe Schulzweida's avatar
Uwe Schulzweida committed
15
#include <ppu_intrinsics.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
16
17
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
18
19
#ifdef  HAVE_CONFIG_H
#include "config.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
20
21
22
23
24
#endif

#include <string.h>
#include <ctype.h>
#include <stdarg.h>
25
#include <stdbool.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
26
#include <sys/types.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
27
#include <inttypes.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
28

29

Uwe Schulzweida's avatar
Uwe Schulzweida committed
30
31
32
#include "file.h"
#include "dmemory.h"
#include "dtypes.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
33
#include "calendar.h"
34
#include "timebase.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
35

36
37
#ifndef CGRIBEX_TEMPLATES_H
#define CGRIBEX_TEMPLATES_H
38
39
40
41
42

/* CAT pastes its two arguments into a single identifier joined by an
   underscore (X_Y). Arguments next to ## are not macro-expanded. */
#define CAT(X,Y)      X##_##Y
/* TEMPLATE forwards to CAT so that X and Y are macro-expanded first;
   use this one when either argument is itself a macro. */
#define TEMPLATE(X,Y) CAT(X,Y)

#endif 
Uwe Schulzweida's avatar
Uwe Schulzweida committed
43
44
#ifndef GRIB_INT_H
#define GRIB_INT_H
Uwe Schulzweida's avatar
Uwe Schulzweida committed
45

Uwe Schulzweida's avatar
Uwe Schulzweida committed
46
#if defined (HAVE_CONFIG_H)
47
#include "config.h"
48
49
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
50
51
#include <stdio.h>
#include <stdlib.h>
52
#include <stdbool.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
53
54
55
56
#include <math.h>
#include <float.h>


57
#if ! defined   (CGRIBEX_H)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
58
#include "cgribex.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
59
#endif
60
#if ! defined   (ERROR_H)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
61
#include "error.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
62
#endif
63
#if ! defined   (DTYPES_H)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
64
#include "dtypes.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
65
66
67
68
#endif


#if ! defined   (UCHAR)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
69
#define  UCHAR  unsigned char
Uwe Schulzweida's avatar
Uwe Schulzweida committed
70
71
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
72

Uwe Schulzweida's avatar
Uwe Schulzweida committed
73
#if defined (CRAY) || defined (SX) || defined (__uxpch__)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
74
#define VECTORCODE
Uwe Schulzweida's avatar
Uwe Schulzweida committed
75
76
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
77

Uwe Schulzweida's avatar
Uwe Schulzweida committed
78
#if defined (VECTORCODE)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
79
#ifdef  INT32
80
#  define  GRIBPACK     unsigned INT32
Uwe Schulzweida's avatar
Uwe Schulzweida committed
81
82
83
#  define  PACK_GRIB    packInt32
#  define  UNPACK_GRIB  unpackInt32
#else
84
#  define  GRIBPACK     unsigned INT64
Uwe Schulzweida's avatar
Uwe Schulzweida committed
85
86
87
#  define  PACK_GRIB    packInt64
#  define  UNPACK_GRIB  unpackInt64
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
88
89
90
#else
#  define  GRIBPACK     unsigned char
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
91

Uwe Schulzweida's avatar
Uwe Schulzweida committed
92
93
/* U_BYTEORDER declares a static union `u_byteorder` whose unsigned long
   member is initialised to 1; place it (followed by ';') in the scope where
   IS_BIGENDIAN() is used. */
#define  U_BYTEORDER     static union {unsigned long l; unsigned char c[sizeof(long)];} u_byteorder = {1}
/* Non-zero when the last byte of that long holds the 1, i.e. the least
   significant byte is stored at the highest address (big endian). */
#define  IS_BIGENDIAN()  (u_byteorder.c[sizeof(long) - 1])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
94

Uwe Schulzweida's avatar
Uwe Schulzweida committed
95
#if defined (__xlC__) /* performance problems on IBM */
Uwe Schulzweida's avatar
cleanup    
Uwe Schulzweida committed
96
97
98
99
#ifndef DBL_IS_NAN
#  define DBL_IS_NAN(x)     ((x) != (x))
#endif
#else
100
#ifndef DBL_IS_NAN
Uwe Schulzweida's avatar
Uwe Schulzweida committed
101
#if  defined  (HAVE_DECL_ISNAN)
102
#  define DBL_IS_NAN(x)     (isnan(x))
103
#elif  defined  (FP_NAN)
104
105
106
107
108
#  define DBL_IS_NAN(x)     (fpclassify(x) == FP_NAN)
#else
#  define DBL_IS_NAN(x)     ((x) != (x))
#endif
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
109
#endif
110
111
112
113

#ifndef IS_EQUAL
/* Inequality/equality expressed through ordering comparisons only.
   Arguments are parenthesized so that expression arguments (e.g. `a | b`,
   `c ? x : y`) keep their meaning; each argument is evaluated up to twice,
   so do not pass expressions with side effects.
   Note: if either operand is NaN both orderings are false, hence
   IS_EQUAL() yields true for NaN operands. */
#  define IS_NOT_EQUAL(x,y) ((x) < (y) || (y) < (x))
#  define IS_EQUAL(x,y)     (!IS_NOT_EQUAL(x,y))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
114
115
#endif

116
/* dummy use of unused parameters to silence compiler warnings */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
117
#ifndef UNUSED
Uwe Schulzweida's avatar
Uwe Schulzweida committed
118
#define  UNUSED(x) (void)(x)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
119
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
120

Uwe Schulzweida's avatar
Uwe Schulzweida committed
121
122
#define  JP24SET    0xFFFFFF  /* 2**24 - 1 (---> 16777215) */
#define  JP23SET    0x7FFFFF  /* 2**23 - 1 (--->  8388607) */
123

Uwe Schulzweida's avatar
Uwe Schulzweida committed
124
#define  POW_2_M24  0.000000059604644775390625  /* pow(2.0, -24.0) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
125

Uwe Schulzweida's avatar
Uwe Schulzweida committed
126
127
128
129
#ifdef __cplusplus
extern "C" {
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
130
#define intpow2(x) (ldexp(1.0, (x)))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
131

Uwe Schulzweida's avatar
Uwe Schulzweida committed
132
unsigned correct_bdslen(unsigned bdslen, long recsize, long gribpos);
133

Uwe Schulzweida's avatar
Uwe Schulzweida committed
134
135
136
137
/* CDI converter routines */

/* param format:  DDDCCCNNN */

Uwe Schulzweida's avatar
Uwe Schulzweida committed
138
139
void    cdiDecodeParam(int param, int *pnum, int *pcat, int *pdis);
int     cdiEncodeParam(int pnum, int pcat, int pdis);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
140
141
142
143

/* date format:  YYYYMMDD */
/* time format:  hhmmss   */

Uwe Schulzweida's avatar
Uwe Schulzweida committed
144
145
void    cdiDecodeDate(int64_t date, int *year, int *month, int *day);
int64_t cdiEncodeDate(int year, int month, int day);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
146
147
148
149

void    cdiDecodeTime(int time, int *hour, int *minute, int *second);
int     cdiEncodeTime(int hour, int minute, int second);

Uwe Schulzweida's avatar
Uwe Schulzweida committed
150
151
152
/* CALENDAR types */

#define  CALENDAR_STANDARD        0  /* don't change this value (used also in cgribexlib)! */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
153
154
155
156
157
158
#define  CALENDAR_GREGORIAN       1
#define  CALENDAR_PROLEPTIC       2
#define  CALENDAR_360DAYS         3
#define  CALENDAR_365DAYS         4
#define  CALENDAR_366DAYS         5
#define  CALENDAR_NONE            6
Uwe Schulzweida's avatar
Uwe Schulzweida committed
159
160
161

extern FILE *grprsm;

Uwe Schulzweida's avatar
Uwe Schulzweida committed
162
extern int  CGRIBEX_Debug;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
163
164
165
166
167
168
169
170
171
172

void   gprintf(const char *caller, const char *fmt, ...);

void   grsdef(void);

void   prtbin(int kin, int knbit, int *kout, int *kerr);
void   confp3(double pval, int *kexp, int *kmant, int kbits, int kround);
double decfp2(int kexp, int kmant);
void   ref2ibm(double *pref, int kbits);

173
174
175
176
void   scale_complex_double(double *fpdata, int pcStart, int pcScale, int trunc, int inv);
void   scale_complex_float(float *fpdata, int pcStart, int pcScale, int trunc, int inv);
void   scatter_complex_double(double *fpdata, int pcStart, int trunc, int nsp);
void   scatter_complex_float(float *fpdata, int pcStart, int trunc, int nsp);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
177
178
void   gather_complex_double(double *fpdata, size_t pcStart, size_t trunc, size_t nsp);
void   gather_complex_float(float *fpdata, size_t pcStart, size_t trunc, size_t nsp);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
179

Uwe Schulzweida's avatar
Uwe Schulzweida committed
180
void   scm0_double(double *pdl, double *pdr, double *pfl, double *pfr, int klg);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
181
182
int    qu2reg2(double *pfield, int *kpoint, int klat, int klon,
	       double *ztemp, double msval, int *kret);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
183
184
185
186
int    qu2reg3_double(double *pfield, int *kpoint, int klat, int klon,
		      double msval, int *kret, int omisng, int operio, int oveggy);
int    qu2reg3_float(float *pfield, int *kpoint, int klat, int klon,
		     float msval, int *kret, int omisng, int operio, int oveggy);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
187

Uwe Schulzweida's avatar
Uwe Schulzweida committed
188
#ifdef  INT32
Uwe Schulzweida's avatar
Uwe Schulzweida committed
189
long   packInt32(unsigned INT32 *up, unsigned char *cp, long bc, long tc);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
190
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
191
long   packInt64(unsigned INT64 *up, unsigned char *cp, long bc, long tc);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
192
#ifdef  INT32
Uwe Schulzweida's avatar
Uwe Schulzweida committed
193
long   unpackInt32(const unsigned char *cp, unsigned INT32 *up, long bc, long tc);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
194
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
195
long   unpackInt64(const unsigned char *cp, unsigned INT64 *up, long bc, long tc);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
196

197
198
199
200
201
202
void  grib_encode_double(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
			 double *fsec3, int *isec4, double *fsec4, int klenp, int *kgrib,
			 int kleng, int *kword, int efunc, int *kret);
void  grib_encode_float(int *isec0, int *isec1, int *isec2, float *fsec2, int *isec3,
			float *fsec3, int *isec4, float *fsec4, int klenp, int *kgrib,
			int kleng, int *kword, int efunc, int *kret);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
203

Uwe Schulzweida's avatar
Uwe Schulzweida committed
204
205
206
207
208
209
void  grib_decode_double(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
			 double *fsec3, int *isec4, double *fsec4, int klenp, int *kgrib,
			 int kleng, int *kword, int dfunc, int *kret);
void  grib_decode_float(int *isec0, int *isec1, int *isec2, float *fsec2, int *isec3,
			float *fsec3, int *isec4, float *fsec4, int klenp, int *kgrib,
			int kleng, int *kword, int dfunc, int *kret);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
210

Uwe Schulzweida's avatar
Uwe Schulzweida committed
211
212
213
214
215
216
217

int grib1Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **pdsp,
		  unsigned char **gdsp, unsigned char **bmsp, unsigned char **bdsp, long *gribrecsize);
int grib2Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **idsp,
		  unsigned char **lusp, unsigned char **gdsp, unsigned char **pdsp,
		  unsigned char **drsp, unsigned char **bmsp, unsigned char **bdsp);

Uwe Schulzweida's avatar
Uwe Schulzweida committed
218
#ifdef  __cplusplus
Uwe Schulzweida's avatar
Uwe Schulzweida committed
219
220
221
}
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
222
#endif  /* GRIB_INT_H */
223
224
#ifndef GRIBDECODE_H
#define GRIBDECODE_H
Uwe Schulzweida's avatar
Uwe Schulzweida committed
225
226
227
228
229
230

#define  UNDEFINED          9.999e20


/* Decode a sign-and-magnitude integer from big-endian bytes: bit 7 of the
   first byte is the sign (set => negative), the remaining bits are the
   magnitude. The factor (1 - ((a & 128) >> 6)) evaluates to +1 or -1.
   Arguments are parenthesized so expression arguments decode correctly. */
#define  GET_INT3(a,b,c)    ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((((a) & 127) << 16)+((b)<<8)+(c)))
#define  GET_INT2(a,b)      ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((((a) & 127) << 8) + (b)))
231
/* One-byte sign-and-magnitude integer: bit 7 is the sign, bits 0-6 the
   magnitude. The argument is parenthesized so expressions decode correctly. */
#define  GET_INT1(a)        ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((a)&127))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
232
233

/* this requires a 32-bit default integer machine */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
234
235
236
237
/* Assemble an unsigned big-endian integer from 1..4 bytes (first argument is
   the most significant byte). Each byte is converted to unsigned before it
   is shifted, so a top byte >= 0x80 no longer shifts into the sign bit of a
   signed int, and every argument is parenthesized so expression arguments
   are combined correctly. */
#define  GET_UINT4(a,b,c,d) ((unsigned) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)))
#define  GET_UINT3(a,b,c)   ((unsigned) (((unsigned) (a) << 16) + ((unsigned) (b) << 8)  + (unsigned) (c)))
#define  GET_UINT2(a,b)     ((unsigned) (((unsigned) (a) << 8)  + (unsigned) (b)))
#define  GET_UINT1(a)       ((unsigned)  (a))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
238

Uwe Schulzweida's avatar
Uwe Schulzweida committed
239
240
241
/* True when the four bytes at `s` spell the given record marker.
   `s` is parenthesized so pointer expressions such as `buf + off` work;
   it is evaluated up to four times. */
#define  BUDG_START(s)      ((s)[0]=='B' && (s)[1]=='U' && (s)[2]=='D' && (s)[3]=='G')
#define  TIDE_START(s)      ((s)[0]=='T' && (s)[1]=='I' && (s)[2]=='D' && (s)[3]=='E')
#define  GRIB_START(s)      ((s)[0]=='G' && (s)[1]=='R' && (s)[2]=='I' && (s)[3]=='B')
Uwe Schulzweida's avatar
Uwe Schulzweida committed
242
/* True when the four bytes at `s` are the end marker "7777". `s` is
   parenthesized so pointer expressions work; it is evaluated up to four times. */
#define  GRIB_FIN(s)        ((s)[0]=='7' && (s)[1]=='7' && (s)[2]=='7' && (s)[3]=='7')
Uwe Schulzweida's avatar
Uwe Schulzweida committed
243

Uwe Schulzweida's avatar
Uwe Schulzweida committed
244
/* GRIB1 Section 0: Indicator Section (IS) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
245

Uwe Schulzweida's avatar
Uwe Schulzweida committed
246
#define  GRIB1_SECLEN(s)     GET_UINT3(s[ 4], s[ 5], s[ 6])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
247
#define  GRIB_EDITION(s)     GET_UINT1(s[ 7])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
248

Uwe Schulzweida's avatar
Uwe Schulzweida committed
249
/* GRIB1 Section 1: Product Definition Section (PDS) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
250
251
252
253
254
255
256
257
258
259
260
261
262
263

#define  PDS_Len             GET_UINT3(pds[ 0], pds[ 1], pds[ 2])
#define  PDS_CodeTable       GET_UINT1(pds[ 3])
#define  PDS_CenterID        GET_UINT1(pds[ 4])
#define  PDS_ModelID         GET_UINT1(pds[ 5])
#define  PDS_GridDefinition  GET_UINT1(pds[ 6])
#define  PDS_Sec2Or3Flag     GET_UINT1(pds[ 7])
#define  PDS_HAS_GDS         ((pds[7] & 128) != 0)
#define  PDS_HAS_BMS         ((pds[7] &  64) != 0)
#define  PDS_Parameter       GET_UINT1(pds[ 8])
#define  PDS_LevelType       GET_UINT1(pds[ 9])
#define  PDS_Level1          (pds[10])
#define  PDS_Level2	     (pds[11])
#define  PDS_Level	     GET_UINT2(pds[10], pds[11])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
264
#define  PDS_Year            GET_INT1(pds[12])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
/* Date, time and averaging fields of the Product Definition Section.
   All macros read from a byte array named `pds` that must be in scope
   where the macro is used. */
#define  PDS_Month           GET_UINT1(pds[13])
#define  PDS_Day             GET_UINT1(pds[14])
#define  PDS_Hour            GET_UINT1(pds[15])
#define  PDS_Minute          GET_UINT1(pds[16])
/* Year, month and day combined as year*10000 + month*100 + day. */
#define  PDS_Date            (PDS_Year*10000+PDS_Month*100+PDS_Day)
/* Hour and minute combined as hour*100 + minute (no seconds). */
#define  PDS_Time            (PDS_Hour*100+PDS_Minute)
#define  PDS_TimeUnit        GET_UINT1(pds[17])
#define  PDS_TimePeriod1     GET_UINT1(pds[18])
#define  PDS_TimePeriod2     GET_UINT1(pds[19])
#define  PDS_TimeRange       GET_UINT1(pds[20])
#define  PDS_AvgNum          GET_UINT2(pds[21], pds[22])
#define  PDS_AvgMiss         GET_UINT1(pds[23])
#define  PDS_Century         GET_UINT1(pds[24])
#define  PDS_Subcenter       GET_UINT1(pds[25])
/* Signed (sign-and-magnitude) two-byte value, hence GET_INT2. */
#define  PDS_DecimalScale    GET_INT2(pds[26],pds[27])


Uwe Schulzweida's avatar
Uwe Schulzweida committed
282
/* GRIB1 Section 2: Grid Description Section (GDS) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
283

Uwe Schulzweida's avatar
Uwe Schulzweida committed
284
#define  GDS_Len             ((gds) == NULL ? 0 : GET_UINT3(gds[0], gds[1], gds[2]))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
285
286
#define  GDS_NV              GET_UINT1(gds[ 3])
#define  GDS_PVPL            GET_UINT1(gds[ 4])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
287
288
#define  GDS_PV	             ((gds[3] ==    0) ? -1 : (int) gds[4] - 1)
#define  GDS_PL	             ((gds[4] == 0xFF) ? -1 : (int) gds[3] * 4 + (int) gds[4] - 1)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
289
290
291
#define  GDS_GridType        GET_UINT1(gds[ 5])


Uwe Schulzweida's avatar
Uwe Schulzweida committed
292
/* GRIB1 Triangular grid of DWD */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
293
294
295
296
297
298
299
300
301
302
#define  GDS_GME_NI2         GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_GME_NI3         GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_GME_ND          GET_UINT3(gds[10], gds[11], gds[12])
#define  GDS_GME_NI          GET_UINT3(gds[13], gds[14], gds[15])
#define  GDS_GME_AFlag       GET_UINT1(gds[16])
#define  GDS_GME_LatPP       GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_GME_LonPP       GET_INT3(gds[20], gds[21], gds[22])
#define  GDS_GME_LonMPL      GET_INT3(gds[23], gds[24], gds[25])
#define  GDS_GME_BFlag       GET_UINT1(gds[27])

Uwe Schulzweida's avatar
Uwe Schulzweida committed
303
/* GRIB1 Spectral */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
304
305
306
307
308
309
#define  GDS_PentaJ          GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_PentaK          GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_PentaM          GET_UINT2(gds[10], gds[11])
#define  GDS_RepType         GET_UINT1(gds[12])
#define  GDS_RepMode         GET_UINT1(gds[13])

Uwe Schulzweida's avatar
Uwe Schulzweida committed
310
/* GRIB1 Regular grid */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
311
312
313
314
315
316
317
#define  GDS_NumLon          GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_NumLat          GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_FirstLat        GET_INT3(gds[10], gds[11], gds[12])
#define  GDS_FirstLon        GET_INT3(gds[13], gds[14], gds[15])
#define  GDS_ResFlag         GET_UINT1(gds[16])
#define  GDS_LastLat         GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_LastLon         GET_INT3(gds[20], gds[21], gds[22])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
318
319
320
/* Unsigned two-byte fields of the regular-grid GDS, read from `gds`. */
#define  GDS_LonIncr         GET_UINT2(gds[23], gds[24])
/* GDS_LatIncr and GDS_NumPar decode the same two bytes (gds[25..26]);
   NOTE(review): presumably which name applies depends on the grid type —
   confirm against the callers. */
#define  GDS_LatIncr         GET_UINT2(gds[25], gds[26])
#define  GDS_NumPar          GET_UINT2(gds[25], gds[26])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
321
322
323
#define  GDS_ScanFlag        GET_UINT1(gds[27])
#define  GDS_LatSP           GET_INT3(gds[32], gds[33], gds[34])
#define  GDS_LonSP           GET_INT3(gds[35], gds[36], gds[37])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
324
#define  GDS_RotAngle        (GET_Real(&(gds[38])))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
325

Uwe Schulzweida's avatar
Uwe Schulzweida committed
326
/* GRIB1 Lambert */
327
328
329
330
331
332
333
334
/* Lambert conformal fields of the Grid Description Section, read from a
   byte array named `gds` that must be in scope where the macro is used.
   The GET_INT3 fields are three-byte sign-and-magnitude integers. */
#define  GDS_Lambert_Lov     GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_Lambert_dx      GET_INT3(gds[20], gds[21], gds[22])
#define  GDS_Lambert_dy      GET_INT3(gds[23], gds[24], gds[25])
#define  GDS_Lambert_ProjFlag GET_UINT1(gds[26])
#define  GDS_Lambert_LatS1   GET_INT3(gds[28], gds[29], gds[30])
#define  GDS_Lambert_LatS2   GET_INT3(gds[31], gds[32], gds[33])
#define  GDS_Lambert_LatSP   GET_INT3(gds[34], gds[35], gds[36])
/* Longitude of the southern pole occupies the three bytes following
   LatSP (gds[37..39]); previously gds[37] was read three times. */
#define  GDS_Lambert_LonSP   GET_INT3(gds[37], gds[38], gds[39])
335

Uwe Schulzweida's avatar
Uwe Schulzweida committed
336
/* GRIB1 Section 3: Bit Map Section (BMS) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
337

Uwe Schulzweida's avatar
Uwe Schulzweida committed
338
#define  BMS_Len	     ((bms) == NULL ? 0 : GET_UINT3(bms[0], bms[1], bms[2]))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
339
340
341
342
343
/* Bit Map Section accessors; all read from a byte pointer named `bms`
   that must be in scope. Only BMS_Bitmap checks `bms` for NULL;
   BMS_UnusedBits and BMS_BitmapSize dereference it unconditionally. */
#define  BMS_UnusedBits      (bms[3])
/* BMS_Numeric is defined but expands to nothing. */
#define  BMS_Numeric         
/* Pointer to the bitmap payload, 6 bytes into the section. */
#define  BMS_Bitmap	     ((bms) == NULL ? NULL : (bms)+6)
/* Number of bitmap bits: (section length - 6) * 8 minus the unused bits. */
#define  BMS_BitmapSize      (((((bms[0]<<16)+(bms[1]<<8)+bms[2]) - 6)<<3) - bms[3])

Uwe Schulzweida's avatar
Uwe Schulzweida committed
344
/* GRIB1 Section 4: Binary Data Section (BDS) */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
345

Uwe Schulzweida's avatar
Uwe Schulzweida committed
346
#define  BDS_Len	    GET_UINT3(bds[0], bds[1], bds[2])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
347
348
#define  BDS_Flag	    (bds[3])
#define  BDS_BinScale       GET_INT2(bds[ 4], bds[ 5])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
349
#define  BDS_RefValue       (decfp2((int)bds[ 6], GET_UINT3(bds[7], bds[8], bds[9])))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
350
#define  BDS_NumBits        ((int) bds[10])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
351
#define  BDS_RealCoef       (decfp2((int)bds[zoff+11], GET_UINT3(bds[zoff+12], bds[zoff+13], bds[zoff+14])))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
352
353
#define  BDS_PackData       ((int) ((bds[zoff+11]<<8) + bds[zoff+12]))
#define  BDS_Power          GET_INT2(bds[zoff+13], bds[zoff+14])
Uwe Schulzweida's avatar
Uwe Schulzweida committed
354
355
#define  BDS_Z              (bds[13])

Uwe Schulzweida's avatar
Uwe Schulzweida committed
356
357
358
359
360
361
/* GRIB1 Section 5: End Section (ES) */

/* GRIB2 */

#define  GRIB2_SECLEN(section)   (GET_UINT4(section[0], section[1], section[2], section[3]))
#define  GRIB2_SECNUM(section)   (GET_UINT1(section[4]))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
362

363
364
365
#endif  /* GRIBDECODE_H */
#ifndef CGRIBEX_GRIB_ENCODE_H
#define CGRIBEX_GRIB_ENCODE_H
Uwe Schulzweida's avatar
Uwe Schulzweida committed
366

Uwe Schulzweida's avatar
Uwe Schulzweida committed
367
#include <limits.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
368
369
370

#define PutnZero(n) \
{ \
Uwe Schulzweida's avatar
Uwe Schulzweida committed
371
  for ( size_t i = z >= 0 ? (size_t)z : 0; i < (size_t)(z+n); i++ ) lGrib[i] = 0; \
Uwe Schulzweida's avatar
Uwe Schulzweida committed
372
373
374
  z += n; \
}

Uwe Schulzweida's avatar
Uwe Schulzweida committed
375
376
377
378
379
380
381
382
383
384
/* Append the low 1..4 bytes of Value, most significant byte first, to the
   output buffer. These macros use two names from the calling scope: the
   buffer `lGrib` and the write index `z`, which is advanced by one per byte.
   Each stored element is Value shifted and converted to GRIBPACK.
   Value is evaluated once per byte written, so avoid side effects. */
#define Put1Byte(Value)  (lGrib[z++] = (GRIBPACK)(Value))
#define Put2Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))
#define Put3Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >> 16)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))
#define Put4Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >> 24)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >> 16)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
385

Uwe Schulzweida's avatar
Uwe Schulzweida committed
386
387
388
/* Write a signed value in sign-and-magnitude form using 1, 2 or 3 bytes.
   Value is first stored in `ival` (a variable the caller must provide);
   a negative value is replaced by <sign bit> - ival, i.e. the top bit of
   the field is set and the remaining bits hold the magnitude. The result
   is emitted with the matching PutNByte macro (which needs `lGrib` and `z`).
   Each macro expands to a braced block, so `PutNInt(x);` leaves an extra
   empty statement — take care inside unbraced if/else. */
#define Put1Int(Value)  {ival = Value; if ( ival < 0 ) ival =     0x80 - ival; Put1Byte(ival);}
#define Put2Int(Value)  {ival = Value; if ( ival < 0 ) ival =   0x8000 - ival; Put2Byte(ival);}
#define Put3Int(Value)  {ival = Value; if ( ival < 0 ) ival = 0x800000 - ival; Put3Byte(ival);}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
389

Uwe Schulzweida's avatar
Uwe Schulzweida committed
390
391
392
393
394
/* Number of bits in an `int` on the target, as a compile-time constant. */
enum { BitsPerInt = (int) (CHAR_BIT * sizeof(int)) };


Uwe Schulzweida's avatar
Uwe Schulzweida committed
395
396
397
398
399
400
401
/* Write a floating-point value as 4 bytes: confp3() splits Value into
   `exponent` and `mantissa` (called with kbits = BitsPerInt and kround = 1),
   then the exponent is emitted as 1 byte followed by the mantissa as 3 bytes.
   The caller must provide the int variables `exponent` and `mantissa` as
   well as `lGrib` and `z` used by Put1Byte/Put3Byte. */
#define Put1Real(Value)          \
{                                \
  confp3(Value, &exponent, &mantissa, BitsPerInt, 1); \
  Put1Byte(exponent);            \
  Put3Byte(mantissa);            \
}

402
#endif  /* CGRIBEX_GRIB_ENCODE_H */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
403
404
405
406
#ifndef CODEC_COMMON_H
#define CODEC_COMMON_H
/* Swap the two bytes of a 16-bit value. The argument is parenthesized so
   that expression arguments are swapped as a whole; it is evaluated twice,
   so do not pass expressions with side effects. */
#define gribSwapByteOrder_uint16(ui16)  ((uint16_t)(((ui16)<<8) | ((ui16)>>8)))
#endif  /* CODEC_COMMON_H */
Uwe Schulzweida's avatar
Uwe Schulzweida committed
407
/* 
Uwe Schulzweida's avatar
Uwe Schulzweida committed
408
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_MINMAXVAL -qopenmp -DOMP_SIMD minmax_val.c
Uwe Schulzweida's avatar
Uwe Schulzweida committed
409
410
 result on hama2 (icc 16.0.0):
     float:
Uwe Schulzweida's avatar
Uwe Schulzweida committed
411
412
minmax_val: fmin: -500000  fmax: 499999  time:   1.22s
simd      : fmin: -500000  fmax: 499999  time:   1.20s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
413
    double:
Uwe Schulzweida's avatar
Uwe Schulzweida committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
minmax_val: fmin: -500000  fmax: 499999  time:   2.86s
orig      : fmin: -500000  fmax: 499999  time:   2.74s
simd      : fmin: -500000  fmax: 499999  time:   2.70s
avx       : fmin: -500000  fmax: 499999  time:   2.99s

gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL -fopenmp -DOMP_SIMD -Wa,-q minmax_val.c
 result on thunder5 (gcc 6.1.0):
float:
minmax_val: fmin: -500000  fmax: 499999  time:   8.25s
  simd    : fmin: -500000  fmax: 499999  time:   1.24s
double:
minmax_val: fmin: -500000  fmax: 499999  time:   2.73s
  orig    : fmin: -500000  fmax: 499999  time:   9.24s
  simd    : fmin: -500000  fmax: 499999  time:   2.78s
  avx     : fmin: -500000  fmax: 499999  time:   2.90s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
429

Uwe Schulzweida's avatar
Uwe Schulzweida committed
430
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
431
432
433
 result on bailung (gcc 4.8.2):
  orig    : fmin: -500000  fmax: 499999  time:   4.82s
  sse2    : fmin: -500000  fmax: 499999  time:   4.83s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
434

Uwe Schulzweida's avatar
Uwe Schulzweida committed
435
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL -fopenmp -DOMP_SIMD -Wa,-q minmax_val.c
436
 result on thunder5 (gcc 4.8.2):
Uwe Schulzweida's avatar
Uwe Schulzweida committed
437
  orig    : fmin: -500000  fmax: 499999  time:   3.10s
438
  simd    : fmin: -500000  fmax: 499999  time:   3.10s # omp simd in gcc 4.9
Uwe Schulzweida's avatar
Uwe Schulzweida committed
439
  avx     : fmin: -500000  fmax: 499999  time:   2.84s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
440

Uwe Schulzweida's avatar
Uwe Schulzweida committed
441
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_MINMAXVAL -openmp -DOMP_SIMD minmax_val.c
Uwe Schulzweida's avatar
Uwe Schulzweida committed
442
 result on thunder5 (icc 14.0.2):
Uwe Schulzweida's avatar
Uwe Schulzweida committed
443
  orig    : fmin: -500000  fmax: 499999  time:   2.83s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
444
  simd    : fmin: -500000  fmax: 499999  time:   2.83s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
445
  avx     : fmin: -500000  fmax: 499999  time:   2.92s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
446

Uwe Schulzweida's avatar
Uwe Schulzweida committed
447
xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax_val.c
Uwe Schulzweida's avatar
Uwe Schulzweida committed
448
 result on blizzard (xlc 12):
Uwe Schulzweida's avatar
Uwe Schulzweida committed
449
450
  orig    : fmin: -500000  fmax: 499999  time:   7.26s
  pwr6u6  : fmin: -500000  fmax: 499999  time:   5.92s
Uwe Schulzweida's avatar
Uwe Schulzweida committed
451
*/
Uwe Schulzweida's avatar
Uwe Schulzweida committed
452
#if defined(_ARCH_PWR6)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
453
454
#pragma options nostrict
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
455
456
457

#include <stdlib.h>

Uwe Schulzweida's avatar
Uwe Schulzweida committed
458
//#undef _GET_X86_COUNTER
Uwe Schulzweida's avatar
Uwe Schulzweida committed
459
//#undef _GET_IBM_COUNTER
Uwe Schulzweida's avatar
Uwe Schulzweida committed
460
//#undef _GET_MACH_COUNTER
Uwe Schulzweida's avatar
Uwe Schulzweida committed
461
//#undef _ARCH_PWR6
Uwe Schulzweida's avatar
Uwe Schulzweida committed
462

Uwe Schulzweida's avatar
Uwe Schulzweida committed
463
#if defined(_GET_IBM_COUNTER)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
464
#include <libhpc.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
465
#elif defined(_GET_X86_COUNTER)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
466
#include <x86intrin.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
467
#elif defined(_GET_MACH_COUNTER)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
468
#include <mach/mach_time.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
469
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
470

Uwe Schulzweida's avatar
Uwe Schulzweida committed
471
472
473
474
475
476
/* Define GNUC_PUSH_POP for genuine GCC (not ICC, not clang) version 4.4 or
   newer. The major and minor numbers are compared as a pair: testing
   `__GNUC_MINOR__ >= 4` on its own wrongly rejected newer major releases
   whose minor number is below 4 (e.g. GCC 5.1, 6.2, 7.3). */
#if   defined(__GNUC__) && !defined(__ICC) && !defined(__clang__)
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4))
#define GNUC_PUSH_POP
#endif
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
477
#ifndef DISABLE_SIMD
Uwe Schulzweida's avatar
Uwe Schulzweida committed
478
479
480
481
482
483
#if   defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC)    && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
484
485
486
487
488
#endif

#ifdef DISABLE_SIMD
#define DISABLE_SIMD_MINMAXVAL
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
489

Uwe Schulzweida's avatar
Uwe Schulzweida committed
490
#if !defined(TEST_MINMAXVAL)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
491
#define DISABLE_SIMD_MINMAXVAL
Uwe Schulzweida's avatar
Uwe Schulzweida committed
492
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
493

Uwe Schulzweida's avatar
Uwe Schulzweida committed
494
#ifdef DISABLE_SIMD_MINMAXVAL
Uwe Schulzweida's avatar
Uwe Schulzweida committed
495
# if defined(ENABLE_AVX)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
496
497
#  define _ENABLE_AVX
# endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
498
# if defined(ENABLE_SSE2)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
499
500
#  define _ENABLE_SSE2
# endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
501
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
502

Uwe Schulzweida's avatar
Uwe Schulzweida committed
503
#ifndef DISABLE_SIMD_MINMAXVAL
Uwe Schulzweida's avatar
Uwe Schulzweida committed
504
# if defined(__AVX__)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
505
506
#  define _ENABLE_AVX
# endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
507
# if defined(__SSE2__)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
508
509
510
#  define _ENABLE_SSE2
# endif
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
511

Uwe Schulzweida's avatar
Uwe Schulzweida committed
512
513
514
#include <float.h>
#include <stdint.h>
#include <inttypes.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
515

Uwe Schulzweida's avatar
Uwe Schulzweida committed
516
#if defined(_ENABLE_AVX)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
517
#include <immintrin.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
518
#elif defined(_ENABLE_SSE2)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
519
#include <emmintrin.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
520
521
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
522

Uwe Schulzweida's avatar
Uwe Schulzweida committed
523
#if defined(_ENABLE_AVX)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
524

Uwe Schulzweida's avatar
Uwe Schulzweida committed
525
/*
 * avx_minmax_val_double - fold the values buf[0..nframes-1] into a running
 * minimum and maximum using 256-bit AVX vectors (four doubles per vector).
 *
 *   buf      input values, read only
 *   nframes  number of doubles in buf
 *   min,max  in:  starting values of the reduction
 *            out: smallest / largest of the starting value and all inputs
 *
 * With nframes == 0 the outputs are rewritten with their incoming values.
 */
static
void avx_minmax_val_double(const double *restrict buf, size_t nframes, double *min, double *max)
{
  double fmin[4], fmax[4];
  __m256d current_max, current_min, work;

  // load max and min values into all four slots of the YMM registers
  current_min = _mm256_set1_pd(*min);
  current_max = _mm256_set1_pd(*max);

  // Work input one value at a time until "buf" reaches 32 byte alignment.
  // uintptr_t is used for the address test because unsigned long can be
  // narrower than a pointer (LLP64 targets).
  while ( ((uintptr_t)buf) % 32 != 0 && nframes > 0) {
    // broadcast the next double into all lanes of the work register
    work = _mm256_set1_pd(*buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // main loop: 16 doubles (four aligned vectors) per iteration,
  // with a prefetch issued ahead of every second vector
  while (nframes >= 16) {

    (void) _mm_prefetch((const char *)(buf+8), _MM_HINT_NTA);

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    (void) _mm_prefetch((const char *)(buf+8), _MM_HINT_NTA);

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;
    nframes -= 16;
  }

  // work through the remaining aligned vectors
  while (nframes >= 4) {
    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;
    nframes -= 4;
  }

  // work through the remaining values
  while ( nframes > 0) {
    work = _mm256_set1_pd(*buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // find min & max value through shuffle tricks:
  // first swap the two doubles inside each 128-bit half, then swap the
  // halves, so that every lane ends up holding the overall result

  work = _mm256_shuffle_pd(current_min, current_min, 5);
  work = _mm256_min_pd (work, current_min);
  current_min = work;
  work = _mm256_permute2f128_pd(work, work, 1);
  work = _mm256_min_pd (work, current_min);
  _mm256_storeu_pd(fmin, work);

  work = _mm256_shuffle_pd(current_max, current_max, 5);
  work = _mm256_max_pd (work, current_max);
  current_max = work;
  work = _mm256_permute2f128_pd(work, work, 1);
  work = _mm256_max_pd (work, current_max);
  _mm256_storeu_pd(fmax, work);

  *min = fmin[0];
  *max = fmax[0];
}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
616

Uwe Schulzweida's avatar
Uwe Schulzweida committed
617
#elif defined(_ENABLE_SSE2)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
618

Uwe Schulzweida's avatar
Uwe Schulzweida committed
619
/*
 * sse2_minmax_val_double - fold the values buf[0..nframes-1] into a running
 * minimum and maximum using 128-bit SSE2 vectors (two doubles per vector).
 *
 *   buf      input values, read only
 *   nframes  number of doubles in buf
 *   min,max  in:  starting values of the reduction
 *            out: smallest / largest of the starting value and all inputs
 *
 * With nframes == 0 the outputs are rewritten with their incoming values.
 */
static
void sse2_minmax_val_double(const double *restrict buf, size_t nframes, double *min, double *max)
{
  __m128d current_max, current_min, work;

  // load starting max and min values into all slots of the XMM registers
  current_min = _mm_set1_pd(*min);
  current_max = _mm_set1_pd(*max);

  // Work on input one value at a time until buf reaches 16 byte alignment.
  // uintptr_t is used for the address test because unsigned long can be
  // narrower than a pointer (LLP64 targets).
  while ( ((uintptr_t)buf) % 16 != 0 && nframes > 0) {
    // load one double and replicate
    work = _mm_set1_pd(*buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // main loop: 8 doubles (four aligned vectors, 64 bytes) per iteration
  while (nframes >= 8) {
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;

    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;

    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;

    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    nframes -= 8;
  }

  // work through smaller chunks of aligned buffers
  while (nframes >= 2) {
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    nframes -= 2;
  }

  // work through the remaining value
  while ( nframes > 0) {
    // load the last double and replicate
    work = _mm_set1_pd(*buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // find final min and max value through shuffle tricks:
  // combine each register with a copy whose two lanes are swapped,
  // then store the low lane
  work = _mm_shuffle_pd(current_min, current_min, _MM_SHUFFLE2(0, 1));
  work = _mm_min_pd (work, current_min);
  _mm_store_sd(min, work);

  work = _mm_shuffle_pd(current_max, current_max, _MM_SHUFFLE2(0, 1));
  work = _mm_max_pd (work, current_max);
  _mm_store_sd(max, work);
}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
693

Uwe Schulzweida's avatar
Uwe Schulzweida committed
694
#endif // SIMD
Uwe Schulzweida's avatar
Uwe Schulzweida committed
695

Uwe Schulzweida's avatar
Uwe Schulzweida committed
696
#if defined(_ARCH_PWR6)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
697
/*
 * pwr6_minmax_val_double_unrolled6 - min/max reduction written with the
 * __fsel intrinsic and six independent accumulators.
 *
 *   data      input values, read only
 *   datasize  number of doubles in data
 *   fmin,fmax in:  starting values of the reduction
 *             out: updated with the extrema found in data
 *
 * An empty input leaves *fmin and *fmax untouched.
 *
 * NOTE(review): the selects below rely on __fsel(x, a, b) yielding a when
 * x >= 0.0 and b otherwise - confirm against the compiler documentation.
 */
static
void pwr6_minmax_val_double_unrolled6(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  enum { UNROLL_DEPTH = 6 };

  // data[0] seeds the accumulators, so an empty input must stop here
  if ( datasize == 0 ) return;

  const size_t residual = datasize % UNROLL_DEPTH;
  const size_t ofs = datasize - residual;

  // plain automatic arrays: indexing an array declared "register" is undefined
  double dmin[UNROLL_DEPTH];
  double dmax[UNROLL_DEPTH];

  for ( size_t j = 0; j < UNROLL_DEPTH; j++ )
    {
      dmin[j] = data[0];
      dmax[j] = data[0];
    }

  // to allow pipelining we have to unroll:
  // accumulator j only ever sees the elements i+j of each group of six
  for ( size_t i = 0; i < ofs; i += UNROLL_DEPTH )
    {
      for ( size_t j = 0; j < UNROLL_DEPTH; j++ )
        {
          dmin[j] = __fsel(dmin[j] - data[i+j], data[i+j], dmin[j]);
          dmax[j] = __fsel(data[i+j] - dmax[j], data[i+j], dmax[j]);
        }
    }

  // the last datasize % 6 elements that did not fill a whole group
  for ( size_t j = 0; j < residual; j++ )
    {
      dmin[j] = __fsel(dmin[j] - data[ofs+j], data[ofs+j], dmin[j]);
      dmax[j] = __fsel(data[ofs+j] - dmax[j], data[ofs+j], dmax[j]);
    }

  // merge the six partial results into the caller's running values
  for ( size_t j = 0; j < UNROLL_DEPTH; j++ )
    {
      *fmin = __fsel(*fmin - dmin[j], dmin[j], *fmin);
      *fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax);
    }
}
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
741

Uwe Schulzweida's avatar
Uwe Schulzweida committed
742
743
/*
 * Prototypes carrying the noinline attribute for the TEST_MINMAXVAL build.
 * Presumably this keeps the benchmark timing real calls to each kernel -
 * TODO confirm.  The *_simd kernels are only defined when OMP_SIMD is set,
 * so their prototypes are guarded the same way.
 */
#if defined(TEST_MINMAXVAL) && defined(__GNUC__)
static
void minmax_val_double_orig(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__ ((noinline));
static
void minmax_val_float(const float *restrict data, long idatasize, float *fmin, float *fmax) __attribute__ ((noinline));
# if defined(OMP_SIMD)
static
void minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__ ((noinline));
static
void minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax) __attribute__ ((noinline));
# endif
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
752

Uwe Schulzweida's avatar
Uwe Schulzweida committed
753
#if defined(GNUC_PUSH_POP)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
754
#pragma GCC push_options
Uwe Schulzweida's avatar
Uwe Schulzweida committed
755
#pragma GCC optimize ("O3", "fast-math")
Uwe Schulzweida's avatar
Uwe Schulzweida committed
756
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
757
/*
 * minmax_val_double_orig - plain scalar min/max reduction over doubles.
 *
 *   data      input values, read only
 *   datasize  number of doubles in data
 *   fmin,fmax in:  starting values of the reduction
 *             out: smallest / largest of the starting value and all inputs
 *
 * The vendor pragmas in front of the loop tell the respective compilers
 * that the iterations carry no dependency that would block vectorization.
 */
static
void minmax_val_double_orig(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  // reduce in locals and write the results back once at the end
  double lo = *fmin;
  double hi = *fmax;

#if   defined(CRAY)
#pragma _CRI ivdep
#elif defined(SX)
#pragma vdir nodep
#elif defined(__uxp__)
#pragma loop novrec
#elif defined (__ICC)
#pragma ivdep
#endif
  for ( size_t i = 0; i < datasize; ++i )
    {
      const double v = data[i];
      lo = lo < v ? lo : v;
      hi = hi > v ? hi : v;
    }

  *fmin = lo;
  *fmax = hi;
}

/*
 * minmax_val_float - scalar min/max reduction over floats.
 *
 *   data       input values, read only
 *   idatasize  number of floats in data; values below 1 make the call a no-op
 *   fmin,fmax  in:  starting values of the reduction
 *              out: smallest / largest of the starting value and all inputs
 */
static
void minmax_val_float(const float *restrict data, long idatasize, float *fmin, float *fmax)
{
  // A negative count would turn into a huge size_t below and run the loop
  // far past the end of data; reject it like minmax_val_double does.
  if ( idatasize < 1 ) return;

  const size_t datasize = (size_t)idatasize;
  float lo = *fmin;
  float hi = *fmax;

#if   defined(CRAY)
#pragma _CRI ivdep
#elif defined(SX)
#pragma vdir nodep
#elif defined(__uxp__)
#pragma loop novrec
#elif defined (__ICC)
#pragma ivdep
#endif
  for ( size_t i = 0; i < datasize; ++i )
    {
      const float v = data[i];
      lo = lo < v ? lo : v;
      hi = hi > v ? hi : v;
    }

  *fmin = lo;
  *fmax = hi;
}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
805
#if defined(GNUC_PUSH_POP)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
806
807
808
#pragma GCC pop_options
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
809
810
811
812
813
814
815
816
// TEST
#if defined(OMP_SIMD)

#if defined(GNUC_PUSH_POP)
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math")
#endif
/*
 * minmax_val_double_simd - min/max reduction over doubles whose loop is
 * annotated with an OpenMP "simd" reduction when _OPENMP is defined.
 *
 *   data      input values, read only
 *   datasize  number of doubles in data
 *   fmin,fmax in:  starting values of the reduction
 *             out: smallest / largest of the starting value and all inputs
 */
static
void minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  // the reduction clause below refers to these two locals by name
  double dmin = *fmin;
  double dmax = *fmax;

#if defined(_OPENMP)
#pragma omp simd reduction(min:dmin) reduction(max:dmax)
#endif
  for ( size_t i = 0; i < datasize; ++i )
    {
      const double v = data[i];
      dmin = dmin < v ? dmin : v;
      dmax = dmax > v ? dmax : v;
    }

  *fmin = dmin;
  *fmax = dmax;
}
/*
 * minmax_val_float_simd - min/max reduction over floats whose loop is
 * annotated with an OpenMP "simd" reduction when _OPENMP is defined.
 *
 *   data      input values, read only
 *   datasize  number of floats in data
 *   fmin,fmax in:  starting values of the reduction
 *             out: smallest / largest of the starting value and all inputs
 */
static
void minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax)
{
  // the reduction clause below refers to these two locals by name
  float dmin = *fmin;
  float dmax = *fmax;

#if defined(_OPENMP)
#pragma omp simd reduction(min:dmin) reduction(max:dmax)
#endif
  for ( size_t i = 0; i < datasize; ++i )
    {
      const float v = data[i];
      dmin = dmin < v ? dmin : v;
      dmax = dmax > v ? dmax : v;
    }

  *fmin = dmin;
  *fmax = dmax;
}
#if defined(GNUC_PUSH_POP)
#pragma GCC pop_options
#endif
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
855
/*
 * minmax_val_double - front end that forwards to the min/max kernel chosen
 * at compile time: AVX, SSE2, the POWER6 __fsel variant, or the plain loop.
 *
 *   data       input values, read only
 *   idatasize  number of doubles in data; values below 1 make the call a no-op
 *   fmin,fmax  in:  starting values of the reduction
 *              out: updated by the selected kernel
 *
 * When _GET_X86_COUNTER or _GET_MACH_COUNTER is defined the call is timed
 * and the counter difference plus the results are printed; with
 * _GET_IBM_COUNTER the non-SIMD kernels are bracketed by hpmStart/hpmStop.
 */
static
void minmax_val_double(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
  uint64_t start_minmax, end_minmax;
#endif

  // nothing to scan: leave *fmin and *fmax as they are
  if ( idatasize < 1 ) return;

  const size_t datasize = (size_t)idatasize;

#if defined(_GET_X86_COUNTER)
  start_minmax = _rdtsc();
#endif
#if defined(_GET_MACH_COUNTER)
  start_minmax = mach_absolute_time();
#endif

#if defined(_ENABLE_AVX)

  avx_minmax_val_double(data, datasize, fmin, fmax);

#elif defined(_ENABLE_SSE2)

  sse2_minmax_val_double(data, datasize, fmin, fmax);

#else

#if defined(_ARCH_PWR6)

#if defined(_GET_IBM_COUNTER)
  hpmStart(1, "minmax fsel");
#endif

  pwr6_minmax_val_double_unrolled6(data, datasize, fmin, fmax);

#if defined(_GET_IBM_COUNTER)
  hpmStop(1);
#endif

#else // original loop

#if defined(_GET_IBM_COUNTER)
  hpmStart(1, "minmax base");
#endif

  minmax_val_double_orig(data, datasize, fmin, fmax);

#if defined(_GET_IBM_COUNTER)
  hpmStop(1);
#endif

#endif // _ARCH_PWR6 && original loop
#endif // SIMD

#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
#if defined(_GET_X86_COUNTER)
  end_minmax = _rdtsc();
#endif
#if defined(_GET_MACH_COUNTER)
  end_minmax = mach_absolute_time();
#endif
#if defined(_ENABLE_AVX)
  printf("AVX minmax cycles:: %" PRIu64 "\n",  end_minmax-start_minmax);
  fprintf (stderr, "AVX min: %lf max: %lf\n", *fmin, *fmax);
#elif defined(_ENABLE_SSE2)
  printf("SSE2 minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
  fprintf (stderr, "SSE2 min: %lf max: %lf\n", *fmin, *fmax);
#else
  printf("loop minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
  fprintf (stderr, "loop min: %lf max: %lf\n", *fmin, *fmax);
#endif
#endif
}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
935

Uwe Schulzweida's avatar
Uwe Schulzweida committed
936
#if defined(TEST_MINMAXVAL)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957

#include <stdio.h>
#include <sys/time.h>

/*
 * dtime - current time of day from gettimeofday() as seconds in a double,
 * with the microsecond part as the fraction.  Used by the benchmark to
 * take the difference between two readings.
 */
static
double dtime(void)
{
  struct timeval now;
  gettimeofday(&now, NULL);
  return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}

#define NRUN 10000

int main(void)
{
  long datasize = 1000000;
  double t_begin, t_end;

Uwe Schulzweida's avatar
Uwe Schulzweida committed
958
959
960
961
#if   defined(_OPENMP)
  printf("_OPENMP=%d\n", _OPENMP);
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
962
963
964
965
966
967
968
969
#if   defined(__ICC)
  printf("icc\n");
#elif defined(__clang__)
  printf("clang\n");
#elif defined(__GNUC__)
  printf("gcc\n");
#endif

970
971
  {
    float fmin, fmax;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
972
    float *data_sp = (float*) malloc(datasize*sizeof(float));
Uwe Schulzweida's avatar
Uwe Schulzweida committed
973

974
975
    for ( long i = 0; i < datasize/2; i++ )        data_sp[i] = (float) (i);
    for ( long i = datasize/2; i < datasize; i++ ) data_sp[i] = (float) (-datasize + i);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
976

977
    printf("float:\n");
Uwe Schulzweida's avatar
Uwe Schulzweida committed
978

979
980
981
982
983
984
985
986
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_sp[0];
	minmax_val_float(data_sp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("minmax_val: fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
987
988
989
990
991
992
993
994
995
996
997
998

#if defined(OMP_SIMD)
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_sp[0];
	minmax_val_float_simd(data_sp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("simd      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
999
    free(data_sp);
1000
1001
1002
1003
  }

  {
    double fmin, fmax;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1004
    double *data_dp = (double*) malloc(datasize*sizeof(double));
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028

    // for ( long i = datasize-1; i >= 0; i-- ) data[i] = (double) (-datasize/2 + i);
    for ( long i = 0; i < datasize/2; i++ )        data_dp[i] = (double) (i);
    for ( long i = datasize/2; i < datasize; i++ ) data_dp[i] = (double) (-datasize + i);

    printf("double:\n");

    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
	minmax_val_double(data_dp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("minmax_val: fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);

    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
	minmax_val_double_orig(data_dp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("orig      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1029

Uwe Schulzweida's avatar
Uwe Schulzweida committed
1030
#if defined(OMP_SIMD)
1031
1032
1033
1034
1035
1036
1037
1038
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
	minmax_val_double_simd(data_dp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("simd      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1039
1040
#endif

Uwe Schulzweida's avatar
Uwe Schulzweida committed
1041
#if defined(_ENABLE_AVX)
1042
1043
1044
1045
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1046
	avx_minmax_val_double(data_dp, datasize, &fmin, &fmax);
1047
1048
1049
      }
    t_end = dtime();
    printf("avx       : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1050
#elif defined(_ENABLE_SSE2)
1051
1052
1053
1054
1055
1056
1057
1058
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
	sse2_minmax_val_double(data_dp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("sse2      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1059
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1060
#if defined(_ARCH_PWR6)
1061
1062
1063
1064
1065
1066
1067
1068
    t_begin = dtime();
    for ( int i = 0; i < NRUN; ++i )
      {
	fmin = fmax = data_dp[0];
	pwr6_minmax_val_double_unrolled6(data_dp, datasize, &fmin, &fmax);
      }
    t_end = dtime();
    printf("pwr6u6  : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long)fmin, (long) fmax, t_end-t_begin);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1069
#endif
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1070
    free(data_dp);
1071
  }
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1072
1073
1074
1075
1076

  return (0);
}
#endif // TEST_MINMAXVAL

Uwe Schulzweida's avatar
Uwe Schulzweida committed
1077
#undef DISABLE_SIMD_MINMAXVAL
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1078
1079
#undef _ENABLE_AVX
#undef _ENABLE_SSE2
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1080
#undef GNUC_PUSH_POP
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1081
1082
/*
### new version with gribSwapByteOrder_uint16()
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1083
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_ENCODE encode_array.c
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1084
 result on hama2 (icc 16.0.2):
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1085