Commit afca0021 authored by Uwe Schulzweida's avatar Uwe Schulzweida
Browse files

merged trunk/cdi to branches/cdo-pio

parents 405cb015 7dca1a62
2013-06-18 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
2013-09-26 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* Version 1.6.2 released
* using CGRIBEX library version 1.6.2
2013-07-08 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* netCDF: wrong result if type of data and type of attribute valid_range differ [Bug #3727]
2013-07-04 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* gribapiDefLevel::ZAXIS_DEPTH_BELOW_LAND: apply scalefactor to dlevel2 (bug fix) [report: Harald Anlauf]
* streamFilesuffix: changed default filename suffix for FILETYPE_NC2 to nc
2013-06-28 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* Version 1.6.1 released
* using CGRIBEX library version 1.6.1
2013-06-27 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* file.c: added env. var. FILE_TYPE_WRITE (1:open; 2:fopen)
2013-06-11 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
......
CDI NEWS
--------
Version 1.6.2 (26 September 2013):
Fixed bugs:
* netCDF: wrong result if type of data and type of attribute valid_range differ [Bug #3727]
Version 1.6.1 (18 June 2013):
New features:
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for cdi 1.6.1.
# Generated by GNU Autoconf 2.68 for cdi 1.6.2.
#
# Report bugs to <http://code.zmaw.de/projects/cdi>.
#
......@@ -570,8 +570,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cdi'
PACKAGE_TARNAME='cdi'
PACKAGE_VERSION='1.6.1'
PACKAGE_STRING='cdi 1.6.1'
PACKAGE_VERSION='1.6.2'
PACKAGE_STRING='cdi 1.6.2'
PACKAGE_BUGREPORT='http://code.zmaw.de/projects/cdi'
PACKAGE_URL=''
 
......@@ -1421,7 +1421,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cdi 1.6.1 to adapt to many kinds of systems.
\`configure' configures cdi 1.6.2 to adapt to many kinds of systems.
 
Usage: $0 [OPTION]... [VAR=VALUE]...
 
......@@ -1491,7 +1491,7 @@ fi
 
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cdi 1.6.1:";;
short | recursive ) echo "Configuration of cdi 1.6.2:";;
esac
cat <<\_ACEOF
 
......@@ -1659,7 +1659,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cdi configure 1.6.1
cdi configure 1.6.2
generated by GNU Autoconf 2.68
 
Copyright (C) 2010 Free Software Foundation, Inc.
......@@ -2420,7 +2420,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
 
It was created by cdi $as_me 1.6.1, which was
It was created by cdi $as_me 1.6.2, which was
generated by GNU Autoconf 2.68. Invocation command line was
 
$ $0 $@
......@@ -3317,7 +3317,7 @@ fi
 
# Define the identity of the package.
PACKAGE='cdi'
VERSION='1.6.1'
VERSION='1.6.2'
 
 
cat >>confdefs.h <<_ACEOF
......@@ -27882,7 +27882,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cdi $as_me 1.6.1, which was
This file was extended by cdi $as_me 1.6.2, which was
generated by GNU Autoconf 2.68. Invocation command line was
 
CONFIG_FILES = $CONFIG_FILES
......@@ -27948,7 +27948,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cdi config.status 1.6.1
cdi config.status 1.6.2
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
 
......
# Process this file with autoconf to produce a configure script.
AC_INIT([cdi], [1.6.1], [http://code.zmaw.de/projects/cdi])
AC_INIT([cdi], [1.6.2], [http://code.zmaw.de/projects/cdi])
echo "configuring ${PACKAGE_NAME} ${PACKAGE_VERSION}"
......
......@@ -856,6 +856,15 @@ Destroy a variable list.
Duplicate a variable list.
\section*{\tt \htmlref{vlistHasVarKey}{vlistHasVarKey}}
\begin{verbatim}
int vlistHasVarKey (int vlistID, int varID, const char *name);
\end{verbatim}
Returns 1 if the meta-data key was read, 0 otherwise.
\section*{\tt \htmlref{vlistInqAtt}{vlistInqAtt}}
\begin{verbatim}
......
......@@ -867,6 +867,16 @@ Destroy a variable list.
Duplicate a variable list.
\section*{\tt \htmlref{vlistHasVarKey}{vlistHasVarKey}}
\begin{verbatim}
INTEGER FUNCTION vlistHasVarKey (INTEGER vlistID, INTEGER varID,
CHARACTER*(*) name)
\end{verbatim}
Returns 1 if the meta-data key was read, 0 otherwise.
\section*{\tt \htmlref{vlistInqAtt}{vlistInqAtt}}
\begin{verbatim}
......
......@@ -561,7 +561,7 @@ void vlistDefVarIntKey(int vlistID, int varID, const char *name, int value);
void vlistDefVarDblKey(int vlistID, int varID, const char *name, double value);
/* vlistHasVarKey: returns 1 if meta-data key was read, 0 otherwise. */
int vlistHasVarKey(int vlistID, int varID, const char* name);
int vlistHasVarKey(int vlistID, int varID, const char *name);
/* vlistInqVarDblKey: raw access to GRIB meta-data */
double vlistInqVarDblKey(int vlistID, int varID, const char *name);
/* vlistInqVarIntKey: raw access to GRIB meta-data */
......
......@@ -1227,6 +1227,12 @@
! DOUBLEPRECISION value)
EXTERNAL vlistDefVarDblKey
INTEGER vlistHasVarKey
! (INTEGER vlistID,
! INTEGER varID,
! CHARACTER*(*) name)
EXTERNAL vlistHasVarKey
DOUBLEPRECISION vlistInqVarDblKey
! (INTEGER vlistID,
! INTEGER varID,
......
......@@ -278,6 +278,7 @@ FCALLSCFUN5 (INT, vlistInqVarEnsemble, VLISTINQVARENSEMBLE, vlistinqvarensemble,
FCALLSCSUB1 (cdiDefAdditionalKey, CDIDEFADDITIONALKEY, cdidefadditionalkey, STRING)
FCALLSCSUB4 (vlistDefVarIntKey, VLISTDEFVARINTKEY, vlistdefvarintkey, INT, INT, STRING, INT)
FCALLSCSUB4 (vlistDefVarDblKey, VLISTDEFVARDBLKEY, vlistdefvardblkey, INT, INT, STRING, DOUBLE)
FCALLSCFUN3 (INT, vlistHasVarKey, VLISTHASVARKEY, vlisthasvarkey, INT, INT, STRING)
FCALLSCFUN3 (DOUBLE, vlistInqVarDblKey, VLISTINQVARDBLKEY, vlistinqvardblkey, INT, INT, STRING)
FCALLSCFUN3 (INT, vlistInqVarIntKey, VLISTINQVARINTKEY, vlistinqvarintkey, INT, INT, STRING)
......
......@@ -64,6 +64,7 @@
#define __CF__APOLLO67 /* __STDCPP__ is in Apollo 6.8 (i.e. ANSI) and onwards */
#endif
#endif
#include <limits.h> /* LONG_MAX */
#if !defined(__GNUC__) && !defined(__sun) && (defined(sun)||defined(VAXUltrix)||defined(lynx))
#define __CF__KnR /* Sun, LynxOS and VAX Ultrix cc only supports K&R. */
......@@ -244,6 +245,18 @@ only C calling FORTRAN subroutines will work using K&R style.*/
#endif
#endif
/* INT64_T for 8 byte integers */
#if ! defined (LONG_MAX)
# error LONG_MAX undefined
#endif
#undef INT64_T
#if LONG_MAX > 2147483647L
# define INT64_T long int
#else
# define INT64_T long long int /* c.f. typedef of int64_t in <stdint.h> */
#endif
#ifdef CRAYFortran
#ifdef _CRAY
#include <fortran.h>
......@@ -461,8 +474,10 @@ for (i=0; i<sizeofcstr/elem_len; i++) {
} return cstr; }
/* kill the trailing char t's in string s. */
#if defined (__GNUC__)
#pragma GCC push_options
#pragma GCC optimize ("O2")
#endif
#ifndef __CF__KnR
static char *kill_trailing(char *s, char t)
#else
......@@ -474,7 +489,9 @@ if (e>s) { /* Need this to handle NULL string.*/
while (e>s && *--e==t); /* Don't follow t's past beginning. */
e[*e==t?0:1] = '\0'; /* Handle s[0]=t correctly. */
} return s; }
#if defined (__GNUC__)
#pragma GCC pop_options
#endif
/* kill_trailingn(s,t,e) will kill the trailing t's in string s. e normally
points to the terminating '\0' of s, but may actually point to anywhere in s.
......@@ -1331,20 +1348,24 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#else
#define INTEGER_BYTE unsigned char
#endif
#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE
#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION
#define FLOATVVVVVVV_cfTYPE FORTRAN_REAL
#define INTVVVVVVV_cfTYPE int
#define LOGICALVVVVVVV_cfTYPE int
#define LONGVVVVVVV_cfTYPE long
#define SHORTVVVVVVV_cfTYPE short
#define PBYTE_cfTYPE INTEGER_BYTE
#define PDOUBLE_cfTYPE DOUBLE_PRECISION
#define PFLOAT_cfTYPE FORTRAN_REAL
#define PINT_cfTYPE int
#define PLOGICAL_cfTYPE int
#define PLONG_cfTYPE long
#define PSHORT_cfTYPE short
#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE
#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION
#define FLOATVVVVVVV_cfTYPE FORTRAN_REAL
#define INTVVVVVVV_cfTYPE int
#define LOGICALVVVVVVV_cfTYPE int
#define LONGVVVVVVV_cfTYPE long
#define LONGLONGVVVVVVV_cfTYPE long long
#define INT64VVVVVVV_cfTYPE INT64_T
#define SHORTVVVVVVV_cfTYPE short
#define PBYTE_cfTYPE INTEGER_BYTE
#define PDOUBLE_cfTYPE DOUBLE_PRECISION
#define PFLOAT_cfTYPE FORTRAN_REAL
#define PINT_cfTYPE int
#define PLOGICAL_cfTYPE int
#define PLONG_cfTYPE long
#define PLONGLONG_cfTYPE long long
#define PINT64_cfTYPE INT64_T
#define PSHORT_cfTYPE short
#define CFARGS0(A,T,V,W,X,Y,Z) _3(T,_cf,A)
#define CFARGS1(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V)
......@@ -1361,6 +1382,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LOGICAL_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LONGLONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define INT64_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define SHORT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define PBYTE_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PDOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,PINT,B,X,Y,Z,0)
......@@ -1368,6 +1391,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define PINT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLOGICAL_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLONGLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PINT64_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PSHORT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define BYTEV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z)
#define BYTEVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z)
......@@ -1608,6 +1633,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define LOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E)
#define LONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define LONGLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define INT64_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define SHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define BYTEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define BYTEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
......@@ -1664,6 +1691,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define PINT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PLOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PLOGICAL,A,B,C,D,E)
#define PLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PLONGLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PINT64_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PSHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define STRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRING,A,B,C,D,E)
#define PSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRING,A,B,C,D,E)
......@@ -1721,6 +1750,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfCCC(A,B) &A
#define LOGICAL_cfCCC(A,B) &A
#define LONG_cfCCC(A,B) &A
#define LONGLONG_cfCCC(A,B) &A
#define INT64_cfCCC(A,B) &A
#define SHORT_cfCCC(A,B) &A
#define PBYTE_cfCCC(A,B) A
#define PDOUBLE_cfCCC(A,B) A
......@@ -1976,6 +2007,8 @@ static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(
#define INT_cfT(M,I,A,B,D) *A
#define LOGICAL_cfT(M,I,A,B,D) F2CLOGICAL(*A)
#define LONG_cfT(M,I,A,B,D) *A
#define LONGLONG_cfT(M,I,A,B,D) *A
#define INT64_cfT(M,I,A,B,D) *A
#define SHORT_cfT(M,I,A,B,D) *A
#define BYTEV_cfT(M,I,A,B,D) A
#define DOUBLEV_cfT(M,I,A,B,D) A
......@@ -2032,6 +2065,8 @@ static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(
#define PINT_cfT(M,I,A,B,D) A
#define PLOGICAL_cfT(M,I,A,B,D) ((*A=F2CLOGICAL(*A)),A)
#define PLONG_cfT(M,I,A,B,D) A
#define PLONGLONG_cfT(M,I,A,B,D) A
#define PINT64_cfT(M,I,A,B,D) A
#define PSHORT_cfT(M,I,A,B,D) A
#define PVOID_cfT(M,I,A,B,D) A
#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran)
......@@ -2380,12 +2415,3 @@ string. */
#endif /* __CFORTRAN_LOADED */
/*
* Local Variables:
* c-file-style: "Java"
* c-basic-offset: 2
* indent-tabs-mode: nil
* show-trailing-whitespace: t
* require-trailing-newline: t
* End:
*/
/* Automatically generated by m214003 at 2013-03-05, do not edit */
/* Automatically generated by m214003 at 2013-07-11, do not edit */
/* CGRIBEXLIB_VERSION="1.6.0" */
/* CGRIBEXLIB_VERSION="1.6.2" */
#ifdef _ARCH_PWR6
#pragma options nostrict
......@@ -15,6 +15,7 @@
#include <ctype.h>
#include <stdarg.h>
#include <sys/types.h>
#include <inttypes.h>
#include "file.h"
......@@ -368,7 +369,7 @@ void gribDecode(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
#include <math.h>
const double const _pow2tab[158] = {
const double _pow2tab[158] = {
/* pow(2.0, 0.0) */ 1.0,
/* pow(2.0, 1.0) */ 2.0,
/* pow(2.0, 2.0) */ 4.0,
......@@ -530,7 +531,7 @@ const double const _pow2tab[158] = {
};
const double const _pow16tab[71] = {
const double _pow16tab[71] = {
/* pow(16.0, 0.0) */ 1.0,
/* pow(16.0, 1.0) */ 16.0,
/* pow(16.0, 2.0) */ 256.0,
......@@ -640,13 +641,6 @@ double intpow2(int x)
//#undef _GET_MACH_COUNTER
//#undef _ARCH_PWR6
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#if defined _GET_IBM_COUNTER
#include <libhpc.h>
#elif defined _GET_X86_COUNTER
......@@ -655,30 +649,45 @@ double intpow2(int x)
#include <mach/mach_time.h>
#endif
//#define DISABLE_SIMD
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#define DISABLE_SIMD
#ifdef DISABLE_SIMD
#ifndef ENABLE_AVX
#undef __AVX__
#endif
#ifndef ENABLE_SSE2
#undef __SSE2__
# ifdef ENABLE_AVX
# define _ENABLE_AVX
# endif
# ifdef ENABLE_SSE2
# define _ENABLE_SSE2
# endif
#endif
#ifndef DISABLE_SIMD
# ifdef __AVX__
# define _ENABLE_AVX
# endif
# ifdef __SSE2__
# define _ENABLE_SSE2
# endif
#endif
#if defined __AVX__
#include <float.h>
#include <stdint.h>
#include <inttypes.h>
#if defined _ENABLE_AVX
#include <immintrin.h>
#elif defined __SSE2__
#include <float.h>
#include <stdint.h>
#include <inttypes.h>
#elif defined _ENABLE_SSE2
#include <emmintrin.h>
#endif
#if defined __AVX__
#if defined _ENABLE_AVX
static
void avx_minmax_val(const double *restrict buf, size_t nframes, double *min, double *max)
......@@ -772,7 +781,7 @@ void avx_minmax_val(const double *restrict buf, size_t nframes, double *min, dou
return;
}
#elif defined __SSE2__
#elif defined _ENABLE_SSE2
static
void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, double *max)
......@@ -849,43 +858,17 @@ void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, do
return;
}
#endif
#endif // SIMD
#ifdef _ARCH_PWR6
static
void minmax_val(const double *restrict data, long idatasize, double *fmin, double *fmax)
void pwr6_minmax_val_unrolled6(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t start_minmax, end_minmax;
#endif
size_t datasize = idatasize;
if ( idatasize < 1 ) return;
#ifdef _GET_X86_COUNTER
start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax = mach_absolute_time();
#endif
#if defined __AVX__
avx_minmax_val(data, datasize, fmin, fmax);
#elif defined __SSE2__
sse2_minmax_val(data, datasize, fmin, fmax);
#else
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_1 6
size_t datasize = idatasize;
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax fsel");
#endif
{
size_t i, j;
size_t residual = datasize % __UNROLL_DEPTH_1;
......@@ -920,19 +903,21 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
*fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax);
}
}
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
#undef __UNROLL_DEPTH_1
}
#endif
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax base");
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math")
#endif
{
size_t i;
static
void minmax_val_orig(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
size_t i;
size_t datasize = idatasize;
double dmin = *fmin, dmax = *fmax;
#if defined (CRAY)
#pragma _CRI ivdep
......@@ -941,16 +926,73 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#elif defined (__uxp__)
#pragma loop novrec
#endif
for ( i = 0; i < datasize; ++i )
{
if ( *fmin > data[i] ) *fmin = data[i];
if ( *fmax < data[i] ) *fmax = data[i];
/*
*fmin = *fmin < data[i] ? *fmin : data[i];
*fmax = *fmax > data[i] ? *fmax : data[i];
*/
}
}
for ( i = 0; i < datasize; ++i )
{
dmin = dmin < data[i] ? dmin : data[i];
dmax = dmax > data[i] ? dmax : data[i];
// if ( dmin > data[i] ) dmin = data[i];
// if ( dmax < data[i] ) dmax = data[i];
}
*fmin = dmin;
*fmax = dmax;
}
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC pop_options
#endif
static
void minmax_val(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t start_minmax, end_minmax;
#endif
size_t datasize = idatasize;
if ( idatasize < 1 ) return;
#ifdef _GET_X86_COUNTER
start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax = mach_absolute_time();
#endif
#if defined _ENABLE_AVX
avx_minmax_val(data, datasize, fmin, fmax);
#elif defined _ENABLE_SSE2
sse2_minmax_val(data, datasize, fmin, fmax);
#else
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_1 6
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax fsel");
#endif
pwr6_minmax_val_unrolled6(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
#undef __UNROLL_DEPTH_1
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart(1, "minmax base");
#endif
minmax_val_orig(data, datasize, fmin, fmax);
#ifdef _GET_IBM_COUNTER
hpmStop(1);
#endif
......@@ -965,28 +1007,37 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#ifdef _GET_MACH_COUNTER
end_minmax = mach_absolute_time();
#endif
#if defined __AVX__
printf("AVX minmax cycles:: %" PRIu64 "\n",
end_minmax-start_minmax);
#if defined _ENABLE_AVX
printf("AVX minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
fprintf (stderr, "AVX min: %lf max: %lf\n", *fmin, *fmax);
#elif defined __SSE2__
printf("SSE2 minmax cycles:: %" PRIu64 "\n",
end_minmax-start_minmax);
#elif defined _ENABLE_SSE2
printf("SSE2 minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
fprintf (stderr, "SSE2 min: %lf max: %lf\n", *fmin, *fmax);
#else
printf("loop minmax cycles:: %" PRIu64 "\n",
end_minmax-start_minmax);
printf("loop minmax cycles:: %" PRIu64 "\n", end_minmax-start_minmax);
fprintf (stderr, "loop min: %lf max: %lf\n", *fmin, *fmax);
#endif
#endif
return;
}