Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpim-sw
libcdi
Commits
afca0021
Commit
afca0021
authored
Jul 12, 2013
by
Uwe Schulzweida
Browse files
merged trunk/cdi to branches/cdo-pio
parents
405cb015
7dca1a62
Changes
18
Hide whitespace changes
Inline
Side-by-side
ChangeLog
View file @
afca0021
2013-06-18 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
2013-09-26 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* Version 1.6.2 released
* using CGRIBEX library version 1.6.2
2013-07-08 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* netCDF: wrong result if type of data and type of attribute valid_range differ [Bug #3727]
2013-07-04 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* gribapiDefLevel::ZAXIS_DEPTH_BELOW_LAND: apply scalefactor to dlevel2 (bug fix) [report: Harald Anlauf]
* streamFilesuffix: changed default filename suffix for FILETYPE_NC2 to nc
2013-06-28 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* Version 1.6.1 released
* using CGRIBEX library version 1.6.1
2013-06-27 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
* file.c: added env. var. FILE_TYPE_WRITE (1:open; 2:fopen)
2013-06-11 Uwe Schulzweida <Uwe.Schulzweida@zmaw.de>
...
...
NEWS
View file @
afca0021
CDI NEWS
--------
Version 1.6.2 (26 September 2013):
Fixed bugs:
* netCDF: wrong result if type of data and type of attribute valid_range differ [Bug #3727]
Version 1.6.1 (18 June 2013):
New features:
...
...
configure
View file @
afca0021
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for cdi 1.6.
1
.
# Generated by GNU Autoconf 2.68 for cdi 1.6.
2
.
#
# Report bugs to <http://code.zmaw.de/projects/cdi>.
#
...
...
@@ -570,8 +570,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='cdi'
PACKAGE_TARNAME='cdi'
PACKAGE_VERSION='1.6.
1
'
PACKAGE_STRING='cdi 1.6.
1
'
PACKAGE_VERSION='1.6.
2
'
PACKAGE_STRING='cdi 1.6.
2
'
PACKAGE_BUGREPORT='http://code.zmaw.de/projects/cdi'
PACKAGE_URL=''
...
...
@@ -1421,7 +1421,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures cdi 1.6.
1
to adapt to many kinds of systems.
\`configure' configures cdi 1.6.
2
to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
...
...
@@ -1491,7 +1491,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of cdi 1.6.
1
:";;
short | recursive ) echo "Configuration of cdi 1.6.
2
:";;
esac
cat <<\_ACEOF
...
...
@@ -1659,7 +1659,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
cdi configure 1.6.
1
cdi configure 1.6.
2
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
...
...
@@ -2420,7 +2420,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by cdi $as_me 1.6.
1
, which was
It was created by cdi $as_me 1.6.
2
, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
...
...
@@ -3317,7 +3317,7 @@ fi
# Define the identity of the package.
PACKAGE='cdi'
VERSION='1.6.
1
'
VERSION='1.6.
2
'
cat >>confdefs.h <<_ACEOF
...
...
@@ -27882,7 +27882,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by cdi $as_me 1.6.
1
, which was
This file was extended by cdi $as_me 1.6.
2
, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
...
...
@@ -27948,7 +27948,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
cdi config.status 1.6.
1
cdi config.status 1.6.
2
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
...
...
configure.ac
View file @
afca0021
# Process this file with autoconf to produce a configure script.
AC_INIT([cdi], [1.6.
1
], [http://code.zmaw.de/projects/cdi])
AC_INIT([cdi], [1.6.
2
], [http://code.zmaw.de/projects/cdi])
echo "configuring ${PACKAGE_NAME} ${PACKAGE_VERSION}"
...
...
doc/tex/c_quick_ref.tex
View file @
afca0021
...
...
@@ -856,6 +856,15 @@ Destroy a variable list.
Duplicate a variable list.
\section*
{
\tt
\htmlref
{
vlistHasVarKey
}{
vlistHasVarKey
}}
\begin{verbatim}
int vlistHasVarKey (int vlistID, int varID, const char *name);
\end{verbatim}
Returns 1 if the meta-data key was read, 0 otherwise.
\section*
{
\tt
\htmlref
{
vlistInqAtt
}{
vlistInqAtt
}}
\begin{verbatim}
...
...
doc/tex/f_quick_ref.tex
View file @
afca0021
...
...
@@ -867,6 +867,16 @@ Destroy a variable list.
Duplicate a variable list.
\section*
{
\tt
\htmlref
{
vlistHasVarKey
}{
vlistHasVarKey
}}
\begin{verbatim}
INTEGER FUNCTION vlistHasVarKey (INTEGER vlistID, INTEGER varID,
CHARACTER*(*) name)
\end{verbatim}
Returns 1 if the meta-data key was read, 0 otherwise.
\section*
{
\tt
\htmlref
{
vlistInqAtt
}{
vlistInqAtt
}}
\begin{verbatim}
...
...
src/cdi.h
View file @
afca0021
...
...
@@ -561,7 +561,7 @@ void vlistDefVarIntKey(int vlistID, int varID, const char *name, int value);
void
vlistDefVarDblKey
(
int
vlistID
,
int
varID
,
const
char
*
name
,
double
value
);
/* vlistHasVarKey: returns 1 if meta-data key was read, 0 otherwise. */
int
vlistHasVarKey
(
int
vlistID
,
int
varID
,
const
char
*
name
);
int
vlistHasVarKey
(
int
vlistID
,
int
varID
,
const
char
*
name
);
/* vlistInqVarDblKey: raw access to GRIB meta-data */
double
vlistInqVarDblKey
(
int
vlistID
,
int
varID
,
const
char
*
name
);
/* vlistInqVarIntKey: raw access to GRIB meta-data */
...
...
src/cdi.inc
View file @
afca0021
...
...
@@ -1227,6 +1227,12 @@
!
DOUBLEPRECISION
value
)
EXTERNAL
vlistDefVarDblKey
INTEGER
vlistHasVarKey
!
(
INTEGER
vlistID
,
!
INTEGER
varID
,
!
CHARACTER
*
(
*
)
name
)
EXTERNAL
vlistHasVarKey
DOUBLEPRECISION
vlistInqVarDblKey
!
(
INTEGER
vlistID
,
!
INTEGER
varID
,
...
...
src/cdiFortran.c
View file @
afca0021
...
...
@@ -278,6 +278,7 @@ FCALLSCFUN5 (INT, vlistInqVarEnsemble, VLISTINQVARENSEMBLE, vlistinqvarensemble,
FCALLSCSUB1
(
cdiDefAdditionalKey
,
CDIDEFADDITIONALKEY
,
cdidefadditionalkey
,
STRING
)
FCALLSCSUB4
(
vlistDefVarIntKey
,
VLISTDEFVARINTKEY
,
vlistdefvarintkey
,
INT
,
INT
,
STRING
,
INT
)
FCALLSCSUB4
(
vlistDefVarDblKey
,
VLISTDEFVARDBLKEY
,
vlistdefvardblkey
,
INT
,
INT
,
STRING
,
DOUBLE
)
FCALLSCFUN3
(
INT
,
vlistHasVarKey
,
VLISTHASVARKEY
,
vlisthasvarkey
,
INT
,
INT
,
STRING
)
FCALLSCFUN3
(
DOUBLE
,
vlistInqVarDblKey
,
VLISTINQVARDBLKEY
,
vlistinqvardblkey
,
INT
,
INT
,
STRING
)
FCALLSCFUN3
(
INT
,
vlistInqVarIntKey
,
VLISTINQVARINTKEY
,
vlistinqvarintkey
,
INT
,
INT
,
STRING
)
...
...
src/cfortran.h
View file @
afca0021
...
...
@@ -64,6 +64,7 @@
#define __CF__APOLLO67
/* __STDCPP__ is in Apollo 6.8 (i.e. ANSI) and onwards */
#endif
#endif
#include
<limits.h>
/* LONG_MAX */
#if !defined(__GNUC__) && !defined(__sun) && (defined(sun)||defined(VAXUltrix)||defined(lynx))
#define __CF__KnR
/* Sun, LynxOS and VAX Ultrix cc only supports K&R. */
...
...
@@ -244,6 +245,18 @@ only C calling FORTRAN subroutines will work using K&R style.*/
#endif
#endif
/* INT64_T for 8 byte integers */
#if ! defined (LONG_MAX)
# error LONG_MAX undefined
#endif
#undef INT64_T
#if LONG_MAX > 2147483647L
# define INT64_T long int
#else
# define INT64_T long long int
/* c.f. typedef of int64_t in <stdint.h> */
#endif
#ifdef CRAYFortran
#ifdef _CRAY
#include
<fortran.h>
...
...
@@ -461,8 +474,10 @@ for (i=0; i<sizeofcstr/elem_len; i++) {
}
return
cstr
;
}
/* kill the trailing char t's in string s. */
#if defined (__GNUC__)
#pragma GCC push_options
#pragma GCC optimize ("O2")
#endif
#ifndef __CF__KnR
static
char
*
kill_trailing
(
char
*
s
,
char
t
)
#else
...
...
@@ -474,7 +489,9 @@ if (e>s) { /* Need this to handle NULL string.*/
while
(
e
>
s
&&
*--
e
==
t
);
/* Don't follow t's past beginning. */
e
[
*
e
==
t
?
0
:
1
]
=
'\0'
;
/* Handle s[0]=t correctly. */
}
return
s
;
}
#if defined (__GNUC__)
#pragma GCC pop_options
#endif
/* kill_trailingn(s,t,e) will kill the trailing t's in string s. e normally
points to the terminating '\0' of s, but may actually point to anywhere in s.
...
...
@@ -1331,20 +1348,24 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#else
#define INTEGER_BYTE unsigned char
#endif
#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE
#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION
#define FLOATVVVVVVV_cfTYPE FORTRAN_REAL
#define INTVVVVVVV_cfTYPE int
#define LOGICALVVVVVVV_cfTYPE int
#define LONGVVVVVVV_cfTYPE long
#define SHORTVVVVVVV_cfTYPE short
#define PBYTE_cfTYPE INTEGER_BYTE
#define PDOUBLE_cfTYPE DOUBLE_PRECISION
#define PFLOAT_cfTYPE FORTRAN_REAL
#define PINT_cfTYPE int
#define PLOGICAL_cfTYPE int
#define PLONG_cfTYPE long
#define PSHORT_cfTYPE short
#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE
#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION
#define FLOATVVVVVVV_cfTYPE FORTRAN_REAL
#define INTVVVVVVV_cfTYPE int
#define LOGICALVVVVVVV_cfTYPE int
#define LONGVVVVVVV_cfTYPE long
#define LONGLONGVVVVVVV_cfTYPE long long
#define INT64VVVVVVV_cfTYPE INT64_T
#define SHORTVVVVVVV_cfTYPE short
#define PBYTE_cfTYPE INTEGER_BYTE
#define PDOUBLE_cfTYPE DOUBLE_PRECISION
#define PFLOAT_cfTYPE FORTRAN_REAL
#define PINT_cfTYPE int
#define PLOGICAL_cfTYPE int
#define PLONG_cfTYPE long
#define PLONGLONG_cfTYPE long long
#define PINT64_cfTYPE INT64_T
#define PSHORT_cfTYPE short
#define CFARGS0(A,T,V,W,X,Y,Z) _3(T,_cf,A)
#define CFARGS1(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V)
...
...
@@ -1361,6 +1382,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LOGICAL_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define LONGLONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define INT64_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define SHORT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z)
#define PBYTE_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PDOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,PINT,B,X,Y,Z,0)
...
...
@@ -1368,6 +1391,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define PINT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLOGICAL_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PLONGLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PINT64_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define PSHORT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z)
#define BYTEV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z)
#define BYTEVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z)
...
...
@@ -1608,6 +1633,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define LOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E)
#define LONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define LONGLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define INT64_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define SHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define BYTEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define BYTEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
...
...
@@ -1664,6 +1691,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define PINT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PLOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PLOGICAL,A,B,C,D,E)
#define PLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PLONGLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PINT64_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define PSHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E)
#define STRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRING,A,B,C,D,E)
#define PSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRING,A,B,C,D,E)
...
...
@@ -1721,6 +1750,8 @@ do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5
#define INT_cfCCC(A,B) &A
#define LOGICAL_cfCCC(A,B) &A
#define LONG_cfCCC(A,B) &A
#define LONGLONG_cfCCC(A,B) &A
#define INT64_cfCCC(A,B) &A
#define SHORT_cfCCC(A,B) &A
#define PBYTE_cfCCC(A,B) A
#define PDOUBLE_cfCCC(A,B) A
...
...
@@ -1976,6 +2007,8 @@ static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(
#define INT_cfT(M,I,A,B,D) *A
#define LOGICAL_cfT(M,I,A,B,D) F2CLOGICAL(*A)
#define LONG_cfT(M,I,A,B,D) *A
#define LONGLONG_cfT(M,I,A,B,D) *A
#define INT64_cfT(M,I,A,B,D) *A
#define SHORT_cfT(M,I,A,B,D) *A
#define BYTEV_cfT(M,I,A,B,D) A
#define DOUBLEV_cfT(M,I,A,B,D) A
...
...
@@ -2032,6 +2065,8 @@ static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(
#define PINT_cfT(M,I,A,B,D) A
#define PLOGICAL_cfT(M,I,A,B,D) ((*A=F2CLOGICAL(*A)),A)
#define PLONG_cfT(M,I,A,B,D) A
#define PLONGLONG_cfT(M,I,A,B,D) A
#define PINT64_cfT(M,I,A,B,D) A
#define PSHORT_cfT(M,I,A,B,D) A
#define PVOID_cfT(M,I,A,B,D) A
#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran)
...
...
@@ -2380,12 +2415,3 @@ string. */
#endif
/* __CFORTRAN_LOADED */
/*
* Local Variables:
* c-file-style: "Java"
* c-basic-offset: 2
* indent-tabs-mode: nil
* show-trailing-whitespace: t
* require-trailing-newline: t
* End:
*/
src/cgribexlib.c
View file @
afca0021
/* Automatically generated by m214003 at 2013-0
3-05
, do not edit */
/* Automatically generated by m214003 at 2013-0
7-11
, do not edit */
/* CGRIBEXLIB_VERSION="1.6.
0
" */
/* CGRIBEXLIB_VERSION="1.6.
2
" */
#ifdef _ARCH_PWR6
#pragma options nostrict
...
...
@@ -15,6 +15,7 @@
#include
<ctype.h>
#include
<stdarg.h>
#include
<sys/types.h>
#include
<inttypes.h>
#include
"file.h"
...
...
@@ -368,7 +369,7 @@ void gribDecode(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
#include
<math.h>
const
double
const
_pow2tab
[
158
]
=
{
const
double
_pow2tab
[
158
]
=
{
/* pow(2.0, 0.0) */
1
.
0
,
/* pow(2.0, 1.0) */
2
.
0
,
/* pow(2.0, 2.0) */
4
.
0
,
...
...
@@ -530,7 +531,7 @@ const double const _pow2tab[158] = {
};
const
double
const
_pow16tab
[
71
]
=
{
const
double
_pow16tab
[
71
]
=
{
/* pow(16.0, 0.0) */
1
.
0
,
/* pow(16.0, 1.0) */
16
.
0
,
/* pow(16.0, 2.0) */
256
.
0
,
...
...
@@ -640,13 +641,6 @@ double intpow2(int x)
//#undef _GET_MACH_COUNTER
//#undef _ARCH_PWR6
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#if defined _GET_IBM_COUNTER
#include
<libhpc.h>
#elif defined _GET_X86_COUNTER
...
...
@@ -655,30 +649,45 @@ double intpow2(int x)
#include
<mach/mach_time.h>
#endif
//#define DISABLE_SIMD
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#define DISABLE_SIMD
#ifdef DISABLE_SIMD
#ifndef ENABLE_AVX
#undef __AVX__
#endif
#ifndef ENABLE_SSE2
#undef __SSE2__
# ifdef ENABLE_AVX
# define _ENABLE_AVX
# endif
# ifdef ENABLE_SSE2
# define _ENABLE_SSE2
# endif
#endif
#ifndef DISABLE_SIMD
# ifdef __AVX__
# define _ENABLE_AVX
# endif
# ifdef __SSE2__
# define _ENABLE_SSE2
# endif
#endif
#if defined __AVX__
#include
<float.h>
#include
<stdint.h>
#include
<inttypes.h>
#if defined _ENABLE_AVX
#include
<immintrin.h>
#elif defined __SSE2__
#include
<float.h>
#include
<stdint.h>
#include
<inttypes.h>
#elif defined _ENABLE_SSE2
#include
<emmintrin.h>
#endif
#if defined __AVX__
#if defined _ENABLE_AVX
static
void
avx_minmax_val
(
const
double
*
restrict
buf
,
size_t
nframes
,
double
*
min
,
double
*
max
)
...
...
@@ -772,7 +781,7 @@ void avx_minmax_val(const double *restrict buf, size_t nframes, double *min, dou
return
;
}
#elif defined __SSE2
__
#elif defined _
ENABLE
_SSE2
static
void
sse2_minmax_val
(
const
double
*
restrict
buf
,
size_t
nframes
,
double
*
min
,
double
*
max
)
...
...
@@ -849,43 +858,17 @@ void sse2_minmax_val(const double *restrict buf, size_t nframes, double *min, do
return
;
}
#endif
#endif
// SIMD
#ifdef _ARCH_PWR6
static
void
minmax_val
(
const
double
*
restrict
data
,
long
idatasize
,
double
*
fmin
,
double
*
fmax
)
void
pwr6_
minmax_val
_unrolled6
(
const
double
*
restrict
data
,
long
idatasize
,
double
*
fmin
,
double
*
fmax
)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t
start_minmax
,
end_minmax
;
#endif
size_t
datasize
=
idatasize
;
if
(
idatasize
<
1
)
return
;
#ifdef _GET_X86_COUNTER
start_minmax
=
_rdtsc
();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax
=
mach_absolute_time
();
#endif
#if defined __AVX__
avx_minmax_val
(
data
,
datasize
,
fmin
,
fmax
);
#elif defined __SSE2__
sse2_minmax_val
(
data
,
datasize
,
fmin
,
fmax
);
#else
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_1 6
size_t
datasize
=
idatasize
;
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart
(
1
,
"minmax fsel"
);
#endif
{
size_t
i
,
j
;
size_t
residual
=
datasize
%
__UNROLL_DEPTH_1
;
...
...
@@ -920,19 +903,21 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
*
fmax
=
__fsel
(
dmax
[
j
]
-
*
fmax
,
dmax
[
j
],
*
fmax
);
}
}
#ifdef _GET_IBM_COUNTER
hpmStop
(
1
);
#endif
#undef __UNROLL_DEPTH_1
}
#endif
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart
(
1
,
"minmax base"
);
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC push_options
#pragma GCC optimize ("O3", "fast-math")
#endif
{
size_t
i
;
static
void
minmax_val_orig
(
const
double
*
restrict
data
,
long
idatasize
,
double
*
fmin
,
double
*
fmax
)
{
size_t
i
;
size_t
datasize
=
idatasize
;
double
dmin
=
*
fmin
,
dmax
=
*
fmax
;
#if defined (CRAY)
#pragma _CRI ivdep
...
...
@@ -941,16 +926,73 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#elif defined (__uxp__)
#pragma loop novrec
#endif
for
(
i
=
0
;
i
<
datasize
;
++
i
)
{
if
(
*
fmin
>
data
[
i
]
)
*
fmin
=
data
[
i
];
if
(
*
fmax
<
data
[
i
]
)
*
fmax
=
data
[
i
];
/*
*fmin = *fmin < data[i] ? *fmin : data[i];
*fmax = *fmax > data[i] ? *fmax : data[i];
*/
}
}
for
(
i
=
0
;
i
<
datasize
;
++
i
)
{
dmin
=
dmin
<
data
[
i
]
?
dmin
:
data
[
i
];
dmax
=
dmax
>
data
[
i
]
?
dmax
:
data
[
i
];
// if ( dmin > data[i] ) dmin = data[i];
// if ( dmax < data[i] ) dmax = data[i];
}
*
fmin
=
dmin
;
*
fmax
=
dmax
;
}
#if defined (__GNUC__) && defined (__GNUC_MINOR__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 4)
#pragma GCC pop_options
#endif
static
void
minmax_val
(
const
double
*
restrict
data
,
long
idatasize
,
double
*
fmin
,
double
*
fmax
)
{
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER
uint64_t
start_minmax
,
end_minmax
;
#endif
size_t
datasize
=
idatasize
;
if
(
idatasize
<
1
)
return
;
#ifdef _GET_X86_COUNTER
start_minmax
=
_rdtsc
();
#endif
#ifdef _GET_MACH_COUNTER
start_minmax
=
mach_absolute_time
();
#endif
#if defined _ENABLE_AVX
avx_minmax_val
(
data
,
datasize
,
fmin
,
fmax
);
#elif defined _ENABLE_SSE2
sse2_minmax_val
(
data
,
datasize
,
fmin
,
fmax
);
#else
#ifdef _ARCH_PWR6
#define __UNROLL_DEPTH_1 6
// to allow pipelining we have to unroll
#ifdef _GET_IBM_COUNTER
hpmStart
(
1
,
"minmax fsel"
);
#endif
pwr6_minmax_val_unrolled6
(
data
,
datasize
,
fmin
,
fmax
);
#ifdef _GET_IBM_COUNTER
hpmStop
(
1
);
#endif
#undef __UNROLL_DEPTH_1
#else // original loop
#ifdef _GET_IBM_COUNTER
hpmStart
(
1
,
"minmax base"
);
#endif
minmax_val_orig
(
data
,
datasize
,
fmin
,
fmax
);
#ifdef _GET_IBM_COUNTER
hpmStop
(
1
);
#endif
...
...
@@ -965,28 +1007,37 @@ void minmax_val(const double *restrict data, long idatasize, double *fmin, doubl
#ifdef _GET_MACH_COUNTER
end_minmax
=
mach_absolute_time
();
#endif
#if defined __AVX__
printf
(
"AVX minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
#if defined _ENABLE_AVX
printf
(
"AVX minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"AVX min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#elif defined __SSE2__
printf
(
"SSE2 minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
#elif defined _ENABLE_SSE2
printf
(
"SSE2 minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"SSE2 min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#else
printf
(
"loop minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
printf
(
"loop minmax cycles:: %"
PRIu64
"
\n
"
,
end_minmax
-
start_minmax
);
fprintf
(
stderr
,
"loop min: %lf max: %lf
\n
"
,
*
fmin
,
*
fmax
);
#endif
#endif