From fd7c902522d3274ff2e22345d4e30bc406d49410 Mon Sep 17 00:00:00 2001
From: Thomas Jahns <jahns@dkrz.de>
Date: Wed, 31 Aug 2016 10:22:28 +0200
Subject: [PATCH] Make necessary changes to open CDF-1/2 files in parallel mode
 when possible.

* Fall back to serial mode if not. Unfortunately, it seems this can only be
  determined at run-time.
---
 .gitignore              |   4 ++
 configure.ac            |   3 +-
 m4/acx_tls_xlc_retry.m4 |  82 ++++++++++++++++++++++++++++++
 m4/ax_tls.m4            |  74 +++++++++++++++++++++++++++
 src/pio_cdf_int.c       | 107 +++++++++++++++++++++++++++++++++++-----
 src/pio_cdf_int.h       |  23 +++++++--
 src/pio_dist_grid.c     |   2 +-
 src/pio_server.c        |  98 ++++++++++++++++++++++++++++--------
 tests/pio_cksum_cdf.in  |  17 +++++--
 9 files changed, 370 insertions(+), 40 deletions(-)
 create mode 100644 m4/acx_tls_xlc_retry.m4
 create mode 100644 m4/ax_tls.m4

diff --git a/.gitignore b/.gitignore
index a0cc17580..4f978fcf9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,10 +117,14 @@ Makefile
 /tests/example_0.cksum
 /tests/example_0.grb
 /tests/example_0.grb2
+/tests/example_0.nc
+/tests/example_0.nc2
 /tests/example_0.nc4
 /tests/example_1.cksum
 /tests/example_1.grb
 /tests/example_1.grb2
+/tests/example_1.nc
+/tests/example_1.nc2
 /tests/example_1.nc4
 /tests/pio_write
 /tests/pio_write_deco2d
diff --git a/configure.ac b/configure.ac
index 7285820e5..b0ef8976d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -267,7 +267,8 @@ main(int argc, char **argv)
   [MPI_LAUNCH="`pwd`/util/serialrun"
    AC_SUBST([MPI_LAUNCH])])
 AS_IF([test $HAVE_PARALLEL_NC4 -gt 0],
-  [AC_DEFINE([HAVE_PARALLEL_NC4],[1],
+  [ACX_TLS_XLC_RETRY
+   AC_DEFINE([HAVE_PARALLEL_NC4],[1],
              [netCDF library does support MPI parallel invocations])])
 AC_SUBST([HAVE_PARALLEL_NC4])
 AM_CONDITIONAL([USE_MPI],[test x"$USE_MPI" = xyes])
diff --git a/m4/acx_tls_xlc_retry.m4 b/m4/acx_tls_xlc_retry.m4
new file mode 100644
index 000000000..20c0042f2
--- /dev/null
+++ b/m4/acx_tls_xlc_retry.m4
@@ -0,0 +1,82 @@
+dnl acx_tls_xlc_retry.m4 --- check for TLS storage declarator and retry for
+dnl                     IBM XL which might need an extra compiler option
+dnl
+dnl Copyright  (C)  2016  Thomas Jahns <jahns@dkrz.de>
+dnl
+dnl Keywords: configure configure.ac autoconf TLS thread-local storage xlc
+dnl Author: Thomas Jahns <jahns@dkrz.de>
+dnl Maintainer: Thomas Jahns <jahns@dkrz.de>
+dnl URL: https://www.dkrz.de/redmine/projects/show/scales-ppm
+dnl
+dnl Redistribution and use in source and binary forms, with or without
+dnl modification, are  permitted provided that the following conditions are
+dnl met:
+dnl
+dnl Redistributions of source code must retain the above copyright notice,
+dnl this list of conditions and the following disclaimer.
+dnl
+dnl Redistributions in binary form must reproduce the above copyright
+dnl notice, this list of conditions and the following disclaimer in the
+dnl documentation and/or other materials provided with the distribution.
+dnl
+dnl Neither the name of the DKRZ GmbH nor the names of its contributors
+dnl may be used to endorse or promote products derived from this software
+dnl without specific prior written permission.
+dnl
+dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+dnl IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+dnl TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+dnl PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+dnl OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+dnl EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+dnl PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+dnl PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+dnl LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+dnl NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+dnl SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+dnl
+dnl
+dnl ACX_TLS_XLC_RETRY([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+dnl
+dnl First runs AX_TLS and retries with xlc option if this doesn't work
+dnl
+dnl TODO: instead of assuming C language, perform test for active AC_LANG
+AC_DEFUN([ACX_TLS_XLC_RETRY],
+  [AX_TLS(,[ac_cv_tls=`$CC -qversion 2>&1 | sed -n '/^IBM XL C/{
+n
+s/^Version: \(@<:@0-9@:>@*\).*/\1/
+t print
+b
+: print
+p
+}'`
+   AS_IF([test x"$ac_cv_tls" = x],
+       [ac_cv_tls=none],
+     [test "$ac_cv_tls" -gt 7],
+       [saved_CFLAGS=$CFLAGS
+        CFLAGS=`echo "$CFLAGS" | sed -n '/.*-qtls\(=@<:@^ @:>@*\)\{0,1\}/{
+p
+q
+}
+s/$/ -qtls=initial-exec/
+p
+q
+'`
+dnl unless the user already set the -qtls option, add it and retry test
+       AS_IF([test x"$CFLAGS" = x"$saved_CFLAGS"],
+             [ac_cv_tls=none],
+             [AC_MSG_NOTICE([retrying with -qtls=initial-exec added to CFLAGS])
+              AS_UNSET([ac_cv_tls])
+              AX_TLS(,[CFLAGS=$saved_CFLAGS])])
+],[ac_cv_tls=none])])
+   m4_ifnblank([$1$2],
+     [AS_IF([test "$ac_cv_tls" != "none"],
+        [m4_ifnblank([$1],[$1])],
+        [m4_ifnblank([$2],[$2])])])
+])
+dnl
+dnl Local Variables:
+dnl mode: autoconf
+dnl license-project-url: "https://www.dkrz.de/redmine/projects/show/scales-ppm"
+dnl license-default: "bsd"
+dnl End:
diff --git a/m4/ax_tls.m4 b/m4/ax_tls.m4
new file mode 100644
index 000000000..809b761a0
--- /dev/null
+++ b/m4/ax_tls.m4
@@ -0,0 +1,74 @@
+# ===========================================================================
+#          http://www.gnu.org/software/autoconf-archive/ax_tls.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_TLS([action-if-found], [action-if-not-found])
+#
+# DESCRIPTION
+#
+#   Provides a test for the compiler support of thread local storage (TLS)
+#   extensions. Defines TLS if it is found. Currently knows about GCC/ICC
+#   and MSVC. I think SunPro uses the same as GCC, and Borland apparently
+#   supports either.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Alan Woodland <ajw05@aber.ac.uk>
+#   Copyright (c) 2010 Diego Elio Petteno` <flameeyes@gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 11
+
+AC_DEFUN([AX_TLS], [
+  AC_MSG_CHECKING([for thread local storage (TLS) class])
+  AC_CACHE_VAL([ac_cv_tls],
+   [for ax_tls_keyword in __thread '__declspec(thread)' none; do
+       AS_CASE([$ax_tls_keyword],
+          [none], [ac_cv_tls=none ; break],
+          [AC_TRY_COMPILE(
+              [#include <stdlib.h>
+               static void
+               foo(void) {
+               static ] $ax_tls_keyword [ int bar;
+               exit(1);
+               }],
+               [],
+               [ac_cv_tls=$ax_tls_keyword ; break],
+               ac_cv_tls=none
+           )])
+    done
+  ])
+  AC_MSG_RESULT([$ac_cv_tls])
+
+  AS_IF([test "$ac_cv_tls" != "none"],
+    [AC_DEFINE_UNQUOTED([TLS],[$ac_cv_tls],[If the compiler supports a TLS storage class define it to that here])
+     m4_ifnblank([$1],[$1])],
+    [m4_ifnblank([$2],[$2])])
+])
diff --git a/src/pio_cdf_int.c b/src/pio_cdf_int.c
index df8e71e0a..729ff696e 100644
--- a/src/pio_cdf_int.c
+++ b/src/pio_cdf_int.c
@@ -2,10 +2,13 @@
 #include "config.h"
 #endif
 
-#if defined(HAVE_NETCDF4) && defined(HAVE_PARALLEL_NC4)
+#ifdef HAVE_LIBNETCDF
+
+#include <setjmp.h>
+#include <stdbool.h>
 
 #include <netcdf.h>
-#ifdef HAVE_NETCDF_PAR_H
+#if defined HAVE_PARALLEL_NC4 && defined HAVE_NETCDF_PAR_H
 #include <netcdf_par.h>
 #endif
 
@@ -18,19 +21,76 @@
 
 #include "pio_cdf_int.h"
 
+#if defined HAVE_PARALLEL_NC4
+#if !defined TLS && defined HAVE_PTHREAD
+pthread_key_t cdiPioCdfJmpKey;
+#else
+TLS struct cdiPioNcCreateLongJmpRetBuf *cdiPioCdfJmpBuf;
+#endif
+
 static int
 cdiPio_nc__create(const char *path, int cmode, size_t initialsz, size_t *chunksizehintp, int *ncidp)
 {
   int status, ioMode = commInqIOMode();
-  if (cmode & NC_NETCDF4 && ioMode != PIO_NONE)
-    {
-      cmode |= NC_MPIPOSIX;
-      status = nc_create_par(path, cmode, commInqCommColl(), MPI_INFO_NULL, ncidp);
-    }
-  else if (cmode & (NC_64BIT_OFFSET | NC_CLASSIC_MODEL) && ioMode != PIO_NONE)
+  if (ioMode != PIO_NONE)
     {
-      /* FIXME: improve handling of pnetcdf here */
-      abort();
+#if !defined TLS && defined HAVE_PTHREAD
+      struct cdiPioNcCreateLongJmpRetBuf *cdiPioCdfJmpBuf = pthread_getspecific(cdiPioCdfJmpKey);
+#endif /* fetched once here because every branch below dereferences it */
+      if (cmode & NC_NETCDF4)
+        {
+          cmode |= NC_MPIPOSIX;
+          status = nc_create_par(path, cmode, commInqCommColl(), MPI_INFO_NULL, ncidp);
+          cdiPioCdfJmpBuf->openRank = CDI_PIO_COLLECTIVE_OPEN;
+        }
+      else
+        {
+          int rank = commInqRankColl(); /* also needed by the serial fallback when NC_PNETCDF is undefined */
+#ifdef NC_PNETCDF
+          /* which combination of cmode flags has already been tested? */
+          static bool pnetcdfWontWork[] = {
+            false, /* CDF-1 */
+            false, /* CDF-2 */
+            false  /* CDF-5 */
+          };
+          static const char cdfVers[] = { '1', '2', '5' };
+          size_t cdfIdx;
+          if (cmode & NC_64BIT_OFFSET)
+            cdfIdx = 1;
+          else if (cmode & NC_CLASSIC_MODEL)
+            cdfIdx = 2;
+          else
+            cdfIdx = 0;
+          MPI_Comm collComm = commInqCommColl();
+          cmode |= NC_PNETCDF;
+          if (!pnetcdfWontWork[cdfIdx])
+            {
+              status = nc_create_par(path, cmode, collComm, MPI_INFO_NULL, ncidp);
+              if (status == NC_EINVAL)
+                {
+                  if (rank == 0)
+                    fprintf(stderr,
+                            "warning: parallel create not implemented"
+                            " for cdf-%c format!\n",
+                            cdfVers[cdfIdx]);
+                  pnetcdfWontWork[cdfIdx] = true;
+                }
+              else
+                cdiPioCdfJmpBuf->openRank = CDI_PIO_COLLECTIVE_OPEN;
+            }
+          if (pnetcdfWontWork[cdfIdx])
+            {
+              /* no pnetcdf is implied if not even NC_PNETCDF is defined */
+              cmode &= ~NC_PNETCDF;
+#endif
+              if (rank == cdiPioCdfJmpBuf->openRank)
+                status = nc__create(path, cmode, initialsz, chunksizehintp, ncidp);
+              else
+                longjmp(cdiPioCdfJmpBuf->jmpBuf, 1);
+#ifdef NC_PNETCDF
+            }
+#endif
+        }
     }
   else
     status = nc__create(path, cmode, initialsz, chunksizehintp, ncidp);
@@ -41,7 +101,10 @@ static void
 cdiPioCdfDefVar(int ncid, const char *name, nc_type xtype, int ndims, const int dimids[], int *varidp)
 {
   cdf_def_var_serial(ncid, name, xtype, ndims, dimids, varidp);
-  if (commInqIOMode() != PIO_NONE)
+  int cf_format;
+  int status = nc_inq_format(ncid, &cf_format);
+  if (status != NC_NOERR) Error("%s", nc_strerror(status));
+  if (commInqIOMode() != PIO_NONE && cf_format == NC_FORMAT_NETCDF4)
     {
       xdebug("%s", "calling nc_var_par_access");
       int status = nc_var_par_access(ncid, *varidp, NC_COLLECTIVE);
@@ -54,9 +117,29 @@ cdiPioEnableNetCDFParAccess(void)
 {
   namespaceSwitchSet(NSSWITCH_NC__CREATE, NSSW_FUNC(cdiPio_nc__create));
   namespaceSwitchSet(NSSWITCH_CDF_DEF_VAR, NSSW_FUNC(cdiPioCdfDefVar));
+#if !defined TLS && defined HAVE_PTHREAD
+  int ierror = pthread_key_create(&cdiPioCdfJmpKey, NULL);
+  if (ierror)
+    {
+      Error("%s: error creating pthread key: %s\n", __func__, strerror(ierror));
+    }
+#endif
+}
+
+void
+cdiPioDisableNetCDFParAccess(void)
+{
+#if !defined TLS && defined HAVE_PTHREAD
+  int ierror = pthread_key_delete(cdiPioCdfJmpKey);
+  if (ierror)
+    {
+      Error("%s: error deleting pthread key: %s\n", __func__, strerror(ierror));
+    }
+#endif
 }
+#endif /* ifdef HAVE_PARALLEL_NC4 */
 
-#endif /* defined (HAVE_NETCDF4) && defined (HAVE_PARALLEL_NC4) */
+#endif /* ifdef HAVE_LIBNETCDF */
 /*
  * Local Variables:
  * c-file-style: "Java"
diff --git a/src/pio_cdf_int.h b/src/pio_cdf_int.h
index 3539a62c4..cb185c8bd 100644
--- a/src/pio_cdf_int.h
+++ b/src/pio_cdf_int.h
@@ -5,13 +5,30 @@
 #include "config.h"
 #endif
 
-#ifdef HAVE_LIBNETCDF
+#ifdef HAVE_PARALLEL_NC4
 #include "cdf_int.h"
 
-void cdiPioEnableNetCDFParAccess(void);
+enum
+{
+  CDI_PIO_COLLECTIVE_OPEN = -1,
+};
 
+struct cdiPioNcCreateLongJmpRetBuf
+{
+  jmp_buf jmpBuf; /* used with plain setjmp/longjmp, hence jmp_buf and not sigjmp_buf */
+  int openRank;   /* rank performing a serial open, or CDI_PIO_COLLECTIVE_OPEN */
+};
+
+#if !defined TLS && defined HAVE_PTHREAD
+extern pthread_key_t cdiPioCdfJmpKey;
+#else
+extern TLS struct cdiPioNcCreateLongJmpRetBuf *cdiPioCdfJmpBuf;
 #endif
-#endif
+
+void cdiPioEnableNetCDFParAccess(void);
+
+#endif /* HAVE_PARALLEL_NC4 */
+#endif /* PIO_CDF_INT_H */
 
 /*
  * Local Variables:
diff --git a/src/pio_dist_grid.c b/src/pio_dist_grid.c
index 51daa6f84..7391f4c14 100644
--- a/src/pio_dist_grid.c
+++ b/src/pio_dist_grid.c
@@ -156,7 +156,7 @@ cdiPioDistGridCreate(int gridtype, int size, int xsize, int ysize, int nvertex,
 
 #ifdef HAVE_PPM_DIST_ARRAY_H
 
-#if defined(HAVE_NETCDF4) && defined(HAVE_PARALLEL_NC4)
+#ifdef HAVE_PARALLEL_NC4
 /* maximal spatial rank of variable */
 enum
 {
diff --git a/src/pio_server.c b/src/pio_server.c
index 466e087fb..bd6606fea 100644
--- a/src/pio_server.c
+++ b/src/pio_server.c
@@ -7,6 +7,9 @@
 #include "pio_server.h"
 
 #include <limits.h>
+#ifdef HAVE_PARALLEL_NC4
+#include <setjmp.h>
+#endif
 #include <stdlib.h>
 #include <stdio.h>
 
@@ -74,7 +77,7 @@ static struct
 {
   MPI_Win getWin;
   struct clientBuf *clientBuf;
-#if defined HAVE_LIBNETCDF && !defined HAVE_PARALLEL_NC4
+#ifdef HAVE_LIBNETCDF
   int ownerRank;
 #endif
   /* put data for description of last layout from RMA GET here */
@@ -388,8 +391,8 @@ myVarPart(struct PPM_extent varShape[3], struct xyzDims collGrid, struct PPM_ext
 
 /* collective writing variant */
 static void
-writeNetCDFStream(size_t streamIdx, struct streamMapping *mapping, double **data_, int *currentDataBufSize,
-                  const struct cdiPioConf *conf)
+writeNetCDFStreamParallel(size_t streamIdx, struct streamMapping *mapping, double **data_, int *currentDataBufSize,
+                          const struct cdiPioConf *conf)
 {
   const int nvars = mapping->numVars;
   const int *restrict varMap = mapping->varMap;
@@ -483,7 +486,8 @@ writeNetCDFStream(size_t streamIdx, struct streamMapping *mapping, double **data
       }
 }
 
-#elif defined(HAVE_LIBNETCDF)
+#endif
+#if defined(HAVE_LIBNETCDF)
 /* needed for writing when some files are only written to by a single process */
 /* cdiOpenFileMap(fileID) gives the writer process */
 static int
@@ -493,12 +497,13 @@ cdiPioSerialOpenFileMap(int streamID)
   xassert(streamIdx < SIZE_MAX);
   return rxWin[streamIdx].ownerRank;
 }
+
 /* for load-balancing purposes, count number of files per process */
 /* cdiOpenFileCounts[rank] gives number of open files rank has to himself */
 static int *cdiSerialOpenFileCount;
 
 static int
-cdiPioNextOpenRank()
+cdiPioNextOpenRank(void)
 {
   xassert(cdiSerialOpenFileCount != NULL);
   int commCollSize = commInqSizeColl();
@@ -530,14 +535,18 @@ cdiPioCloseFileOnRank(int rank)
 static void
 cdiPioServerCdfDefVars(stream_t *streamptr)
 {
-  int rank, rankOpen;
-  if (commInqIOMode() == PIO_NONE || ((rank = commInqRankColl()) == (rankOpen = cdiPioSerialOpenFileMap(streamptr->self))))
+  int rank, rankOpen = cdiPioSerialOpenFileMap(streamptr->self);
+  if (commInqIOMode() == PIO_NONE
+#ifdef HAVE_PARALLEL_NC4
+      || rankOpen == CDI_PIO_COLLECTIVE_OPEN
+#endif
+      || (rank = commInqRankColl()) == rankOpen)
     cdfDefCoordinateVars(streamptr);
 }
 
 static void
-writeNetCDFStream(size_t streamIdx, struct streamMapping *mapping, double **data_, int *currentDataBufSize,
-                  const struct cdiPioConf *conf)
+writeNetCDFStreamSerial(size_t streamIdx, struct streamMapping *mapping, double **data_, int *currentDataBufSize,
+                        const struct cdiPioConf *conf)
 {
   const int nvars = mapping->numVars;
   const int *restrict varMap = mapping->varMap, *restrict numLvlsW = mapping->numLvlsW;
@@ -597,6 +606,20 @@ writeNetCDFStream(size_t streamIdx, struct streamMapping *mapping, double **data
       }
 }
 
+/* dispatch to the collective or the single-writer netCDF output routine */
+static void
+writeNetCDFStream(size_t streamIdx, struct streamMapping *mapping, double **data_, int *currentDataBufSize,
+                  const struct cdiPioConf *conf)
+{
+#ifdef HAVE_PARALLEL_NC4
+  if (cdiPioSerialOpenFileMap(openStreams.entries[streamIdx]) == CDI_PIO_COLLECTIVE_OPEN)
+    {
+      writeNetCDFStreamParallel(streamIdx, mapping, data_, currentDataBufSize, conf);
+      return;
+    }
+#endif
+  writeNetCDFStreamSerial(streamIdx, mapping, data_, currentDataBufSize, conf);
+}
 #endif
 
 static inline struct winHeaderEntry *
@@ -1009,7 +1032,7 @@ readGetBuffers(size_t streamIdx, const struct cdiPioConf *conf)
       {
       case CDI_FILETYPE_GRB:
       case CDI_FILETYPE_GRB2: writeGribStream(streamIdx, map, &data, &currentDataBufSize, conf); break;
-#ifdef HAVE_NETCDF4
+#ifdef HAVE_LIBNETCDF
       case CDI_FILETYPE_NETCDF: writeNetCDFStream(streamIdx, map, &data, &currentDataBufSize, conf); break;
 #endif
       default: xabort("unhandled filetype in parallel I/O.");
@@ -1066,17 +1089,39 @@ static int
 cdiPioServerStreamOpen(const char *filename, char filemode, int filetype, stream_t *streamptr, int recordBufIsToBeCreated)
 {
   int fileID = -1;
-#if defined HAVE_LIBNETCDF && !defined HAVE_PARALLEL_NC4
+#ifdef HAVE_LIBNETCDF
   /* Only needs initialization to shut up gcc */
   int rank = -1;
 #endif
   switch (filetype)
     {
-#if defined HAVE_LIBNETCDF && !defined HAVE_PARALLEL_NC4
+#ifdef HAVE_LIBNETCDF
     case CDI_FILETYPE_NC:
     case CDI_FILETYPE_NC2:
     case CDI_FILETYPE_NC4:
     case CDI_FILETYPE_NC4C:
+#ifdef HAVE_PARALLEL_NC4
+      {
+        struct cdiPioNcCreateLongJmpRetBuf retJmpBuf;
+        retJmpBuf.openRank = cdiPioNextOpenRank();
+#if !defined TLS && defined HAVE_PTHREAD
+        pthread_setspecific(cdiPioCdfJmpKey, &retJmpBuf); /* publish the buffer declared above */
+#else
+        cdiPioCdfJmpBuf = &retJmpBuf;
+#endif
+        if (!setjmp(retJmpBuf.jmpBuf)) /* attempt parallel open first */
+          /* in case it fails, ranks other than retJmpBuf.openRank
+           * will call longjmp and return 1 from the above setjmp */
+          fileID = cdiStreamOpenDefaultDelegate(filename, filemode, filetype, streamptr, recordBufIsToBeCreated);
+        rank = retJmpBuf.openRank; /* CDI_PIO_COLLECTIVE_OPEN if cdiPio_nc__create opened in parallel */
+        if (rank != CDI_PIO_COLLECTIVE_OPEN)
+          {
+            streamptr->filetype = filetype;
+            if (commInqIOMode() != PIO_NONE) xmpi(MPI_Bcast(&fileID, 1, MPI_INT, rank, commInqCommColl()));
+            cdiPioOpenFileOnRank(rank);
+          }
+      }
+#else
       {
         int ioMode = commInqIOMode();
         if (ioMode == PIO_NONE || commInqRankColl() == (rank = cdiPioNextOpenRank()))
@@ -1086,6 +1131,7 @@ cdiPioServerStreamOpen(const char *filename, char filemode, int filetype, stream
         if (ioMode != PIO_NONE) xmpi(MPI_Bcast(&fileID, 1, MPI_INT, rank, commInqCommColl()));
         cdiPioOpenFileOnRank(rank);
       }
+#endif
       break;
 #endif
     default: fileID = cdiStreamOpenDefaultDelegate(filename, filemode, filetype, streamptr, recordBufIsToBeCreated);
@@ -1112,7 +1158,7 @@ cdiPioServerStreamOpen(const char *filename, char filemode, int filetype, stream
       rxWin[streamIdx].aggBufSize = 0;
       rxWin[streamIdx].aggBufUsed = 0;
       rxWin[streamIdx].aggBuf = NULL;
-#if defined HAVE_LIBNETCDF && !defined HAVE_PARALLEL_NC4
+#ifdef HAVE_LIBNETCDF
       rxWin[streamIdx].ownerRank = rank;
 #endif
     }
@@ -1130,13 +1176,20 @@ cdiPioServerStreamClose(stream_t *streamptr, int recordBufIsToBeDeleted)
     {
       switch (cdiBaseFiletype(filetype))
         {
-#if defined(HAVE_LIBNETCDF) && !defined(HAVE_PARALLEL_NC4)
+#ifdef HAVE_LIBNETCDF
         case CDI_FILETYPE_NETCDF:
           {
             int rank, rankOpen = cdiPioSerialOpenFileMap(streamptr->self);
-            if (commInqIOMode() == PIO_NONE || ((rank = commInqRankColl()) == rankOpen))
+            if (commInqIOMode() == PIO_NONE
+#ifdef HAVE_PARALLEL_NC4
+                || rankOpen == CDI_PIO_COLLECTIVE_OPEN
+#endif
+                || ((rank = commInqRankColl()) == rankOpen))
               cdiStreamCloseDefaultDelegate(streamptr, recordBufIsToBeDeleted);
-            cdiPioCloseFileOnRank(rankOpen);
+#ifdef HAVE_PARALLEL_NC4
+            if (rankOpen != CDI_PIO_COLLECTIVE_OPEN)
+#endif
+              cdiPioCloseFileOnRank(rankOpen);
           }
           break;
 #endif
@@ -1154,12 +1207,16 @@ cdiPioServerStreamClose(stream_t *streamptr, int recordBufIsToBeDeleted)
     }
 }
 
-#if defined(HAVE_LIBNETCDF) && !defined(HAVE_PARALLEL_NC4)
+#ifdef HAVE_LIBNETCDF
 static void
 cdiPioCdfDefTimestep(stream_t *streamptr, int tsID)
 {
-  int rank, rankOpen, streamID = streamptr->self;
-  if (commInqIOMode() == PIO_NONE || ((rank = commInqRankColl()) == (rankOpen = cdiPioSerialOpenFileMap(streamID))))
+  int rank, streamID = streamptr->self, rankOpen = cdiPioSerialOpenFileMap(streamID);
+  if (commInqIOMode() == PIO_NONE
+#ifdef HAVE_PARALLEL_NC4
+      || rankOpen == CDI_PIO_COLLECTIVE_OPEN
+#endif
+      || (rank = commInqRankColl()) == rankOpen)
     cdfDefTimestep(streamptr, tsID);
 }
 #endif
@@ -1248,7 +1305,8 @@ cdiPioCollectorMessageLoop(const struct cdiPioConf *conf)
 #ifdef HAVE_PARALLEL_NC4
   cdiPioEnableNetCDFParAccess();
   numPioPrimes = PPM_prime_factorization_32((uint32_t) commInqSizeColl(), &pioPrimes);
-#elif defined(HAVE_LIBNETCDF)
+#endif
+#ifdef HAVE_LIBNETCDF
   cdiSerialOpenFileCount = Calloc(sizeof(cdiSerialOpenFileCount[0]), (size_t) commInqSizeColl());
   namespaceSwitchSet(NSSWITCH_CDF_DEF_TIMESTEP, NSSW_FUNC(cdiPioCdfDefTimestep));
   namespaceSwitchSet(NSSWITCH_CDF_STREAM_SETUP, NSSW_FUNC(cdiPioServerCdfDefVars));
diff --git a/tests/pio_cksum_cdf.in b/tests/pio_cksum_cdf.in
index 56fc2ef5b..5b57c947d 100644
--- a/tests/pio_cksum_cdf.in
+++ b/tests/pio_cksum_cdf.in
@@ -1,13 +1,24 @@
 #! @SHELL@
-pio_write_args="-f nc4 -w 3 -qtaxis-type=relative"
-mpi_task_num=7
 LOG=pio_cksum_cdf.log
-suffix=nc4
 if [ "@USE_MPI@" = yes -a "@ENABLE_NETCDF@" = yes ]; then
+  if [ "@ENABLE_NC4@" = yes ]; then
+    suffix=nc4
+    mpi_task_num=7
+    pio_write_args="-f $suffix -w 3 -qtaxis-type=relative"
+    . ./pio_write_run
+  fi
+  mpi_task_num=8
+  suffix=nc2
+  pio_write_args="-f $suffix -w 3 -qtaxis-type=relative"
+  . ./pio_write_run
+  mpi_task_num=5
+  suffix=nc
+  pio_write_args="-f $suffix -w 2 -qtaxis-type=relative"
   . ./pio_write_run
 else
   exit 77
 fi
+
 #
 # Local Variables:
 # mode: sh
-- 
GitLab