From 3ff24aa4101e74638c38f6f17d09d25f3377f301 Mon Sep 17 00:00:00 2001
From: Uwe Schulzweida <uwe.schulzweida@mpimet.mpg.de>
Date: Wed, 19 Jun 2024 18:37:49 +0200
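Subject: [PATCH] NetCDF4: improved calculation of chunk cache size

* CDI_Chunk_Cache is now a signed long that defaults to -1. Any value
  >= 0 read from CDI_CHUNK_CACHE, including 0, is used as the chunk
  cache size; previously 0 fell through to the calculated size.
* New environment variable CDI_CHUNK_CACHE_INFO: a value > 0 enables
  the "chunkCacheSize" message without turning on CDI_Debug.
* calc_chunk_cache_size(): the z dimension now contributes its chunk
  size directly (the numLevels member of ncvar_t is removed), and 0 is
  returned when the product of the time and z chunk sizes is 1.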
---
 ChangeLog          |  4 ++--
 NEWS               |  2 +-
 src/cdi_int.c      | 22 +++++++++++++---------
 src/cdi_int.h      |  3 ++-
 src/stream_cdf_i.c | 16 ++++++++--------
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 39a577e77..8cba36572 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,9 +4,9 @@
         * using EXSE library version 2.0.0
 	* Version 2.4.2 released
 
-2024-06-18  Uwe Schulzweida
+2024-06-19  Uwe Schulzweida
 
-	* NetCDF4: set always the CDI calculate chunk cache size
+	* NetCDF4: improved calculation of chunk cache size
 
 2024-06-17  Uwe Schulzweida
 
diff --git a/NEWS b/NEWS
index 166765c7a..358f410f5 100644
--- a/NEWS
+++ b/NEWS
@@ -5,7 +5,7 @@ Version 2.4.1 (21 May 2024):
 
    Changes:
      * Removed  interface function vlistCopy(), use vlistDuplicate()
-     * calc_chunk_cache_size:  improved for 4D chunked data
+     * calc_chunk_cache_size: improved for 4D chunked data
    Fixed bugs:
      * gribapi_get_timeunits: check unitsOfTime (bug fix) [report: Chris Barnard]
 
diff --git a/src/cdi_int.c b/src/cdi_int.c
index 0f1bca014..b25437d54 100644
--- a/src/cdi_int.c
+++ b/src/cdi_int.c
@@ -47,7 +47,8 @@ int CDI_Reduce_Dim = 0;
 int CDI_Shuffle = 0;
 int CDI_Test = 0;
 size_t CDI_Netcdf_Hdr_Pad = 0UL;
-size_t CDI_Chunk_Cache = 0UL;
+bool CDI_Chunk_Cache_Info = false;
+long CDI_Chunk_Cache = -1L;
 size_t CDI_Chunk_Cache_Max = 0UL;
 bool CDI_Netcdf_Lazy_Grid_Load = false;
 
@@ -367,8 +368,11 @@ cdiInitialize(void)
       value = cdi_getenv_int("CDI_NETCDF_HDR_PAD");
       if (value >= 0) CDI_Netcdf_Hdr_Pad = (size_t) value;
 
+      value = cdi_getenv_int("CDI_CHUNK_CACHE_INFO");
+      if (value > 0) CDI_Chunk_Cache_Info = true;
+
       value = cdi_getenv_int("CDI_CHUNK_CACHE");
-      if (value >= 0) CDI_Chunk_Cache = (size_t) value;
+      if (value >= 0) CDI_Chunk_Cache = value;
 
       value = cdi_getenv_int("CDI_CHUNK_CACHE_MAX");
       if (value >= 0) CDI_Chunk_Cache_Max = (size_t) value;
@@ -470,13 +474,13 @@ cdiInitialize(void)
       if (envstr)
         {
           // clang-format off
-	  if      (strncmp(envstr, "standard", 8)  == 0) CDI_Default_Calendar = CALENDAR_STANDARD;
-	  else if (strncmp(envstr, "gregorian", 9) == 0) CDI_Default_Calendar = CALENDAR_GREGORIAN;
-	  else if (strncmp(envstr, "proleptic", 9) == 0) CDI_Default_Calendar = CALENDAR_PROLEPTIC;
-	  else if (strncmp(envstr, "360days", 7)   == 0) CDI_Default_Calendar = CALENDAR_360DAYS;
-	  else if (strncmp(envstr, "365days", 7)   == 0) CDI_Default_Calendar = CALENDAR_365DAYS;
-	  else if (strncmp(envstr, "366days", 7)   == 0) CDI_Default_Calendar = CALENDAR_366DAYS;
-	  else if (strncmp(envstr, "none", 4)      == 0) CDI_Default_Calendar = CALENDAR_NONE;
+	      if      (strncmp(envstr, "standard", 8)  == 0) CDI_Default_Calendar = CALENDAR_STANDARD;
+	      else if (strncmp(envstr, "gregorian", 9) == 0) CDI_Default_Calendar = CALENDAR_GREGORIAN;
+	      else if (strncmp(envstr, "proleptic", 9) == 0) CDI_Default_Calendar = CALENDAR_PROLEPTIC;
+	      else if (strncmp(envstr, "360days", 7)   == 0) CDI_Default_Calendar = CALENDAR_360DAYS;
+	      else if (strncmp(envstr, "365days", 7)   == 0) CDI_Default_Calendar = CALENDAR_365DAYS;
+	      else if (strncmp(envstr, "366days", 7)   == 0) CDI_Default_Calendar = CALENDAR_366DAYS;
+	      else if (strncmp(envstr, "none", 4)      == 0) CDI_Default_Calendar = CALENDAR_NONE;
           // clang-format on
           if (CDI_Debug) Message("Default calendar set to %s!", envstr);
         }
diff --git a/src/cdi_int.h b/src/cdi_int.h
index e36d4a754..00c9bb1ff 100644
--- a/src/cdi_int.h
+++ b/src/cdi_int.h
@@ -408,7 +408,8 @@ extern int CDI_CMOR_Mode;
 extern int CDI_Reduce_Dim;
 extern int CDI_Shuffle;
 extern size_t CDI_Netcdf_Hdr_Pad;
-extern size_t CDI_Chunk_Cache;
+extern bool CDI_Chunk_Cache_Info;
+extern long CDI_Chunk_Cache;
 extern size_t CDI_Chunk_Cache_Max;
 extern bool CDI_Netcdf_Lazy_Grid_Load;
 extern int STREAM_Debug;
diff --git a/src/stream_cdf_i.c b/src/stream_cdf_i.c
index 89dd81a5d..2750f5000 100644
--- a/src/stream_cdf_i.c
+++ b/src/stream_cdf_i.c
@@ -112,7 +112,6 @@ typedef struct
   size_t gridSize;
   size_t xSize;
   size_t ySize;
-  size_t numLevels;
   int natts;
   int *atts;
   size_t vctsize;
@@ -648,7 +647,6 @@ init_ncvars(int nvars, ncvar_t *ncvars, int ncid)
       ncvar->gridSize = 0;
       ncvar->xSize = 0;
       ncvar->ySize = 0;
-      ncvar->numLevels = 0;
       ncvar->natts = 0;
       ncvar->atts = NULL;
       ncvar->vctsize = 0;
@@ -3309,7 +3307,6 @@ cdf_define_all_zaxes(stream_t *streamptr, int vlistID, ncdim_t *ncdims, int nvar
               return CDI_EDIMSIZE;
             }
 
-          ncvar->numLevels = zsize;
           ncvar->zaxisID = varDefZaxis(vlistID, zaxisType, (int) zsize, zvar, (const char **) zcvals, zclength, withBounds, lbounds,
                                        ubounds, (int) vctsize, vct, pname, plongname, punits, zdatatype, 1, 0, -1);
 
@@ -3532,20 +3529,23 @@ size_of_dim_chunks(size_t n, size_t c)
 static size_t
 calc_chunk_cache_size(int timedimid, ncvar_t *ncvar)
 {
-  size_t nx = 0, ny = 0, nz = 0;
+  size_t nx = 0, ny = 0;
   size_t cx = 0, cy = 0, cz = 0;
   for (int i = 0; i < ncvar->ndims; i++)
     {
       int dimtype = ncvar->dimtypes[i];
       // clang-format off
-      if      (dimtype == Z_AXIS) { cz = ncvar->chunks[i]; nz = ncvar->numLevels; }
+      if      (dimtype == Z_AXIS) { cz = ncvar->chunks[i]; }
       else if (dimtype == Y_AXIS) { cy = ncvar->chunks[i]; ny = ncvar->ySize; }
       else if (dimtype == X_AXIS) { cx = ncvar->chunks[i]; nx = ncvar->xSize; }
       // clang-format on
     }
 
   size_t chunkCacheSize = (ncvar->dimids[0] == timedimid) ? ncvar->chunks[0] : 1;
-  if (nz > 0 && cz > 0) chunkCacheSize *= (chunkCacheSize == 1) ? cz : size_of_dim_chunks(nz, cz);
+  if (cz > 0) chunkCacheSize *= cz;
+
+  if (chunkCacheSize == 1) return 0;  // no chunk cache needed because the full field is read
+
   if (ny > 0 && cy > 0) chunkCacheSize *= size_of_dim_chunks(ny, cy);
   if (nx > 0 && cx > 0) chunkCacheSize *= size_of_dim_chunks(nx, cx);
 
@@ -3559,7 +3559,7 @@ calc_chunk_cache_size(int timedimid, ncvar_t *ncvar)
 static void
 cdf_set_var_chunk_cache(ncvar_t *ncvar, int ncvarid, size_t chunkCacheSize)
 {
-  if (CDI_Debug) Message("%s: chunkCacheSize=%zu", ncvar->name, chunkCacheSize);
+  if (CDI_Debug || CDI_Chunk_Cache_Info) Message("%s: chunkCacheSize=%zu", ncvar->name, chunkCacheSize);
   nc_set_var_chunk_cache(ncvar->ncid, ncvarid, chunkCacheSize, ncvar->chunkCacheNelems, ncvar->chunkCachePreemption);
 }
 
@@ -3597,7 +3597,7 @@ cdf_define_all_vars(stream_t *streamptr, int vlistID, int instID, int modelID, i
           if (ncvar->chunkType != CDI_UNDEFID) cdiDefKeyInt(vlistID, varID, CDI_KEY_CHUNKTYPE, ncvar->chunkType);
           if (ncvar->chunkSize > 1) cdiDefKeyInt(vlistID, varID, CDI_KEY_CHUNKSIZE, ncvar->chunkSize);
 
-          size_t cacheSize = (CDI_Chunk_Cache > 0) ? CDI_Chunk_Cache : calc_chunk_cache_size(timedimid, ncvar);
+          size_t cacheSize = (CDI_Chunk_Cache >= 0) ? (size_t) CDI_Chunk_Cache : calc_chunk_cache_size(timedimid, ncvar);
           cdf_set_var_chunk_cache(ncvar, ncvarid, cacheSize);
         }
 #endif
-- 
GitLab