From a62b8a39142afbcb312ef94a94ddc1c291234c38 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Fri, 14 Feb 2025 16:01:25 +0200
Subject: [PATCH 01/20] updated the way to create arrays in groups as per zarr 3

---
 apps/hiopy/configure/configure.py      | 15 +++++++++------
 apps/hiopy/configure/create_dataset.py |  4 ++--
 requirements-dev.txt                   |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index d2902c7..eb3e090 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -15,7 +15,8 @@ def add_time(dataset, startdate, enddate, dt, name="time"):
     time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s")
 
     for g in _collect_groups(dataset):
-        time = g.create_dataset(name, data=time_data, fill_value=None, shape=time_data.shape)
+        time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong)
+        time.append(data=time_data)
         time.attrs["_ARRAY_DIMENSIONS"] = (name,)
         time.attrs["axis"] = "T"
         time.attrs["calendar"] = "proleptic_gregorian"
@@ -31,7 +32,8 @@ def _collect_groups(dataset):
 
 def add_height(dataset, name, n):
     for g in _collect_groups(dataset):
-        height = g.create_dataset(name, data=np.arange(n))
+        height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape)
+        height.append(data=np.arange(n)) 
         height.attrs["_ARRAY_DIMENSIONS"] = [name]
         height.attrs["axis"] = "Z"
         height.attrs["long_name"] = "generalized_height"
@@ -56,13 +58,13 @@ def add_variable(
     for g in _collect_groups(dataset):
         taxis_tuple = tuple() if taxis is None else (taxis,)
         ntime = tuple() if taxis is None else (g[taxis].shape[0],)
-        grid_mapping_name = g.crs.attrs["grid_mapping_name"]
+        grid_mapping_name = g['crs'].attrs["grid_mapping_name"]
         spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell"
         crs_len = 0
         if grid_mapping_name == "healpix":
-            crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"])
+            crs_len = healpy.nside2npix(g['crs'].attrs["healpix_nside"])
         elif grid_mapping_name == "point_cloud":
-            lon_coord, lat_coord = g.crs.attrs["coordinates"].split(" ")
+            lon_coord, lat_coord = g['crs'].attrs["coordinates"].split(" ")
             assert lon_coord in g and lat_coord in g
             assert g[lon_coord].shape[0] == g[lat_coord].shape[0]
             crs_len = g[lat_coord].shape[0]
@@ -80,7 +82,8 @@ def add_variable(
             _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr)
         _attributes["grid_mapping"] = "crs"
-        v = g.create_dataset(
+
+        v = g.create_array(
             name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs
         )
 
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 36e336e..b20133f 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -24,9 +24,8 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")):
     lat.attrs["units"] = "degree"
     lat.attrs["standard_name"] = "grid_latitude"
 
-
 def add_healpix_grid(dataset, order):
-    crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32), shape=(1,))
+    crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "healpix"
     crs.attrs["healpix_nside"] = 2**order
@@ -39,3 +38,4 @@ def add_healpix_hierarchy(dataset, order, prefix="healpix_"):
         add_healpix_grid(zg, o)
         if o < order:
             zg.attrs["hiopy::parent"] = f"{prefix}{o+1}"
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 01b1586..bfb1d4d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,5 +3,5 @@ wheel
 ruff
 pre-commit
 healpy
-zarr<3.0
+zarr>=2,<3
 aiohttp
-- 
GitLab


From 224cd2cca76b63194679d8ef08d17a925a7f62cb Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Mon, 17 Feb 2025 19:03:44 +0200
Subject: [PATCH 02/20] added argument to allow chunks-per-shard while
 configuring the dataset

---
 apps/hiopy/configure/configure.py      | 18 +++++++++++++++---
 apps/hiopy/configure/create_dataset.py |  1 -
 apps/hiopy/worker.py                   |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index eb3e090..76409ee 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -32,8 +32,8 @@ def _collect_groups(dataset):
 
 def add_height(dataset, name, n):
     for g in _collect_groups(dataset):
-        height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape)
-        height.append(data=np.arange(n)) 
+        height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape)
+        height[:] = np.arange(n)
         height.attrs["_ARRAY_DIMENSIONS"] = [name]
         height.attrs["axis"] = "Z"
         height.attrs["long_name"] = "generalized_height"
@@ -53,6 +53,7 @@ def add_variable(
     frac_mask=None,
     yac_name=None,
     attributes=None,
+    chunks_per_shard=None,
     **kwargs,
 ):
     for g in _collect_groups(dataset):
@@ -72,6 +73,7 @@ def add_variable(
             raise Exception("Unknown crs.")
 
         _attributes = attributes or {}
+
         if zaxis is None:
             shape = (*ntime, crs_len)
             _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
@@ -83,8 +85,18 @@ def add_variable(
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr)
         _attributes["grid_mapping"] = "crs"
 
+        _shard_shape = None
+        if chunks_per_shard is not None:
+            _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
+
         v = g.create_array(
-            name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs
+            name,
+            shape=shape,
+            dtype=np.float32,
+            fill_value=np.nan,
+            chunks=_chunk_shape,
+            shards=_shard_shape,
+            **kwargs,
         )
 
         # TODO: Use a generic name instead of hiopy such that it represents arbitrary grid too
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index b20133f..8ea822b 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -38,4 +38,3 @@ def add_healpix_hierarchy(dataset, order, prefix="healpix_"):
         add_healpix_grid(zg, o)
         if o < order:
             zg.attrs["hiopy::parent"] = f"{prefix}{o+1}"
-
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index a9f4e43..5dfb8ae 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -67,7 +67,7 @@ def main():
         assert len(args.datasets) == 1, "Loco only supports reading from 1 dataset"
         loco_store = zarr.MemoryStore()
         zarr.copy_store(args.datasets[0].store, loco_store)
-        zarr.convenience.consolidate_metadata(loco_store)
+        zarr.consolidate_metadata(loco_store)
         loco_server = LocoServer(loco_store, args.loco_host, args.loco_port)
         args.datasets = [zarr.open(store=loco_store)]
 
-- 
GitLab


From 6cb14f31ba1f06b4290c2f082e3b12853aa47d33 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 16:40:46 +0100
Subject: [PATCH 03/20] added some documentation to the create-dataset helper
 functions

---
 apps/hiopy/configure/create_dataset.py | 80 +++++++++++++++++++++++---
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 8ea822b..2f5ba11 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -1,11 +1,34 @@
 #!/usr/bin/env python3
 
 import numpy as np
+import zarr
 
 
-def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")):
-    # TODO: update create_dataset() calls to adhrer to zarr 3.0 recommendations
-    crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32))
+def add_coordinates(
+    dataset: zarr.Dataset,
+    coordinates: list[tuple[float, float]],
+    coord_names: tuple[str, str] = ("lon", "lat"),
+) -> None:
+    """
+    Add longitude and latitude coordinates to the specified Zarr dataset.
+
+    Parameters
+    ----------
+    dataset : zarr.Dataset
+        The Zarr dataset where the coordinates will be added.
+    coordinates : list[tuple[float, float]]
+        A list of tuples containing the (longitude, latitude) values for each point.
+    coord_names : tuple[str, str], optional
+        The names to use for the longitude and latitude arrays. Defaults to ("lon", "lat").
+
+    Notes
+    -----
+    This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude.
+    The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system.
+    Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)])
+    """
+
+    crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "point_cloud"
     crs.attrs["coordinates"] = f"{coord_names[0]} {coord_names[1]}"
@@ -24,7 +47,23 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")):
     lat.attrs["units"] = "degree"
     lat.attrs["standard_name"] = "grid_latitude"
 
-def add_healpix_grid(dataset, order):
+
+def add_healpix_grid(dataset: zarr.Dataset, order: int):
+    """
+        Add a HealPix grid to the specified Zarr dataset.
+
+        Parameters
+        ----------
+        dataset : zarr.Dataset
+            The Zarr dataset where the HealPix grid will be added to the crs.
+        order : int
+            The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
+
+        Notes
+        -----
+        The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set
+    accordingly. No values are added to it
+    """
     crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "healpix"
@@ -32,9 +71,36 @@ def add_healpix_grid(dataset, order):
     crs.attrs["healpix_order"] = "nest"
 
 
-def add_healpix_hierarchy(dataset, order, prefix="healpix_"):
-    for o in range(order + 1):
-        zg = dataset.create_group(f"{prefix}{o}")
+def add_healpix_hierarchy(
+    dataset: zarr.Dataset,
+    order: int,
+    nr_of_coarsenings: int = 4,
+    prefix: str = "healpix_",
+) -> None:
+    """
+    Add a hierarchical structure to the specified Zarr dataset for a given Healpix order.
+
+    This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid.
+    The `add_healpix_grid` function is used to create the actual grid arrays within each group.
+
+    Parameters
+    ----------
+    dataset : zarr.Dataset
+        The Zarr dataset where the hierarchy will be added.
+    order : int
+        The maximum level in the hierarchy.
+    nr_of_coarsenings : int
+        Number of coarsening aggregation levels needed
+    prefix : str, optional
+        The prefix to use for naming each group. Defaults to "healpix_".
+
+    Notes
+    -----
+    This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid.
+    The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation.
+    """
+    for o in range(order, order - nr_of_coarsenings, -1):
+        zg = dataset.create_group(name=f"{prefix}{o}")
         add_healpix_grid(zg, o)
         if o < order:
             zg.attrs["hiopy::parent"] = f"{prefix}{o+1}"
-- 
GitLab


From 16fcef0698fd9e578ab1b01b92c3f666f07a941f Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 18:06:22 +0100
Subject: [PATCH 04/20] bumped zarr required version to 3 and updated the
 scripts to use python3.11

---
 pyproject.toml                             | 2 +-
 requirements-dev.txt                       | 3 ++-
 scripts/setup_devenv/build_dependencies.sh | 6 +++---
 scripts/setup_devenv/levante_omp412.sh     | 4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a8f2422..9153da4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ version = "0.0.1"
 dependencies = [
   "numpy",
   "pybind11",
-  "zarr<3.0",
+  "zarr>=3.0",
   "healpy",
   "aiohttp",
   "regex_engine"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index bfb1d4d..751652f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,5 +3,6 @@ wheel
 ruff
 pre-commit
 healpy
-zarr>=2,<3
+zarr>=3
 aiohttp
+rich
\ No newline at end of file
diff --git a/scripts/setup_devenv/build_dependencies.sh b/scripts/setup_devenv/build_dependencies.sh
index a6b50a8..33a6022 100755
--- a/scripts/setup_devenv/build_dependencies.sh
+++ b/scripts/setup_devenv/build_dependencies.sh
@@ -220,9 +220,9 @@ function install_yac {
 
 
 function install_all {
-    echo "========================"
-    echo "== building HEALPIX & Co"
-    check_and_install healpix_cxx
+    # echo "========================"
+    # echo "== building HEALPIX & Co"
+    # check_and_install healpix_cxx
     echo "========================"
     echo "== building YAC & Co"
     check_and_install yac
diff --git a/scripts/setup_devenv/levante_omp412.sh b/scripts/setup_devenv/levante_omp412.sh
index 0a10e26..92054b2 100755
--- a/scripts/setup_devenv/levante_omp412.sh
+++ b/scripts/setup_devenv/levante_omp412.sh
@@ -36,7 +36,7 @@ INSTALL_PATH=$BUILD_PATH/install
 mkdir -p "$BUILD_PATH"
 pushd "$BUILD_PATH"
 
-eval `spack load --sh python@3.10.10%gcc@=11.2.0`
+eval `spack load --sh python@3.11.2%gcc@=11.2.0`
 
 # recommended to use a compute node for the build process with > 8 threads
 THREADS=64
@@ -61,7 +61,7 @@ echo "=== Building coyote ==="
 CC="${CC}" CXX="${CXX}" FC="${FC}" cmake $ABSOLUTE_coyote_ROOT -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug
 cmake --build . -j $THREADS
 
-cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.10/site-packages/
+cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.11/site-packages/
 export PYTHONPATH=${BUILD_PATH}/python:${ABSOLUTE_coyote_ROOT}/apps
 echo $PYTHONPATH
 
-- 
GitLab


From 42fa695362feb18e910408c67e0a98c5a8a2a2ef Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 18:09:39 +0100
Subject: [PATCH 05/20] fixed the value passed for chunk-shape in zarr 3

---
 apps/hiopy/configure/configure.py      | 12 +++++++-----
 apps/hiopy/configure/create_dataset.py | 26 +++++++++++++++-----------
 requirements-dev.txt                   |  2 +-
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 76409ee..09ca00d 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -76,18 +76,20 @@ def add_variable(
 
         if zaxis is None:
             shape = (*ntime, crs_len)
-            _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr)
         else:
             nheight = g[zaxis].shape[0]
             shape = (*ntime, nheight, crs_len)
-            _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr)
-        _attributes["grid_mapping"] = "crs"
 
+        _attributes["grid_mapping"] = "crs"
+        _chunk_shape = "auto"
         _shard_shape = None
-        if chunks_per_shard is not None:
-            _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
+
+        if chunk_shape is not None:
+            _chunk_shape = (np.min(chunk_shape, shape),)
+            if chunks_per_shard is not None:
+                _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
 
         v = g.create_array(
             name,
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 2f5ba11..097d4f7 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -5,7 +5,7 @@ import zarr
 
 
 def add_coordinates(
-    dataset: zarr.Dataset,
+    dataset: zarr.Group,
     coordinates: list[tuple[float, float]],
     coord_names: tuple[str, str] = ("lon", "lat"),
 ) -> None:
@@ -14,8 +14,8 @@ def add_coordinates(
 
     Parameters
     ----------
-    dataset : zarr.Dataset
-        The Zarr dataset where the coordinates will be added.
+    dataset : zarr.Group
+        The Zarr group where the coordinates will be added.
     coordinates : list[tuple[float, float]]
         A list of tuples containing the (longitude, latitude) values for each point.
     coord_names : tuple[str, str], optional
@@ -35,27 +35,31 @@ def add_coordinates(
 
     lat_list, lon_list = zip(*coordinates)
 
-    lon = dataset.create_dataset(coord_names[0], data=np.array(lon_list))
+    lon = dataset.create_dataset(
+        name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),)
+    )
     lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]]
     lon.attrs["long_name"] = "longitude"
     lon.attrs["units"] = "degree"
     lon.attrs["standard_name"] = "grid_longitude"
 
-    lat = dataset.create_dataset(coord_names[1], data=np.array(lat_list))
+    lat = dataset.create_dataset(
+        name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),)
+    )
     lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]]
     lat.attrs["long_name"] = "latitude"
     lat.attrs["units"] = "degree"
     lat.attrs["standard_name"] = "grid_latitude"
 
 
-def add_healpix_grid(dataset: zarr.Dataset, order: int):
+def add_healpix_grid(dataset: zarr.Group, order: int):
     """
         Add a HealPix grid to the specified Zarr dataset.
 
         Parameters
         ----------
-        dataset : zarr.Dataset
-            The Zarr dataset where the HealPix grid will be added to the crs.
+        dataset : zarr.Group
+            The Zarr group where the HealPix grid will be added to the crs.
         order : int
             The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
 
@@ -72,7 +76,7 @@ def add_healpix_grid(dataset: zarr.Dataset, order: int):
 
 
 def add_healpix_hierarchy(
-    dataset: zarr.Dataset,
+    dataset: zarr.Group,
     order: int,
     nr_of_coarsenings: int = 4,
     prefix: str = "healpix_",
@@ -85,8 +89,8 @@ def add_healpix_hierarchy(
 
     Parameters
     ----------
-    dataset : zarr.Dataset
-        The Zarr dataset where the hierarchy will be added.
+    dataset : zarr.Group
+        The Zarr group where the hierarchy will be added.
     order : int
         The maximum level in the hierarchy.
     nr_of_coarsenings : int
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 751652f..873ff3b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,4 +5,4 @@ pre-commit
 healpy
 zarr>=3
 aiohttp
-rich
\ No newline at end of file
+rich
-- 
GitLab


From bf843f720add2e948bb2b213921cb6724399803f Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 11 Mar 2025 15:01:29 +0100
Subject: [PATCH 06/20] fixed chunk-shape calculation

---
 apps/hiopy/configure/configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 09ca00d..7978999 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -87,7 +87,7 @@ def add_variable(
         _shard_shape = None
 
         if chunk_shape is not None:
-            _chunk_shape = (np.min(chunk_shape, shape),)
+            _chunk_shape = tuple(min(chunk_shape, shape))
             if chunks_per_shard is not None:
                 _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
 
-- 
GitLab


From 15f319380718835a0bd5365216fde89531b95bab Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 11 Mar 2025 15:36:18 +0100
Subject: [PATCH 07/20] changed the order of imports in the worker, as numpy
 (built with a different compiler) causes unexpected behaviour if imported
 first

---
 apps/hiopy/worker.py | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 5dfb8ae..5617b8b 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -1,29 +1,19 @@
 #!/usr/bin/env python3
 
-import json
-import logging
-from argparse import ArgumentParser
-from itertools import chain, groupby
-
-import numpy as np
-import zarr
-from coyote import (
-    Coyote,
-    ensure_enddef,
-    get_field_metadata,
-    group_comm_rank,
-    group_comm_size,
-    init,
-    run,
-    start_datetime,
-)
-
+from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime
 from ._data_handler import DataHandler
 from ._distribute_work import distribute_work
 from ._grids import def_grid, grid_id
 from ._zarr_utils import get_time_axis, get_var_group
 from .loco import LocoServer
 
+import numpy as np
+import zarr
+import logging
+from argparse import ArgumentParser
+from itertools import chain, groupby
+
+
 
 def main():
     parser = ArgumentParser()
-- 
GitLab


From fa991350fd770564e4e5023581fd753bfb7d2fd8 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Fri, 21 Mar 2025 12:31:19 +0100
Subject: [PATCH 08/20] zarr 3 fixes

---
 apps/hiopy/_zarr_utils.py |  7 +++----
 apps/hiopy/worker.py      | 13 ++++++-------
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py
index 39c6cde..0b2cf56 100644
--- a/apps/hiopy/_zarr_utils.py
+++ b/apps/hiopy/_zarr_utils.py
@@ -1,10 +1,9 @@
 import zarr
 
-
 def get_var_group(v):
-    root = zarr.Group(store=v.store)
-    last_slash_idx = v.name.rindex("/")
-    return root[v.name[:last_slash_idx]]
+    store = zarr.open(v.store)
+    parent_group_path = '/'.join(v.path.split('/')[:-1])
+    return store[parent_group_path]
 
 
 def get_time_axis(v):
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 5617b8b..9a9cc15 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -121,10 +121,10 @@ def main():
             # compute time start index
             t0 = (
                 np.datetime64(start_datetime())
-                - np.datetime64(var_group.time.attrs["units"][len("seconds since ") :])
-            ) / np.timedelta64(1, "s") + dt
-            t0_idx = np.searchsorted(var_group.time, t0)
-            assert var_group.time[t0_idx] == t0, "start_datetime not found in time axis"
+                - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :])
+            ) / np.timedelta64(1, "s")
+            t0_idx = np.searchsorted(var_group["time"], t0)
+            assert var_group["time"][t0_idx] == t0, "start_datetime not found in time axis"
 
             # see YAC_REDUCTION_TIME_NONE etc. (TODO: pass constants through coyote)
             time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4}
@@ -135,9 +135,8 @@ def main():
                 src_comp, src_grid = v.attrs["hiopy::yac_source"]
             else:
                 assert "hiopy::parent" in var_group.attrs, f"No source for field {v.name} specified"
-                parent_var_path = var_group.attrs["hiopy::parent"] + "/" + src_name
-                source_var = zarr.Group(store=v.store)[parent_var_path]
-                src_name = source_var.name
+                parent_var_name = var_group.attrs["hiopy::parent"] + "/" + v.name.split("/")[-1]
+                source_var = zarr.open(store=v.store)[parent_var_name]
                 source_var_gid = grid_id(source_var)
                 src_comp = src_grid = f"{args.process_group}_{source_var_gid}"
             time_method = v.attrs.get("hiopy::time_method", "point")
-- 
GitLab


From 9f19bda241232747c7fbccd20835e5762f522206 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Mar 2025 17:10:42 +0100
Subject: [PATCH 09/20] fixed the py-linting and turned off alphabetical
 sorting of imports due to an issue with the compiler version with which
 numpy was built

---
 apps/hiopy/configure/create_dataset.py | 42 ++++++++++++++------------
 apps/hiopy/worker.py                   | 17 +++++++++--
 pyproject.toml                         |  2 --
 3 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 097d4f7..483844e 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -23,8 +23,9 @@ def add_coordinates(
 
     Notes
     -----
-    This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude.
-    The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system.
+    This function creates two new arrays in the dataset: `coord_names[0]` for longitude and
+    `coord_names[1]` for latitude. The `crs` array is also created, with its attributes set
+    to indicate that it's a "point_cloud" coordinate reference system.
     Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)])
     """
 
@@ -54,19 +55,20 @@ def add_coordinates(
 
 def add_healpix_grid(dataset: zarr.Group, order: int):
     """
-        Add a HealPix grid to the specified Zarr dataset.
-
-        Parameters
-        ----------
-        dataset : zarr.Group
-            The Zarr group where the HealPix grid will be added to the crs.
-        order : int
-            The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
-
-        Notes
-        -----
-        The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set
-    accordingly. No values are added to it
+    Add a HealPix grid to the specified Zarr dataset.
+
+    Parameters
+    ----------
+    dataset : zarr.Group
+        The Zarr group where the HealPix grid will be added to the crs.
+    order : int
+        The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
+
+    Notes
+    -----
+    The HealPix grid is stored as a single array named "crs" in the dataset, with
+    the healpix_nside and healpix_order attributes set accordingly.
+    No values are added to it
     """
     crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
@@ -84,8 +86,9 @@ def add_healpix_hierarchy(
     """
     Add a hierarchical structure to the specified Zarr dataset for a given Healpix order.
 
-    This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid.
-    The `add_healpix_grid` function is used to create the actual grid arrays within each group.
+    This function creates a group hierarchy with each level representing a specific
+    resolution of the Healpix grid. The `add_healpix_grid` function is used to create the
+    actual grid arrays within each group.
 
     Parameters
     ----------
@@ -100,8 +103,9 @@ def add_healpix_hierarchy(
 
     Notes
     -----
-    This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid.
-    The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation.
+    This function sets up a hierarchical structure with each level representing a
+    specific resolution of the Healpix grid. The `hiopy::parent` attribute is used
+    to link each group to its parent in the hierarchy, allowing for efficient navigation.
     """
     for o in range(order, order - nr_of_coarsenings, -1):
         zg = dataset.create_group(name=f"{prefix}{o}")
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 9a9cc15..6b74d94 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -1,4 +1,14 @@
 #!/usr/bin/env python3
+from coyote import (
+    Coyote,
+    ensure_enddef,
+    get_field_metadata,
+    group_comm_rank,
+    group_comm_size,
+    init,
+    run,
+    start_datetime,
+)
 
 from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime
 from ._data_handler import DataHandler
@@ -7,12 +17,13 @@ from ._grids import def_grid, grid_id
 from ._zarr_utils import get_time_axis, get_var_group
 from .loco import LocoServer
 
-import numpy as np
-import zarr
-import logging
 from argparse import ArgumentParser
 from itertools import chain, groupby
 
+import json
+import logging
+import numpy as np
+import zarr
 
 
 def main():
diff --git a/pyproject.toml b/pyproject.toml
index 9153da4..5ff9121 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,5 @@ lint.select = [
     "B",
     # flake8-simplify
     "SIM",
-    # isort
-    "I",
 ]
 line-length = 100 # accomodate any libc++ motivated requirements of over 80 characters
-- 
GitLab


From d5de118ed3ef72e8428c8872b5af530f389ea4ae Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 1 Apr 2025 15:50:29 +0200
Subject: [PATCH 10/20] Ran pre-commit hook to fix issues from last changes

---
 apps/hiopy/_zarr_utils.py         |  3 ++-
 apps/hiopy/configure/configure.py | 10 ++++++----
 apps/hiopy/worker.py              |  1 -
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py
index 0b2cf56..8403cd2 100644
--- a/apps/hiopy/_zarr_utils.py
+++ b/apps/hiopy/_zarr_utils.py
@@ -1,8 +1,9 @@
 import zarr
 
+
 def get_var_group(v):
     store = zarr.open(v.store)
-    parent_group_path = '/'.join(v.path.split('/')[:-1])
+    parent_group_path = "/".join(v.path.split("/")[:-1])
     return store[parent_group_path]
 
 
diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 7978999..badc732 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -15,7 +15,9 @@ def add_time(dataset, startdate, enddate, dt, name="time"):
     time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s")
 
     for g in _collect_groups(dataset):
-        time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong)
+        time = g.create_array(
+            name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong
+        )
         time.append(data=time_data)
         time.attrs["_ARRAY_DIMENSIONS"] = (name,)
         time.attrs["axis"] = "T"
@@ -59,13 +61,13 @@ def add_variable(
     for g in _collect_groups(dataset):
         taxis_tuple = tuple() if taxis is None else (taxis,)
         ntime = tuple() if taxis is None else (g[taxis].shape[0],)
-        grid_mapping_name = g['crs'].attrs["grid_mapping_name"]
+        grid_mapping_name = g["crs"].attrs["grid_mapping_name"]
         spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell"
         crs_len = 0
         if grid_mapping_name == "healpix":
-            crs_len = healpy.nside2npix(g['crs'].attrs["healpix_nside"])
+            crs_len = healpy.nside2npix(g["crs"].attrs["healpix_nside"])
         elif grid_mapping_name == "point_cloud":
-            lon_coord, lat_coord = g['crs'].attrs["coordinates"].split(" ")
+            lon_coord, lat_coord = g["crs"].attrs["coordinates"].split(" ")
             assert lon_coord in g and lat_coord in g
             assert g[lon_coord].shape[0] == g[lat_coord].shape[0]
             crs_len = g[lat_coord].shape[0]
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 6b74d94..8926aba 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -10,7 +10,6 @@ from coyote import (
     start_datetime,
 )
 
-from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime
 from ._data_handler import DataHandler
 from ._distribute_work import distribute_work
 from ._grids import def_grid, grid_id
-- 
GitLab


From 8623e414edbda049e5ea48c26c09eae38b1817fd Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Mar 2025 17:23:57 +0100
Subject: [PATCH 11/20] bumped the python version to 3.11 with a local python
 installation referenced in scripts

---
 .gitlab-ci.yml | 2 +-
 README.md      | 2 +-
 ci/Dockerfile  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a5970e5..6f0042d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -80,7 +80,7 @@ hiopy-levante:
   script:
     - |
       module load git
-      /sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv venv
+      /home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv venv
       . venv/bin/activate
       ICON_DIR=`pwd -P`/icon
       (
diff --git a/README.md b/README.md
index 99ab095..49183eb 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ python -m pip install git+https://gitlab.dkrz.de/nils/coyote.git
 
 ## Installation with ICON on levante
 ```bash
-/sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv ./venv --prompt icon
+/home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv ./venv --prompt icon
 . ./venv/bin/activate
 git clone --recursive git@gitlab.dkrz.de:icon/icon.git icon
 pushd icon
diff --git a/ci/Dockerfile b/ci/Dockerfile
index fc08679..66faa8a 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.9-slim
+FROM python:3.11-slim
 LABEL maintainer="dreier@dkrz.de"
 
 #Remove some warnings
-- 
GitLab


From ea7d8e5756de21c90cd8917fbb3e4132470b86a2 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Mar 2025 17:37:01 +0100
Subject: [PATCH 12/20] added rich as a mandatory dependency

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5ff9121..003727b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,8 @@ dependencies = [
   "zarr>=3.0",
   "healpy",
   "aiohttp",
-  "regex_engine"
+  "regex_engine", 
+  "rich"
 ]
 
 [tool.scikit-build]
-- 
GitLab


From f8fba52820efb664e940c77a03900c65832bbc8b Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Mar 2025 18:19:34 +0100
Subject: [PATCH 13/20] fixing zarr version to v3.0.6 for essential bug fixes

---
 apps/hiopy/tests/CMakeLists.txt | 3 ++-
 ci/Dockerfile                   | 2 +-
 pyproject.toml                  | 4 ++--
 requirements-dev.txt            | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/apps/hiopy/tests/CMakeLists.txt b/apps/hiopy/tests/CMakeLists.txt
index 0d1eb8c..f1ad72b 100644
--- a/apps/hiopy/tests/CMakeLists.txt
+++ b/apps/hiopy/tests/CMakeLists.txt
@@ -29,8 +29,9 @@ set_tests_properties(hiopy.create_simple_dataset_4threads PROPERTIES
 
 add_test(
   NAME hiopy.check_hierarchy
-  COMMAND "/usr/bin/env" "python3" "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
+  COMMAND ${Python_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
 )
+
 set_tests_properties(hiopy.check_hierarchy PROPERTIES
   FIXTURES_REQUIRED   simple_dataset.zarr
   ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/python:${CMAKE_SOURCE_DIR}/apps:$ENV{PYTHONPATH}"
diff --git a/ci/Dockerfile b/ci/Dockerfile
index 66faa8a..22bb9f7 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -23,7 +23,7 @@ RUN apt-get update \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-RUN pip install --no-cache-dir numpy mpi4py matplotlib cython healpy aiohttp zarr
+RUN pip install --no-cache-dir numpy mpi4py matplotlib cython healpy aiohttp zarr rich
 
 # install yaxt
 RUN curl -s -L https://gitlab.dkrz.de/dkrz-sw/yaxt/-/archive/release-0.11.3/yaxt-release-0.11.3.tar.gz | \
diff --git a/pyproject.toml b/pyproject.toml
index 003727b..2d27f64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,10 +8,10 @@ version = "0.0.1"
 dependencies = [
   "numpy",
   "pybind11",
-  "zarr>=3.0",
+  "zarr>=3.0.6",
   "healpy",
   "aiohttp",
-  "regex_engine", 
+  "regex_engine",
   "rich"
 ]
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 873ff3b..f930191 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,6 +3,6 @@ wheel
 ruff
 pre-commit
 healpy
-zarr>=3
+zarr>=3.0.6
 aiohttp
 rich
-- 
GitLab


From cf118b6b41313a620331b3f02c117b48bb3c6b85 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 1 Apr 2025 16:54:18 +0200
Subject: [PATCH 14/20] Fixes for zarr-3 which slipped through

---
 apps/hiopy/configure/create_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 483844e..1c0f027 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -36,7 +36,7 @@ def add_coordinates(
 
     lat_list, lon_list = zip(*coordinates)
 
-    lon = dataset.create_dataset(
+    lon = dataset.create_array(
         name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),)
     )
     lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]]
@@ -44,7 +44,7 @@ def add_coordinates(
     lon.attrs["units"] = "degree"
     lon.attrs["standard_name"] = "grid_longitude"
 
-    lat = dataset.create_dataset(
+    lat = dataset.create_array(
         name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),)
     )
     lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]]
-- 
GitLab


From d8427fd6f17e266a1d54e0746a212bbcb38f9042 Mon Sep 17 00:00:00 2001
From: Nils-Arne Dreier <dreier@dkrz.de>
Date: Wed, 26 Mar 2025 17:36:08 +0100
Subject: [PATCH 15/20] fix: copy_metadata

---
 apps/hiopy/_zarr_utils.py | 11 ++++++++++-
 apps/hiopy/worker.py      | 18 +++++++++---------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py
index 8403cd2..dc08f3f 100644
--- a/apps/hiopy/_zarr_utils.py
+++ b/apps/hiopy/_zarr_utils.py
@@ -4,7 +4,10 @@ import zarr
 def get_var_group(v):
     store = zarr.open(v.store)
     parent_group_path = "/".join(v.path.split("/")[:-1])
-    return store[parent_group_path]
+    if parent_group_path == "":
+        return store
+    else:
+        return store[parent_group_path]
 
 
 def get_time_axis(v):
@@ -18,3 +21,9 @@ def get_time_axis(v):
         return time_axis
     else:
         return None
+
+def get_var_parent_group(v):
+    var_group = get_var_group(v)
+    parent_var_path = var_group.attrs["hiopy::parent"]
+    parent_group = zarr.open(v.store)[parent_var_path]
+    return parent_group
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 8926aba..13f0986 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -13,7 +13,7 @@ from coyote import (
 from ._data_handler import DataHandler
 from ._distribute_work import distribute_work
 from ._grids import def_grid, grid_id
-from ._zarr_utils import get_time_axis, get_var_group
+from ._zarr_utils import get_var_group, get_var_parent_group
 from .loco import LocoServer
 
 from argparse import ArgumentParser
@@ -145,8 +145,9 @@ def main():
                 src_comp, src_grid = v.attrs["hiopy::yac_source"]
             else:
                 assert "hiopy::parent" in var_group.attrs, f"No source for field {v.name} specified"
-                parent_var_name = var_group.attrs["hiopy::parent"] + "/" + v.name.split("/")[-1]
-                source_var = zarr.open(store=v.store)[parent_var_name]
+                parent_group = get_var_parent_group(v)
+                source_var = parent_group[v.basename]
+                src_name = source_var.name
                 source_var_gid = grid_id(source_var)
                 src_comp = src_grid = f"{args.process_group}_{source_var_gid}"
             time_method = v.attrs.get("hiopy::time_method", "point")
@@ -185,14 +186,13 @@ def main():
             )
 
     def get_source_triple(v):
-        group = get_var_group(v)
-        src_field = v.attrs.get("hiopy::src_name", v.basename)
-        if "hiopy::parent" in group.attrs:
-            parent_var_path = group.attrs["hiopy::parent"] + "/" + src_field
-            source_var = zarr.Group(store=v.store)[parent_var_path]
-            return get_source_triple(source_var)
+        var_group = get_var_group(v)
+        if "hiopy::parent" in var_group.attrs:
+            pgroup = get_var_parent_group(v)
+            return get_source_triple(pgroup[v.basename])
         elif "hiopy::yac_source" in v.attrs:
             src_comp, src_grid = v.attrs["hiopy::yac_source"]
+            src_field = v.attrs.get("hiopy::src_name", v.basename)
             return src_comp, src_grid, src_field
         else:
             raise RuntimeError("Invalid attributes: " + str(dict(v.attrs)))
-- 
GitLab


From 5bff601fc4aea359a5f79fccd7959b5677e0b517 Mon Sep 17 00:00:00 2001
From: Nils-Arne Dreier <dreier@dkrz.de>
Date: Thu, 27 Mar 2025 14:12:06 +0100
Subject: [PATCH 16/20] fix: keep all data_handlers for final flush

---
 apps/hiopy/worker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 13f0986..6822d98 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -115,13 +115,13 @@ def main():
         gid: Coyote(f"{args.process_group}_{gid}") for gid, data_vars, chunk_slice in my_data_vars
     }
 
+    data_handlers = []
+
     for gid, data_vars, chunk_slice in my_data_vars:
         coyote = coyote_instances[gid]
         # all vars in data_vars define the same grid
         def_grid(coyote, data_vars[0], chunk_slice)
 
-        data_handlers = []
-
         for v in data_vars:
             # compute timestep
             var_group = get_var_group(v)
-- 
GitLab


From 1c56757c5e3d898b8eab2512612f89ad8908179f Mon Sep 17 00:00:00 2001
From: Nils-Arne Dreier <dreier@dkrz.de>
Date: Thu, 27 Mar 2025 18:03:40 +0100
Subject: [PATCH 17/20] fix: avoid reopening the zarr store again and again

---
 apps/hiopy/_zarr_utils.py              | 13 +++++++------
 apps/hiopy/configure/configure.py      |  5 +----
 apps/hiopy/configure/create_dataset.py |  6 ++++--
 apps/hiopy/worker.py                   | 12 ++++++------
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py
index dc08f3f..a46b8cb 100644
--- a/apps/hiopy/_zarr_utils.py
+++ b/apps/hiopy/_zarr_utils.py
@@ -2,12 +2,13 @@ import zarr
 
 
 def get_var_group(v):
-    store = zarr.open(v.store)
-    parent_group_path = "/".join(v.path.split("/")[:-1])
-    if parent_group_path == "":
-        return store
+    if not hasattr(v, "root"):
+        v.root = zarr.open(v.store)
+    group_path = "/".join(v.path.split("/")[:-1])
+    if group_path == "":
+        return v.root
     else:
-        return store[parent_group_path]
+        return v.root[group_path]
 
 
 def get_time_axis(v):
@@ -25,5 +26,5 @@ def get_time_axis(v):
 def get_var_parent_group(v):
     var_group = get_var_group(v)
     parent_var_path = var_group.attrs["hiopy::parent"]
-    parent_group = zarr.open(v.store)[parent_var_path]
+    parent_group = v.root[parent_var_path]
     return parent_group
diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index badc732..de186ac 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -15,10 +15,7 @@ def add_time(dataset, startdate, enddate, dt, name="time"):
     time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s")
 
     for g in _collect_groups(dataset):
-        time = g.create_array(
-            name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong
-        )
-        time.append(data=time_data)
+        time = g.create_dataset(name, data=time_data, fill_value=None, shape=time_data.shape)
         time.attrs["_ARRAY_DIMENSIONS"] = (name,)
         time.attrs["axis"] = "T"
         time.attrs["calendar"] = "proleptic_gregorian"
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 1c0f027..8a4d193 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -37,16 +37,18 @@ def add_coordinates(
     lat_list, lon_list = zip(*coordinates)
 
     lon = dataset.create_array(
-        name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),)
+        name=coord_names[0], dtype=np.float32, shape=(len(coordinates),)
     )
+    lon[:] = np.array(lon_list)
     lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]]
     lon.attrs["long_name"] = "longitude"
     lon.attrs["units"] = "degree"
     lon.attrs["standard_name"] = "grid_longitude"
 
     lat = dataset.create_array(
-        name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),)
+        name=coord_names[1], dtype=np.float32, shape=(len(coordinates),)
     )
+    lat[:] = np.array(lat_list)
     lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]]
     lat.attrs["long_name"] = "latitude"
     lat.attrs["units"] = "degree"
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 6822d98..c5a82c9 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -13,7 +13,7 @@ from coyote import (
 from ._data_handler import DataHandler
 from ._distribute_work import distribute_work
 from ._grids import def_grid, grid_id
-from ._zarr_utils import get_var_group, get_var_parent_group
+from ._zarr_utils import get_var_group, get_var_parent_group, get_time_axis
 from .loco import LocoServer
 
 from argparse import ArgumentParser
@@ -79,15 +79,15 @@ def main():
     )
 
     # find all variables considered to be written in the input datasets:
-    def collect_data_vars(group):
+    def collect_data_vars(group, root):
         for _name, item in group.arrays():
             if "hiopy::enable" in item.attrs and item.attrs["hiopy::enable"]:
+                item.root = root
                 yield item
         for _name, item in group.groups():
-            item.parent = group
-            yield from collect_data_vars(item)
+            yield from collect_data_vars(item, root)
 
-    all_data_vars = list(chain(*[collect_data_vars(z) for z in args.datasets]))
+    all_data_vars = list(chain(*[collect_data_vars(z, z) for z in args.datasets]))
     logging.info(f"Found {len(all_data_vars)} variables")
     if len(all_data_vars) == 0:
         raise RuntimeError("No variables found by the hiopy worker.")
@@ -134,7 +134,7 @@ def main():
                 - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :])
             ) / np.timedelta64(1, "s")
             t0_idx = np.searchsorted(var_group["time"], t0)
-            assert var_group["time"][t0_idx] == t0, "start_datetime not found in time axis"
+            assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}"
 
             # see YAC_REDUCTION_TIME_NONE etc. (TODO: pass constants through coyote)
             time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4}
-- 
GitLab


From f8a0fc3079a60d251ad9ca9ae41701d20fd2a8ad Mon Sep 17 00:00:00 2001
From: Nils-Arne Dreier <dreier@dkrz.de>
Date: Fri, 21 Mar 2025 16:14:57 +0100
Subject: [PATCH 18/20] fix: datetime shift

---
 apps/hiopy/worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index c5a82c9..24b999b 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -132,7 +132,7 @@ def main():
             t0 = (
                 np.datetime64(start_datetime())
                 - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :])
-            ) / np.timedelta64(1, "s")
+            ) / np.timedelta64(1, "s") + dt
             t0_idx = np.searchsorted(var_group["time"], t0)
             assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}"
 
-- 
GitLab


From b269ec2fae675305281033566f3e7a9a14057db9 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Mon, 17 Mar 2025 19:12:10 +0100
Subject: [PATCH 19/20] reconsolidating zmetadata after pulling metadata for
 fields via yac

---
 apps/hiopy/_zarr_utils.py              |  1 +
 apps/hiopy/configure/create_dataset.py |  8 ++------
 apps/hiopy/tests/CMakeLists.txt        |  2 +-
 apps/hiopy/tests/check_hierarchy.py    |  4 +++-
 apps/hiopy/worker.py                   | 10 +++++++---
 5 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py
index a46b8cb..4010274 100644
--- a/apps/hiopy/_zarr_utils.py
+++ b/apps/hiopy/_zarr_utils.py
@@ -23,6 +23,7 @@ def get_time_axis(v):
     else:
         return None
 
+
 def get_var_parent_group(v):
     var_group = get_var_group(v)
     parent_var_path = var_group.attrs["hiopy::parent"]
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 8a4d193..dd84adf 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -36,18 +36,14 @@ def add_coordinates(
 
     lat_list, lon_list = zip(*coordinates)
 
-    lon = dataset.create_array(
-        name=coord_names[0], dtype=np.float32, shape=(len(coordinates),)
-    )
+    lon = dataset.create_array(name=coord_names[0], dtype=np.float32, shape=(len(coordinates),))
     lon[:] = np.array(lon_list)
     lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]]
     lon.attrs["long_name"] = "longitude"
     lon.attrs["units"] = "degree"
     lon.attrs["standard_name"] = "grid_longitude"
 
-    lat = dataset.create_array(
-        name=coord_names[1], dtype=np.float32, shape=(len(coordinates),)
-    )
+    lat = dataset.create_array(name=coord_names[1], dtype=np.float32, shape=(len(coordinates),))
     lat[:] = np.array(lat_list)
     lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]]
     lat.attrs["long_name"] = "latitude"
diff --git a/apps/hiopy/tests/CMakeLists.txt b/apps/hiopy/tests/CMakeLists.txt
index f1ad72b..6f96015 100644
--- a/apps/hiopy/tests/CMakeLists.txt
+++ b/apps/hiopy/tests/CMakeLists.txt
@@ -29,7 +29,7 @@ set_tests_properties(hiopy.create_simple_dataset_4threads PROPERTIES
 
 add_test(
   NAME hiopy.check_hierarchy
-  COMMAND ${Python_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
+  COMMAND "/usr/bin/env" "python3" "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
 )
 
 set_tests_properties(hiopy.check_hierarchy PROPERTIES
diff --git a/apps/hiopy/tests/check_hierarchy.py b/apps/hiopy/tests/check_hierarchy.py
index 4b6905a..fbfbcb3 100755
--- a/apps/hiopy/tests/check_hierarchy.py
+++ b/apps/hiopy/tests/check_hierarchy.py
@@ -21,7 +21,9 @@ def check_interpolation(source_var, target_var):
 
 
 def check_metadata(var):
-    assert "hiopy::copy_metadata" not in var.attrs
+    assert (
+        "hiopy::copy_metadata" not in var.attrs
+    ), f"Attributes of {var.name} {var.attrs.asdict()} is not cleaned"
 
 
 def _collect_groups(dataset, parent=None):
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 24b999b..ebf7419 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -134,7 +134,8 @@ def main():
                 - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :])
             ) / np.timedelta64(1, "s") + dt
             t0_idx = np.searchsorted(var_group["time"], t0)
-            assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}"
+            assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis \
+                                    at index {t0_idx} which has value {var_group['time'][t0_idx]}"
 
             # see YAC_REDUCTION_TIME_NONE etc. (TODO: pass constants through coyote)
             time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4}
@@ -203,13 +204,16 @@ def main():
             if "hiopy::copy_metadata" in v.attrs:
                 comp, grid, field = get_source_triple(v)
                 md_str = get_field_metadata(comp, grid, field)
-                print(md_str)
                 metadata = json.loads(md_str)
-                print(metadata)
+                logging.debug(f"Found metadata for {field}: {metadata}")
                 for key, value in metadata["cf"].items():
                     v.attrs[key] = value
                 del v.attrs["hiopy::copy_metadata"]  # copy only once
 
+        # re-consolidate the newly updated metadata
+        for ds in args.datasets:
+            zarr.consolidate_metadata(ds.store)
+
     run()
 
     for dh in data_handlers:
-- 
GitLab


From 9a8307db4378ae21592cf200b50d4fa348d57ec7 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Fri, 4 Apr 2025 10:41:27 +0200
Subject: [PATCH 20/20] ci: added python3.11 to project work accessible across
 developers (to be replaced once there is a system installation on levante)

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6f0042d..6f41b7d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -80,7 +80,7 @@ hiopy-levante:
   script:
     - |
       module load git
-      /home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv venv
+      /work/bk1414/m301120/sw-spack-common/python-3.11.2-sk474k/bin/python -m venv venv
       . venv/bin/activate
       ICON_DIR=`pwd -P`/icon
       (
-- 
GitLab