From 4605bfb837962e68b67292f2e3aa0a68b971199f Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Fri, 14 Feb 2025 16:01:25 +0200
Subject: [PATCH 1/7] updated the way to create array in groups as per zarr 3

---
 apps/hiopy/configure/configure.py      | 20 +++++++++++---------
 apps/hiopy/configure/create_dataset.py | 13 +++++++------
 requirements-dev.txt                   |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 9ed4ba0..d1b1ffc 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -40,7 +40,8 @@ def _collect_groups(dataset):
 
 def add_height(dataset, name, n):
     for g in _collect_groups(dataset):
-        height = g.create_dataset(name, data=np.arange(n))
+        height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape)
+        height.append(data=np.arange(n)) 
         height.attrs["_ARRAY_DIMENSIONS"] = [name]
         height.attrs["axis"] = "Z"
         height.attrs["long_name"] = "generalized_height"
@@ -62,31 +63,32 @@ def add_variable(
     **kwargs,
 ):
     for g in _collect_groups(dataset):
-        ntime = g.time.shape[0]
+        ntime = g.get('time').shape[0]
 
-        grid_mapping_name = g.crs.attrs["grid_mapping_name"]
+        grid_mapping_name = g.get('crs').attrs["grid_mapping_name"]
         spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell"
         crs_len = 0
         if grid_mapping_name == "healpix":
-            crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"])
+            crs_len = healpy.nside2npix(g.get('crs').attrs["healpix_nside"])
         else:
             if "clon" not in g or "clat" not in g:
                 raise Exception("Coordinates not defined appropriately in the dataset.")
-            assert g.clon.shape[0] == g.clat.shape[0]
-            crs_len = g.clon.shape[0]
+            assert g.get('clon').shape[0] == g.get('clat').shape[0]
+            crs_len = g.get('clon').shape[0]
 
         _attributes = attributes or {}
         if zaxis is None:
             shape = (ntime, crs_len)
-            _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
+            _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1)) if chunk_shape is not None else ()
             _attributes["_ARRAY_DIMENSIONS"] = ("time", spatial_attr)
         else:
             nheight = g[zaxis].shape[0]
             shape = (ntime, nheight, crs_len)
-            _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
+            _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1), np.minimum(chunk_shape, shape).item(2)) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = ("time", zaxis, spatial_attr)
         _attributes["grid_mapping"] = "crs"
-        v = g.create_dataset(
+
+        v = g.create_array(
             name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs
         )
 
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 453e213..0e47c6f 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
 import numpy as np
+import zarr
 
 
 def add_timedata(dataset, startdate, enddate, dt):
     time_data = (
         np.arange(startdate, enddate, np.timedelta64(dt, "s")) - startdate
     ) // np.timedelta64(1, "s")
-    time = dataset.create_dataset("time", data=time_data, fill_value=None, shape=time_data.shape)
+    time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong)
+    time.append(data=time_data)
     time.attrs["_ARRAY_DIMENSIONS"] = ("time",)
     time.attrs["axis"] = "T"
     time.attrs["calendar"] = "proleptic_gregorian"
@@ -16,8 +18,7 @@ def add_timedata(dataset, startdate, enddate, dt):
 
 def create_dataset_icon(dataset, startdate, enddate, dt, grid_name="icon_atmo"):
     assert grid_name in ["icon_atmo", "icon_ocean", "point_cloud"]
-    # TODO: update create_dataset() calls to adhrer to zarr 3.0 recommendations
-    crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32))
+    crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = grid_name
     crs.attrs["coordinates"] = "clon clat"
@@ -26,8 +27,7 @@ def create_dataset_icon(dataset, startdate, enddate, dt, grid_name="icon_atmo"):
 
 
 def create_dataset_healpix(dataset, startdate, enddate, dt, order):
-    print(dataset)
-    crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32), shape=(1,))
+    crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "healpix"
     crs.attrs["healpix_nside"] = 2**order
@@ -39,7 +39,8 @@ def create_dataset_healpix(dataset, startdate, enddate, dt, order):
 # TODO: For the future, allow hierarchy also in other dataset types
 def create_dataset_healpix_hierarchic(dataset, startdate, enddate, dt, order, prefix="healpix_"):
     for o in range(order + 1):
-        zg = dataset.create_group(f"{prefix}{o}")
+        zg = dataset.create_group(name=f"{prefix}{o}")
         create_dataset_healpix(zg, startdate, enddate, dt, o)
         if o < order:
             zg.attrs["hiopy::parent"] = f"{prefix}{o+1}"
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 01b1586..bfb1d4d 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,5 +3,5 @@ wheel
 ruff
 pre-commit
 healpy
-zarr<3.0
+zarr>=2,<3
 aiohttp
-- 
GitLab


From f6f32c33262c13ae9cd3d0025783b81c23b29ab1 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Mon, 17 Feb 2025 19:03:44 +0200
Subject: [PATCH 2/7] added argument to allow chunks-per-shard while
 configuring the dataset

---
 apps/hiopy/configure/configure.py      | 47 ++++++++++++++++++++------
 apps/hiopy/configure/create_dataset.py |  8 ++---
 apps/hiopy/worker.py                   |  8 ++---
 3 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index d1b1ffc..86d0202 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -40,8 +40,8 @@ def _collect_groups(dataset):
 
 def add_height(dataset, name, n):
     for g in _collect_groups(dataset):
-        height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape)
-        height.append(data=np.arange(n)) 
+        height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape)
+        height[:] = np.arange(n)
         height.attrs["_ARRAY_DIMENSIONS"] = [name]
         height.attrs["axis"] = "Z"
         height.attrs["long_name"] = "generalized_height"
@@ -60,36 +60,60 @@ def add_variable(
     frac_mask=None,
     yac_name=None,
     attributes=None,
+    chunks_per_shard=None,
     **kwargs,
 ):
     for g in _collect_groups(dataset):
-        ntime = g.get('time').shape[0]
+        ntime = g.get("time").shape[0]
 
-        grid_mapping_name = g.get('crs').attrs["grid_mapping_name"]
+        grid_mapping_name = g.get("crs").attrs["grid_mapping_name"]
         spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell"
         crs_len = 0
         if grid_mapping_name == "healpix":
-            crs_len = healpy.nside2npix(g.get('crs').attrs["healpix_nside"])
+            crs_len = healpy.nside2npix(g.get("crs").attrs["healpix_nside"])
         else:
             if "clon" not in g or "clat" not in g:
                 raise Exception("Coordinates not defined appropriately in the dataset.")
-            assert g.get('clon').shape[0] == g.get('clat').shape[0]
-            crs_len = g.get('clon').shape[0]
+            assert g.get("clon").shape[0] == g.get("clat").shape[0]
+            crs_len = g.get("clon").shape[0]
 
         _attributes = attributes or {}
+
         if zaxis is None:
             shape = (ntime, crs_len)
-            _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1)) if chunk_shape is not None else ()
+            _chunk_shape = (
+                (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1))
+                if chunk_shape is not None
+                else ()
+            )
             _attributes["_ARRAY_DIMENSIONS"] = ("time", spatial_attr)
         else:
             nheight = g[zaxis].shape[0]
             shape = (ntime, nheight, crs_len)
-            _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1), np.minimum(chunk_shape, shape).item(2)) if chunk_shape is not None else None
+            _chunk_shape = (
+                (
+                    np.minimum(chunk_shape, shape).item(0),
+                    nheight,
+                    np.minimum(chunk_shape, shape).item(2),
+                )
+                if chunk_shape is not None
+                else None
+            )
             _attributes["_ARRAY_DIMENSIONS"] = ("time", zaxis, spatial_attr)
         _attributes["grid_mapping"] = "crs"
 
+        _shard_shape = None
+        if chunks_per_shard is not None:
+            _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
+
         v = g.create_array(
-            name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs
+            name,
+            shape=shape,
+            dtype=np.float32,
+            fill_value=np.nan,
+            chunks=_chunk_shape,
+            shards=_shard_shape,
+            **kwargs,
         )
 
         # TODO: Use a generic name instead of hiopy such that it represents arbitrary grid too
@@ -119,7 +143,7 @@ def extend_time(dataset, enddate):
 
 
 def consolidate(dataset):
-    zarr.convenience.consolidate_metadata(dataset.store)
+    zarr.consolidate_metadata(dataset.store)
 
 
 def info(dataset, tree):
@@ -193,6 +217,7 @@ def main():
         "--frac-mask", type=str, default=None, help="Name of the frac_mask array in the same group"
     )
     add_variable_parser.add_argument("--yac-name", type=str, help="name of the yac field")
+    add_variable_parser.add_argument("--chunks-per-shard", type=int)
 
     extend_time_parser = subparsers.add_parser("extend-time")
     extend_time_parser.set_defaults(func=extend_time)
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 0e47c6f..bb048ac 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -1,15 +1,16 @@
 #!/usr/bin/env python3
 
 import numpy as np
-import zarr
 
 
 def add_timedata(dataset, startdate, enddate, dt):
     time_data = (
         np.arange(startdate, enddate, np.timedelta64(dt, "s")) - startdate
     ) // np.timedelta64(1, "s")
-    time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong)
-    time.append(data=time_data)
+    time = dataset.create_array(
+        name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong
+    )
+    time[:] = time_data
     time.attrs["_ARRAY_DIMENSIONS"] = ("time",)
     time.attrs["axis"] = "T"
     time.attrs["calendar"] = "proleptic_gregorian"
@@ -43,4 +44,3 @@ def create_dataset_healpix_hierarchic(dataset, startdate, enddate, dt, order, pr
         create_dataset_healpix(zg, startdate, enddate, dt, o)
         if o < order:
             zg.attrs["hiopy::parent"] = f"{prefix}{o+1}"
-
diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index 794f769..c031239 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -56,7 +56,7 @@ def main():
         assert len(args.datasets) == 1, "Loco only supports reading from 1 dataset"
         loco_store = zarr.MemoryStore()
         zarr.copy_store(args.datasets[0].store, loco_store)
-        zarr.convenience.consolidate_metadata(loco_store)
+        zarr.consolidate_metadata(loco_store)
         loco_server = LocoServer(loco_store, args.loco_host, args.loco_port)
         args.datasets = [zarr.open(store=loco_store)]
 
@@ -126,10 +126,10 @@ def main():
             # compute time start index
             t0 = (
                 np.datetime64(start_datetime())
-                - np.datetime64(v.group.time.attrs["units"][len("seconds since ") :])
+                - np.datetime64(v.group["time"].attrs["units"][len("seconds since ") :])
             ) / np.timedelta64(1, "s")
-            t0_idx = np.searchsorted(v.group.time, t0)
-            assert v.group.time[t0_idx] == t0, "start_datetime not found in time axis"
+            t0_idx = np.searchsorted(v.group["time"], t0)
+            assert v.group["time"][t0_idx] == t0, "start_datetime not found in time axis"
 
             dt = time_coordinate[t0_idx + 1] - time_coordinate[t0_idx]
 
-- 
GitLab


From 3d0a938be375716c07db1a842b14f806a08fe892 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 16:40:46 +0100
Subject: [PATCH 3/7] added some documentation to the create-dataset helper
 functions

---
 apps/hiopy/configure/create_dataset.py | 77 ++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 4 deletions(-)

diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index 51b4bdb..ca976b3 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -1,8 +1,33 @@
 #!/usr/bin/env python3
 
 import numpy as np
+import zarr
+
+
+def add_coordinates(
+    dataset: zarr.Dataset,
+    coordinates: list[tuple[float, float]],
+    coord_names: tuple[str, str] = ("lon", "lat"),
+) -> None:
+    """
+    Add longitude and latitude coordinates to the specified Zarr dataset.
+
+    Parameters
+    ----------
+    dataset : zarr.Dataset
+        The Zarr dataset where the coordinates will be added.
+    coordinates : list[tuple[float, float]]
+        A list of tuples containing the (longitude, latitude) values for each point.
+    coord_names : tuple[str, str], optional
+        The names to use for the longitude and latitude arrays. Defaults to ("lon", "lat").
+
+    Notes
+    -----
+    This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude.
+    The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system.
+    Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)])
+    """
 
-def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")):
     crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "point_cloud"
@@ -28,15 +53,59 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")):
     lat.attrs["units"] = "degree"
     lat.attrs["standard_name"] = "grid_latitude"
 
-def add_healpix_grid(dataset, order):
+
+def add_healpix_grid(dataset: zarr.Dataset, order: int):
+    """
+        Add a HealPix grid to the specified Zarr dataset.
+
+        Parameters
+        ----------
+        dataset : zarr.Dataset
+            The Zarr dataset where the HealPix grid will be added to the crs.
+        order : int
+            The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
+
+        Notes
+        -----
+        The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set
+        accordingly. No values are written to the array itself.
+    """
     crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
     crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
     crs.attrs["grid_mapping_name"] = "healpix"
     crs.attrs["healpix_nside"] = 2**order
     crs.attrs["healpix_order"] = "nest"
 
-def add_healpix_hierarchy(dataset, order, prefix="healpix_"):
-    for o in range(order + 1):
+
+def add_healpix_hierarchy(
+    dataset: zarr.Dataset,
+    order: int,
+    nr_of_coarsenings: int = 4,
+    prefix: str = "healpix_",
+) -> None:
+    """
+    Add a hierarchical structure to the specified Zarr dataset for a given Healpix order.
+
+    This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid.
+    The `add_healpix_grid` function is used to create the actual grid arrays within each group.
+
+    Parameters
+    ----------
+    dataset : zarr.Dataset
+        The Zarr dataset where the hierarchy will be added.
+    order : int
+        The maximum level in the hierarchy.
+    nr_of_coarsenings : int
+        Number of coarsening aggregation levels needed
+    prefix : str, optional
+        The prefix to use for naming each group. Defaults to "healpix_".
+
+    Notes
+    -----
+    This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid.
+    The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation.
+    """
+    for o in range(order, max(order - nr_of_coarsenings, -1), -1):  # stop at order 0
         zg = dataset.create_group(name=f"{prefix}{o}")
         add_healpix_grid(zg, o)
         if o < order:
-- 
GitLab


From 3403df03b6f4e45fb2a2b9159981b6ea5da860f6 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 18:06:22 +0100
Subject: [PATCH 4/7] bumped zarr required version to 3 and updated the scripts
 to use python3.11

---
 pyproject.toml                             | 2 +-
 requirements-dev.txt                       | 3 ++-
 scripts/setup_devenv/build_dependencies.sh | 6 +++---
 scripts/setup_devenv/levante_omp412.sh     | 4 ++--
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index a8f2422..9153da4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ version = "0.0.1"
 dependencies = [
   "numpy",
   "pybind11",
-  "zarr<3.0",
+  "zarr>=3.0",
   "healpy",
   "aiohttp",
   "regex_engine"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index bfb1d4d..751652f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,5 +3,6 @@ wheel
 ruff
 pre-commit
 healpy
-zarr>=2,<3
+zarr>=3
 aiohttp
+rich
\ No newline at end of file
diff --git a/scripts/setup_devenv/build_dependencies.sh b/scripts/setup_devenv/build_dependencies.sh
index a6b50a8..33a6022 100755
--- a/scripts/setup_devenv/build_dependencies.sh
+++ b/scripts/setup_devenv/build_dependencies.sh
@@ -220,9 +220,9 @@ function install_yac {
 
 
 function install_all {
-    echo "========================"
-    echo "== building HEALPIX & Co"
-    check_and_install healpix_cxx
+    # echo "========================"
+    # echo "== building HEALPIX & Co"
+    # check_and_install healpix_cxx
     echo "========================"
     echo "== building YAC & Co"
     check_and_install yac
diff --git a/scripts/setup_devenv/levante_omp412.sh b/scripts/setup_devenv/levante_omp412.sh
index 0a10e26..92054b2 100755
--- a/scripts/setup_devenv/levante_omp412.sh
+++ b/scripts/setup_devenv/levante_omp412.sh
@@ -36,7 +36,7 @@ INSTALL_PATH=$BUILD_PATH/install
 mkdir -p "$BUILD_PATH"
 pushd "$BUILD_PATH"
 
-eval `spack load --sh python@3.10.10%gcc@=11.2.0`
+eval `spack load --sh python@3.11.2%gcc@=11.2.0`
 
 # recommended to use a compute node for the build process with > 8 threads
 THREADS=64
@@ -61,7 +61,7 @@ echo "=== Building coyote ==="
 CC="${CC}" CXX="${CXX}" FC="${FC}" cmake $ABSOLUTE_coyote_ROOT -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug
 cmake --build . -j $THREADS
 
-cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.10/site-packages/
+cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.11/site-packages/
 export PYTHONPATH=${BUILD_PATH}/python:${ABSOLUTE_coyote_ROOT}/apps
 echo $PYTHONPATH
 
-- 
GitLab


From 4160449a7a2d84c464f1063b45e20a6f0d72e837 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Wed, 26 Feb 2025 18:09:39 +0100
Subject: [PATCH 5/7] fixed the right value to be passed for chunk-shape in
 zarr 3

---
 apps/hiopy/configure/configure.py      | 12 ++++++-----
 apps/hiopy/configure/create_dataset.py | 28 ++++++++++++--------------
 requirements-dev.txt                   |  2 +-
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 6a9e4d7..602003b 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -75,18 +75,20 @@ def add_variable(
 
         if zaxis is None:
             shape = (*ntime, crs_len)
-            _chunk_shape = (np.min(chunk_shape, shape),) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr)
         else:
             nheight = g.get(zaxis).shape[0]
             shape = (*ntime, nheight, crs_len)
-            _chunk_shape = (np.min(chunk_shape, shape),) if chunk_shape is not None else None
             _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr)
-        _attributes["grid_mapping"] = "crs"
 
+        _attributes["grid_mapping"] = "crs"
+        _chunk_shape = "auto"
         _shard_shape = None
-        if chunks_per_shard is not None:
-            _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
+
+        if chunk_shape is not None:
+            _chunk_shape = (np.min(chunk_shape, shape),)
+            if chunks_per_shard is not None:
+                _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
 
         v = g.create_array(
             name,
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py
index ca976b3..097d4f7 100644
--- a/apps/hiopy/configure/create_dataset.py
+++ b/apps/hiopy/configure/create_dataset.py
@@ -5,7 +5,7 @@ import zarr
 
 
 def add_coordinates(
-    dataset: zarr.Dataset,
+    dataset: zarr.Group,
     coordinates: list[tuple[float, float]],
     coord_names: tuple[str, str] = ("lon", "lat"),
 ) -> None:
@@ -14,8 +14,8 @@ def add_coordinates(
 
     Parameters
     ----------
-    dataset : zarr.Dataset
-        The Zarr dataset where the coordinates will be added.
+    dataset : zarr.Group
+        The Zarr group where the coordinates will be added.
     coordinates : list[tuple[float, float]]
         A list of tuples containing the (longitude, latitude) values for each point.
     coord_names : tuple[str, str], optional
@@ -35,33 +35,31 @@ def add_coordinates(
 
     lat_list, lon_list = zip(*coordinates)
 
-    lon = dataset.create_array(
-        name=coord_names[0], fill_value=None, shape=lon_list.shape, dtype=np.float32
+    lon = dataset.create_dataset(
+        name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),)
     )
-    lon[:] = np.array(lon_list)
     lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]]
     lon.attrs["long_name"] = "longitude"
     lon.attrs["units"] = "degree"
     lon.attrs["standard_name"] = "grid_longitude"
 
-    lat = dataset.create_array(
-        name=coord_names[1], fill_value=None, shape=lat_list.shape, dtype=np.float32
+    lat = dataset.create_dataset(
+        name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),)
     )
-    lat[:] = np.array(lat_list)
     lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]]
     lat.attrs["long_name"] = "latitude"
     lat.attrs["units"] = "degree"
     lat.attrs["standard_name"] = "grid_latitude"
 
 
-def add_healpix_grid(dataset: zarr.Dataset, order: int):
+def add_healpix_grid(dataset: zarr.Group, order: int):
     """
         Add a HealPix grid to the specified Zarr dataset.
 
         Parameters
         ----------
-        dataset : zarr.Dataset
-            The Zarr dataset where the HealPix grid will be added to the crs.
+        dataset : zarr.Group
+            The Zarr group where the HealPix grid will be added to the crs.
         order : int
             The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
 
@@ -78,7 +76,7 @@ def add_healpix_grid(dataset: zarr.Dataset, order: int):
 
 
 def add_healpix_hierarchy(
-    dataset: zarr.Dataset,
+    dataset: zarr.Group,
     order: int,
     nr_of_coarsenings: int = 4,
     prefix: str = "healpix_",
@@ -91,8 +89,8 @@ def add_healpix_hierarchy(
 
     Parameters
     ----------
-    dataset : zarr.Dataset
-        The Zarr dataset where the hierarchy will be added.
+    dataset : zarr.Group
+        The Zarr group where the hierarchy will be added.
     order : int
         The maximum level in the hierarchy.
     nr_of_coarsenings : int
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 751652f..873ff3b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -5,4 +5,4 @@ pre-commit
 healpy
 zarr>=3
 aiohttp
-rich
\ No newline at end of file
+rich
-- 
GitLab


From e04afefd7927940a644c6987143925c495521adc Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 11 Mar 2025 15:01:29 +0100
Subject: [PATCH 6/7] fixed chunk-shape calculation

---
 apps/hiopy/configure/configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py
index 602003b..5ceb9b0 100755
--- a/apps/hiopy/configure/configure.py
+++ b/apps/hiopy/configure/configure.py
@@ -86,7 +86,7 @@ def add_variable(
         _shard_shape = None
 
         if chunk_shape is not None:
-            _chunk_shape = (np.min(chunk_shape, shape),)
+            _chunk_shape = tuple(int(c) for c in np.minimum(chunk_shape, shape))  # per-axis clamp
             if chunks_per_shard is not None:
                 _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
 
-- 
GitLab


From 9edd63422ae7aaba265ce08b6ba8be41d6796660 Mon Sep 17 00:00:00 2001
From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de>
Date: Tue, 11 Mar 2025 15:36:18 +0100
Subject: [PATCH 7/7] changed the order of imports in the worker as the numpy
 (built with different compiler) is causing unexpected behaviour if imported
 first

---
 apps/hiopy/worker.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py
index c031239..a0d8bca 100755
--- a/apps/hiopy/worker.py
+++ b/apps/hiopy/worker.py
@@ -1,18 +1,18 @@
 #!/usr/bin/env python3
 
-import logging
-from argparse import ArgumentParser
-from itertools import chain, groupby
-
-import numpy as np
-import zarr
 from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime
-
 from ._data_handler import DataHandler
 from ._distribute_work import distribute_work
 from ._grids import def_grid, grid_id
 from .loco import LocoServer
 
+import numpy as np
+import zarr
+import logging
+from argparse import ArgumentParser
+from itertools import chain, groupby
+
+
 
 def main():
     parser = ArgumentParser()
-- 
GitLab