From a62b8a39142afbcb312ef94a94ddc1c291234c38 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg-de> Date: Fri, 14 Feb 2025 16:01:25 +0200 Subject: [PATCH 01/20] updated the way to create array in groups as per zarr 3 --- apps/hiopy/configure/configure.py | 15 +++++++++------ apps/hiopy/configure/create_dataset.py | 4 ++-- requirements-dev.txt | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index d2902c7..eb3e090 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -15,7 +15,8 @@ def add_time(dataset, startdate, enddate, dt, name="time"): time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s") for g in _collect_groups(dataset): - time = g.create_dataset(name, data=time_data, fill_value=None, shape=time_data.shape) + time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong) + time.append(data=time_data) time.attrs["_ARRAY_DIMENSIONS"] = (name,) time.attrs["axis"] = "T" time.attrs["calendar"] = "proleptic_gregorian" @@ -31,7 +32,8 @@ def _collect_groups(dataset): def add_height(dataset, name, n): for g in _collect_groups(dataset): - height = g.create_dataset(name, data=np.arange(n)) + height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape) + height.append(data=np.arange(n)) height.attrs["_ARRAY_DIMENSIONS"] = [name] height.attrs["axis"] = "Z" height.attrs["long_name"] = "generalized_height" @@ -56,13 +58,13 @@ def add_variable( for g in _collect_groups(dataset): taxis_tuple = tuple() if taxis is None else (taxis,) ntime = tuple() if taxis is None else (g[taxis].shape[0],) - grid_mapping_name = g.crs.attrs["grid_mapping_name"] + grid_mapping_name = g['crs'].attrs["grid_mapping_name"] spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell" crs_len = 0 if grid_mapping_name == 
"healpix": - crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"]) + crs_len = healpy.nside2npix(g['crs'].attrs["healpix_nside"]) elif grid_mapping_name == "point_cloud": - lon_coord, lat_coord = g.crs.attrs["coordinates"].split(" ") + lon_coord, lat_coord = g['crs'].attrs["coordinates"].split(" ") assert lon_coord in g and lat_coord in g assert g[lon_coord].shape[0] == g[lat_coord].shape[0] crs_len = g[lat_coord].shape[0] @@ -80,7 +82,8 @@ def add_variable( _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr) _attributes["grid_mapping"] = "crs" - v = g.create_dataset( + + v = g.create_array( name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs ) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 36e336e..b20133f 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -24,9 +24,8 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" - def add_healpix_grid(dataset, order): - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32), shape=(1,)) + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "healpix" crs.attrs["healpix_nside"] = 2**order @@ -39,3 +38,4 @@ def add_healpix_hierarchy(dataset, order, prefix="healpix_"): add_healpix_grid(zg, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" + diff --git a/requirements-dev.txt b/requirements-dev.txt index 01b1586..bfb1d4d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,5 +3,5 @@ wheel ruff pre-commit healpy -zarr<3.0 +zarr>=2,<3 aiohttp -- GitLab From 224cd2cca76b63194679d8ef08d17a925a7f62cb Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal 
<siddhant.tibrewal@mpimet.mpg-de> Date: Mon, 17 Feb 2025 19:03:44 +0200 Subject: [PATCH 02/20] added argument to allow chunks-per-shard while configuring the dataset --- apps/hiopy/configure/configure.py | 18 +++++++++++++++--- apps/hiopy/configure/create_dataset.py | 1 - apps/hiopy/worker.py | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index eb3e090..76409ee 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -32,8 +32,8 @@ def _collect_groups(dataset): def add_height(dataset, name, n): for g in _collect_groups(dataset): - height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape) - height.append(data=np.arange(n)) + height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape) + height[:] = np.arange(n) height.attrs["_ARRAY_DIMENSIONS"] = [name] height.attrs["axis"] = "Z" height.attrs["long_name"] = "generalized_height" @@ -53,6 +53,7 @@ def add_variable( frac_mask=None, yac_name=None, attributes=None, + chunks_per_shard=None, **kwargs, ): for g in _collect_groups(dataset): @@ -72,6 +73,7 @@ def add_variable( raise Exception("Unknown crs.") _attributes = attributes or {} + if zaxis is None: shape = (*ntime, crs_len) _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None @@ -83,8 +85,18 @@ def add_variable( _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr) _attributes["grid_mapping"] = "crs" + _shard_shape = None + if chunks_per_shard is not None: + _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) + v = g.create_array( - name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs + name, + shape=shape, + dtype=np.float32, + fill_value=np.nan, + chunks=_chunk_shape, + shards=_shard_shape, + **kwargs, ) # TODO: Use a generic name instead of hiopy such that it represents arbitrary grid too 
diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index b20133f..8ea822b 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -38,4 +38,3 @@ def add_healpix_hierarchy(dataset, order, prefix="healpix_"): add_healpix_grid(zg, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" - diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index a9f4e43..5dfb8ae 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -67,7 +67,7 @@ def main(): assert len(args.datasets) == 1, "Loco only supports reading from 1 dataset" loco_store = zarr.MemoryStore() zarr.copy_store(args.datasets[0].store, loco_store) - zarr.convenience.consolidate_metadata(loco_store) + zarr.consolidate_metadata(loco_store) loco_server = LocoServer(loco_store, args.loco_host, args.loco_port) args.datasets = [zarr.open(store=loco_store)] -- GitLab From 6cb14f31ba1f06b4290c2f082e3b12853aa47d33 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 16:40:46 +0100 Subject: [PATCH 03/20] added some documentation to the create-dataset helper functions --- apps/hiopy/configure/create_dataset.py | 80 +++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 8ea822b..2f5ba11 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -1,11 +1,34 @@ #!/usr/bin/env python3 import numpy as np +import zarr -def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): - # TODO: update create_dataset() calls to adhrer to zarr 3.0 recommendations - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32)) +def add_coordinates( + dataset: zarr.Dataset, + coordinates: list[tuple[float, float]], + coord_names: tuple[str, str] = ("lon", "lat"), +) -> None: + """ + Add longitude and 
latitude coordinates to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the coordinates will be added. + coordinates : list[tuple[float, float]] + A list of tuples containing the (longitude, latitude) values for each point. + coord_names : tuple[str, str], optional + The names to use for the longitude and latitude arrays. Defaults to ("lon", "lat"). + + Notes + ----- + This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude. + The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system. + Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)]) + """ + + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "point_cloud" crs.attrs["coordinates"] = f"{coord_names[0]} {coord_names[1]}" @@ -24,7 +47,23 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" -def add_healpix_grid(dataset, order): + +def add_healpix_grid(dataset: zarr.Dataset, order: int): + """ + Add a HealPix grid to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the HealPix grid will be added to the crs. + order : int + The order of the HealPix grid. This corresponds to 2^order for the NSIDE. + + Notes + ----- + The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set + accordingly. 
No values are added to it + """ crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "healpix" @@ -32,9 +71,36 @@ def add_healpix_grid(dataset, order): crs.attrs["healpix_order"] = "nest" -def add_healpix_hierarchy(dataset, order, prefix="healpix_"): - for o in range(order + 1): - zg = dataset.create_group(f"{prefix}{o}") +def add_healpix_hierarchy( + dataset: zarr.Dataset, + order: int, + nr_of_coarsenings: int = 4, + prefix: str = "healpix_", +) -> None: + """ + Add a hierarchical structure to the specified Zarr dataset for a given Healpix order. + + This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid. + The `add_healpix_grid` function is used to create the actual grid arrays within each group. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the hierarchy will be added. + order : int + The maximum level in the hierarchy. + nr_of_coarsenings : int + Number of coarsening aggregation levels needed + prefix : str, optional + The prefix to use for naming each group. Defaults to "healpix_". + + Notes + ----- + This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid. + The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation. 
+ """ + for o in range(order, order - nr_of_coarsenings, -1): + zg = dataset.create_group(name=f"{prefix}{o}") add_healpix_grid(zg, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" -- GitLab From 16fcef0698fd9e578ab1b01b92c3f666f07a941f Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 18:06:22 +0100 Subject: [PATCH 04/20] bumped zarr required version to 3 and updated the scripts to use python3.11 --- pyproject.toml | 2 +- requirements-dev.txt | 3 ++- scripts/setup_devenv/build_dependencies.sh | 6 +++--- scripts/setup_devenv/levante_omp412.sh | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8f2422..9153da4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ version = "0.0.1" dependencies = [ "numpy", "pybind11", - "zarr<3.0", + "zarr>=3.0", "healpy", "aiohttp", "regex_engine" diff --git a/requirements-dev.txt b/requirements-dev.txt index bfb1d4d..751652f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,5 +3,6 @@ wheel ruff pre-commit healpy -zarr>=2,<3 +zarr>=3 aiohttp +rich \ No newline at end of file diff --git a/scripts/setup_devenv/build_dependencies.sh b/scripts/setup_devenv/build_dependencies.sh index a6b50a8..33a6022 100755 --- a/scripts/setup_devenv/build_dependencies.sh +++ b/scripts/setup_devenv/build_dependencies.sh @@ -220,9 +220,9 @@ function install_yac { function install_all { - echo "========================" - echo "== building HEALPIX & Co" - check_and_install healpix_cxx + # echo "========================" + # echo "== building HEALPIX & Co" + # check_and_install healpix_cxx echo "========================" echo "== building YAC & Co" check_and_install yac diff --git a/scripts/setup_devenv/levante_omp412.sh b/scripts/setup_devenv/levante_omp412.sh index 0a10e26..92054b2 100755 --- a/scripts/setup_devenv/levante_omp412.sh +++ b/scripts/setup_devenv/levante_omp412.sh @@ -36,7 +36,7 @@ 
INSTALL_PATH=$BUILD_PATH/install mkdir -p "$BUILD_PATH" pushd "$BUILD_PATH" -eval `spack load --sh python@3.10.10%gcc@=11.2.0` +eval `spack load --sh python@3.11.2%gcc@=11.2.0` # recommended to use a compute node for the build process with > 8 threads THREADS=64 @@ -61,7 +61,7 @@ echo "=== Building coyote ===" CC="${CC}" CXX="${CXX}" FC="${FC}" cmake $ABSOLUTE_coyote_ROOT -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug cmake --build . -j $THREADS -cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.10/site-packages/ +cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.11/site-packages/ export PYTHONPATH=${BUILD_PATH}/python:${ABSOLUTE_coyote_ROOT}/apps echo $PYTHONPATH -- GitLab From 42fa695362feb18e910408c67e0a98c5a8a2a2ef Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 18:09:39 +0100 Subject: [PATCH 05/20] fixed the right value to be passed for chunk-shape in zarr 3 --- apps/hiopy/configure/configure.py | 12 +++++++----- apps/hiopy/configure/create_dataset.py | 26 +++++++++++++++----------- requirements-dev.txt | 2 +- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 76409ee..09ca00d 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -76,18 +76,20 @@ def add_variable( if zaxis is None: shape = (*ntime, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr) else: nheight = g[zaxis].shape[0] shape = (*ntime, nheight, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr) - _attributes["grid_mapping"] = "crs" + _attributes["grid_mapping"] = "crs" + _chunk_shape = "auto" _shard_shape = None - if chunks_per_shard is not None: - _shard_shape = tuple(i * 
chunks_per_shard for i in _chunk_shape) + + if chunk_shape is not None: + _chunk_shape = (np.min(chunk_shape, shape),) + if chunks_per_shard is not None: + _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) v = g.create_array( name, diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 2f5ba11..097d4f7 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -5,7 +5,7 @@ import zarr def add_coordinates( - dataset: zarr.Dataset, + dataset: zarr.Group, coordinates: list[tuple[float, float]], coord_names: tuple[str, str] = ("lon", "lat"), ) -> None: @@ -14,8 +14,8 @@ def add_coordinates( Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the coordinates will be added. + dataset : zarr.Group + The Zarr group where the coordinates will be added. coordinates : list[tuple[float, float]] A list of tuples containing the (longitude, latitude) values for each point. coord_names : tuple[str, str], optional @@ -35,27 +35,31 @@ def add_coordinates( lat_list, lon_list = zip(*coordinates) - lon = dataset.create_dataset(coord_names[0], data=np.array(lon_list)) + lon = dataset.create_dataset( + name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),) + ) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degree" lon.attrs["standard_name"] = "grid_longitude" - lat = dataset.create_dataset(coord_names[1], data=np.array(lat_list)) + lat = dataset.create_dataset( + name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),) + ) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] lat.attrs["long_name"] = "latitude" lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" -def add_healpix_grid(dataset: zarr.Dataset, order: int): +def add_healpix_grid(dataset: zarr.Group, order: int): """ Add a HealPix grid to the specified Zarr dataset. 
Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the HealPix grid will be added to the crs. + dataset : zarr.Group + The Zarr group where the HealPix grid will be added to the crs. order : int The order of the HealPix grid. This corresponds to 2^order for the NSIDE. @@ -72,7 +76,7 @@ def add_healpix_grid(dataset: zarr.Dataset, order: int): def add_healpix_hierarchy( - dataset: zarr.Dataset, + dataset: zarr.Group, order: int, nr_of_coarsenings: int = 4, prefix: str = "healpix_", @@ -85,8 +89,8 @@ def add_healpix_hierarchy( Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the hierarchy will be added. + dataset : zarr.Group + The Zarr group where the hierarchy will be added. order : int The maximum level in the hierarchy. nr_of_coarsenings : int diff --git a/requirements-dev.txt b/requirements-dev.txt index 751652f..873ff3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ pre-commit healpy zarr>=3 aiohttp -rich \ No newline at end of file +rich -- GitLab From bf843f720add2e948bb2b213921cb6724399803f Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 11 Mar 2025 15:01:29 +0100 Subject: [PATCH 06/20] fixed chunk-shape calculation --- apps/hiopy/configure/configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 09ca00d..7978999 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -87,7 +87,7 @@ def add_variable( _shard_shape = None if chunk_shape is not None: - _chunk_shape = (np.min(chunk_shape, shape),) + _chunk_shape = tuple(min(chunk_shape, shape)) if chunks_per_shard is not None: _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) -- GitLab From 15f319380718835a0bd5365216fde89531b95bab Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 11 Mar 2025 15:36:18 +0100 
Subject: [PATCH 07/20] changed the order of imports in the worker as the numpy (built with different compiler) is causing unexpected behaviour if imported first --- apps/hiopy/worker.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 5dfb8ae..5617b8b 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -1,29 +1,19 @@ #!/usr/bin/env python3 -import json -import logging -from argparse import ArgumentParser -from itertools import chain, groupby - -import numpy as np -import zarr -from coyote import ( - Coyote, - ensure_enddef, - get_field_metadata, - group_comm_rank, - group_comm_size, - init, - run, - start_datetime, -) - +from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id from ._zarr_utils import get_time_axis, get_var_group from .loco import LocoServer +import numpy as np +import zarr +import logging +from argparse import ArgumentParser +from itertools import chain, groupby + + def main(): parser = ArgumentParser() -- GitLab From fa991350fd770564e4e5023581fd753bfb7d2fd8 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Fri, 21 Mar 2025 12:31:19 +0100 Subject: [PATCH 08/20] zarr 3 fixes --- apps/hiopy/_zarr_utils.py | 7 +++---- apps/hiopy/worker.py | 13 ++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py index 39c6cde..0b2cf56 100644 --- a/apps/hiopy/_zarr_utils.py +++ b/apps/hiopy/_zarr_utils.py @@ -1,10 +1,9 @@ import zarr - def get_var_group(v): - root = zarr.Group(store=v.store) - last_slash_idx = v.name.rindex("/") - return root[v.name[:last_slash_idx]] + store = zarr.open(v.store) + parent_group_path = '/'.join(v.path.split('/')[:-1]) + return store[parent_group_path] def 
get_time_axis(v): diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 5617b8b..9a9cc15 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -121,10 +121,10 @@ def main(): # compute time start index t0 = ( np.datetime64(start_datetime()) - - np.datetime64(var_group.time.attrs["units"][len("seconds since ") :]) - ) / np.timedelta64(1, "s") + dt - t0_idx = np.searchsorted(var_group.time, t0) - assert var_group.time[t0_idx] == t0, "start_datetime not found in time axis" + - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :]) + ) / np.timedelta64(1, "s") + t0_idx = np.searchsorted(var_group["time"], t0) + assert var_group["time"][t0_idx] == t0, "start_datetime not found in time axis" # see YAC_REDUCTION_TIME_NONE etc. (TODO: pass constants through coyote) time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4} @@ -135,9 +135,8 @@ def main(): src_comp, src_grid = v.attrs["hiopy::yac_source"] else: assert "hiopy::parent" in var_group.attrs, f"No source for field {v.name} specified" - parent_var_path = var_group.attrs["hiopy::parent"] + "/" + src_name - source_var = zarr.Group(store=v.store)[parent_var_path] - src_name = source_var.name + parent_var_name = var_group.attrs["hiopy::parent"] + "/" + v.name.split("/")[-1] + source_var = zarr.open(store=v.store)[parent_var_name] source_var_gid = grid_id(source_var) src_comp = src_grid = f"{args.process_group}_{source_var_gid}" time_method = v.attrs.get("hiopy::time_method", "point") -- GitLab From 9f19bda241232747c7fbccd20835e5762f522206 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Mar 2025 17:10:42 +0100 Subject: [PATCH 09/20] fixed the py-linting and turned off alphabetical sorting of imports due to an issue with compiler version with which numpy was built --- apps/hiopy/configure/create_dataset.py | 42 ++++++++++++++------------ apps/hiopy/worker.py | 17 +++++++++-- pyproject.toml | 2 -- 3 files changed, 37 
insertions(+), 24 deletions(-) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 097d4f7..483844e 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -23,8 +23,9 @@ def add_coordinates( Notes ----- - This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude. - The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system. + This function creates two new arrays in the dataset: `coord_names[0]` for longitude and + `coord_names[1]` for latitude. The `crs` array is also created, with its attributes set + to indicate that it's a "point_cloud" coordinate reference system. Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)]) """ @@ -54,19 +55,20 @@ def add_coordinates( def add_healpix_grid(dataset: zarr.Group, order: int): """ - Add a HealPix grid to the specified Zarr dataset. - - Parameters - ---------- - dataset : zarr.Group - The Zarr group where the HealPix grid will be added to the crs. - order : int - The order of the HealPix grid. This corresponds to 2^order for the NSIDE. - - Notes - ----- - The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set - accordingly. No values are added to it + Add a HealPix grid to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Group + The Zarr group where the HealPix grid will be added to the crs. + order : int + The order of the HealPix grid. This corresponds to 2^order for the NSIDE. + + Notes + ----- + The HealPix grid is stored as a single array named "crs" in the dataset, with + the healpix_nside and healpix_order attributes set accordingly. 
+ No values are added to it """ crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) @@ -84,8 +86,9 @@ def add_healpix_hierarchy( """ Add a hierarchical structure to the specified Zarr dataset for a given Healpix order. - This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid. - The `add_healpix_grid` function is used to create the actual grid arrays within each group. + This function creates a group hierarchy with each level representing a specific + resolution of the Healpix grid. The `add_healpix_grid` function is used to create the + actual grid arrays within each group. Parameters ---------- @@ -100,8 +103,9 @@ def add_healpix_hierarchy( Notes ----- - This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid. - The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation. + This function sets up a hierarchical structure with each level representing a + specific resolution of the Healpix grid. The `hiopy::parent` attribute is used + to link each group to its parent in the hierarchy, allowing for efficient navigation. 
""" for o in range(order, order - nr_of_coarsenings, -1): zg = dataset.create_group(name=f"{prefix}{o}") diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 9a9cc15..6b74d94 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -1,4 +1,14 @@ #!/usr/bin/env python3 +from coyote import ( + Coyote, + ensure_enddef, + get_field_metadata, + group_comm_rank, + group_comm_size, + init, + run, + start_datetime, +) from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime from ._data_handler import DataHandler @@ -7,12 +17,13 @@ from ._grids import def_grid, grid_id from ._zarr_utils import get_time_axis, get_var_group from .loco import LocoServer -import numpy as np -import zarr -import logging from argparse import ArgumentParser from itertools import chain, groupby +import json +import logging +import numpy as np +import zarr def main(): diff --git a/pyproject.toml b/pyproject.toml index 9153da4..5ff9121 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,5 @@ lint.select = [ "B", # flake8-simplify "SIM", - # isort - "I", ] line-length = 100 # accomodate any libc++ motivated requirements of over 80 characters -- GitLab From d5de118ed3ef72e8428c8872b5af530f389ea4ae Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 1 Apr 2025 15:50:29 +0200 Subject: [PATCH 10/20] Ran pre-commit hook to fix issues from last changes --- apps/hiopy/_zarr_utils.py | 3 ++- apps/hiopy/configure/configure.py | 10 ++++++---- apps/hiopy/worker.py | 1 - 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py index 0b2cf56..8403cd2 100644 --- a/apps/hiopy/_zarr_utils.py +++ b/apps/hiopy/_zarr_utils.py @@ -1,8 +1,9 @@ import zarr + def get_var_group(v): store = zarr.open(v.store) - parent_group_path = '/'.join(v.path.split('/')[:-1]) + parent_group_path = "/".join(v.path.split("/")[:-1]) return store[parent_group_path] diff --git 
a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 7978999..badc732 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -15,7 +15,9 @@ def add_time(dataset, startdate, enddate, dt, name="time"): time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s") for g in _collect_groups(dataset): - time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong) + time = g.create_array( + name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong + ) time.append(data=time_data) time.attrs["_ARRAY_DIMENSIONS"] = (name,) time.attrs["axis"] = "T" @@ -59,13 +61,13 @@ def add_variable( for g in _collect_groups(dataset): taxis_tuple = tuple() if taxis is None else (taxis,) ntime = tuple() if taxis is None else (g[taxis].shape[0],) - grid_mapping_name = g['crs'].attrs["grid_mapping_name"] + grid_mapping_name = g["crs"].attrs["grid_mapping_name"] spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell" crs_len = 0 if grid_mapping_name == "healpix": - crs_len = healpy.nside2npix(g['crs'].attrs["healpix_nside"]) + crs_len = healpy.nside2npix(g["crs"].attrs["healpix_nside"]) elif grid_mapping_name == "point_cloud": - lon_coord, lat_coord = g['crs'].attrs["coordinates"].split(" ") + lon_coord, lat_coord = g["crs"].attrs["coordinates"].split(" ") assert lon_coord in g and lat_coord in g assert g[lon_coord].shape[0] == g[lat_coord].shape[0] crs_len = g[lat_coord].shape[0] diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 6b74d94..8926aba 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -10,7 +10,6 @@ from coyote import ( start_datetime, ) -from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id -- GitLab From 
8623e414edbda049e5ea48c26c09eae38b1817fd Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Mar 2025 17:23:57 +0100 Subject: [PATCH 11/20] bumped the python version to 3.11 with a local python installation referenced in scripts --- .gitlab-ci.yml | 2 +- README.md | 2 +- ci/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a5970e5..6f0042d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,7 +80,7 @@ hiopy-levante: script: - | module load git - /sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv venv + /home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv venv . venv/bin/activate ICON_DIR=`pwd -P`/icon ( diff --git a/README.md b/README.md index 99ab095..49183eb 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ python -m pip install git+https://gitlab.dkrz.de/nils/coyote.git ## Installation with ICON on levante ```bash -/sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv ./venv --prompt icon +/home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv ./venv --prompt icon . 
./venv/bin/activate git clone --recursive git@gitlab.dkrz.de:icon/icon.git icon pushd icon diff --git a/ci/Dockerfile b/ci/Dockerfile index fc08679..66faa8a 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9-slim +FROM python:3.11-slim LABEL maintainer="dreier@dkrz.de" #Remove some warnings -- GitLab From ea7d8e5756de21c90cd8917fbb3e4132470b86a2 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Mar 2025 17:37:01 +0100 Subject: [PATCH 12/20] added rich as a mandatory dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5ff9121..003727b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,8 @@ dependencies = [ "zarr>=3.0", "healpy", "aiohttp", - "regex_engine" + "regex_engine", + "rich" ] [tool.scikit-build] -- GitLab From f8fba52820efb664e940c77a03900c65832bbc8b Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Mar 2025 18:19:34 +0100 Subject: [PATCH 13/20] fixing zarr version to v3.0.6 for essential bug fixes --- apps/hiopy/tests/CMakeLists.txt | 3 ++- ci/Dockerfile | 2 +- pyproject.toml | 4 ++-- requirements-dev.txt | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/apps/hiopy/tests/CMakeLists.txt b/apps/hiopy/tests/CMakeLists.txt index 0d1eb8c..f1ad72b 100644 --- a/apps/hiopy/tests/CMakeLists.txt +++ b/apps/hiopy/tests/CMakeLists.txt @@ -29,8 +29,9 @@ set_tests_properties(hiopy.create_simple_dataset_4threads PROPERTIES add_test( NAME hiopy.check_hierarchy - COMMAND "/usr/bin/env" "python3" "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr" + COMMAND ${Python_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr" ) + set_tests_properties(hiopy.check_hierarchy PROPERTIES FIXTURES_REQUIRED simple_dataset.zarr ENVIRONMENT 
"PYTHONPATH=${CMAKE_BINARY_DIR}/python:${CMAKE_SOURCE_DIR}/apps:$ENV{PYTHONPATH}" diff --git a/ci/Dockerfile b/ci/Dockerfile index 66faa8a..22bb9f7 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -23,7 +23,7 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -RUN pip install --no-cache-dir numpy mpi4py matplotlib cython healpy aiohttp zarr +RUN pip install --no-cache-dir numpy mpi4py matplotlib cython healpy aiohttp zarr rich # install yaxt RUN curl -s -L https://gitlab.dkrz.de/dkrz-sw/yaxt/-/archive/release-0.11.3/yaxt-release-0.11.3.tar.gz | \ diff --git a/pyproject.toml b/pyproject.toml index 003727b..2d27f64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,10 +8,10 @@ version = "0.0.1" dependencies = [ "numpy", "pybind11", - "zarr>=3.0", + "zarr>=3.0.6", "healpy", "aiohttp", - "regex_engine", + "regex_engine", "rich" ] diff --git a/requirements-dev.txt b/requirements-dev.txt index 873ff3b..f930191 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,6 @@ wheel ruff pre-commit healpy -zarr>=3 +zarr>=3.0.6 aiohttp rich -- GitLab From cf118b6b41313a620331b3f02c117b48bb3c6b85 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 1 Apr 2025 16:54:18 +0200 Subject: [PATCH 14/20] Fixes for zarr-3 which slipped through --- apps/hiopy/configure/create_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 483844e..1c0f027 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -36,7 +36,7 @@ def add_coordinates( lat_list, lon_list = zip(*coordinates) - lon = dataset.create_dataset( + lon = dataset.create_array( name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),) ) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] @@ -44,7 +44,7 @@ def add_coordinates( lon.attrs["units"] = "degree" 
lon.attrs["standard_name"] = "grid_longitude" - lat = dataset.create_dataset( + lat = dataset.create_array( name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),) ) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] -- GitLab From d8427fd6f17e266a1d54e0746a212bbcb38f9042 Mon Sep 17 00:00:00 2001 From: Nils-Arne Dreier <dreier@dkrz.de> Date: Wed, 26 Mar 2025 17:36:08 +0100 Subject: [PATCH 15/20] fix: copy_metadata --- apps/hiopy/_zarr_utils.py | 11 ++++++++++- apps/hiopy/worker.py | 18 +++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py index 8403cd2..dc08f3f 100644 --- a/apps/hiopy/_zarr_utils.py +++ b/apps/hiopy/_zarr_utils.py @@ -4,7 +4,10 @@ import zarr def get_var_group(v): store = zarr.open(v.store) parent_group_path = "/".join(v.path.split("/")[:-1]) - return store[parent_group_path] + if parent_group_path == "": + return store + else: + return store[parent_group_path] def get_time_axis(v): @@ -18,3 +21,9 @@ def get_time_axis(v): return time_axis else: return None + +def get_var_parent_group(v): + var_group = get_var_group(v) + parent_var_path = var_group.attrs["hiopy::parent"] + parent_group = zarr.open(v.store)[parent_var_path] + return parent_group diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 8926aba..13f0986 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -13,7 +13,7 @@ from coyote import ( from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id -from ._zarr_utils import get_time_axis, get_var_group +from ._zarr_utils import get_var_group, get_var_parent_group from .loco import LocoServer from argparse import ArgumentParser @@ -145,8 +145,9 @@ def main(): src_comp, src_grid = v.attrs["hiopy::yac_source"] else: assert "hiopy::parent" in var_group.attrs, f"No source for field {v.name} specified" - parent_var_name = var_group.attrs["hiopy::parent"] + "/" + 
v.name.split("/")[-1] - source_var = zarr.open(store=v.store)[parent_var_name] + parent_group = get_var_parent_group(v) + source_var = parent_group[v.basename] + src_name = source_var.name source_var_gid = grid_id(source_var) src_comp = src_grid = f"{args.process_group}_{source_var_gid}" time_method = v.attrs.get("hiopy::time_method", "point") @@ -185,14 +186,13 @@ def main(): ) def get_source_triple(v): - group = get_var_group(v) - src_field = v.attrs.get("hiopy::src_name", v.basename) - if "hiopy::parent" in group.attrs: - parent_var_path = group.attrs["hiopy::parent"] + "/" + src_field - source_var = zarr.Group(store=v.store)[parent_var_path] - return get_source_triple(source_var) + var_group = get_var_group(v) + if "hiopy::parent" in var_group.attrs: + pgroup = get_var_parent_group(v) + return get_source_triple(pgroup[v.basename]) elif "hiopy::yac_source" in v.attrs: src_comp, src_grid = v.attrs["hiopy::yac_source"] + src_field = v.attrs.get("hiopy::src_name", v.basename) return src_comp, src_grid, src_field else: raise RuntimeError("Invalid attributes: " + str(dict(v.attrs))) -- GitLab From 5bff601fc4aea359a5f79fccd7959b5677e0b517 Mon Sep 17 00:00:00 2001 From: Nils-Arne Dreier <dreier@dkrz.de> Date: Thu, 27 Mar 2025 14:12:06 +0100 Subject: [PATCH 16/20] fix: keep all data_handlers for final flush --- apps/hiopy/worker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 13f0986..6822d98 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -115,13 +115,13 @@ def main(): gid: Coyote(f"{args.process_group}_{gid}") for gid, data_vars, chunk_slice in my_data_vars } + data_handlers = [] + for gid, data_vars, chunk_slice in my_data_vars: coyote = coyote_instances[gid] # all vars in data_vars define the same grid def_grid(coyote, data_vars[0], chunk_slice) - data_handlers = [] - for v in data_vars: # compute timestep var_group = get_var_group(v) -- GitLab From 
1c56757c5e3d898b8eab2512612f89ad8908179f Mon Sep 17 00:00:00 2001 From: Nils-Arne Dreier <dreier@dkrz.de> Date: Thu, 27 Mar 2025 18:03:40 +0100 Subject: [PATCH 17/20] fix: avoid reopen the zarr again and again --- apps/hiopy/_zarr_utils.py | 13 +++++++------ apps/hiopy/configure/configure.py | 5 +---- apps/hiopy/configure/create_dataset.py | 6 ++++-- apps/hiopy/worker.py | 12 ++++++------ 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py index dc08f3f..a46b8cb 100644 --- a/apps/hiopy/_zarr_utils.py +++ b/apps/hiopy/_zarr_utils.py @@ -2,12 +2,13 @@ import zarr def get_var_group(v): - store = zarr.open(v.store) - parent_group_path = "/".join(v.path.split("/")[:-1]) - if parent_group_path == "": - return store + if not hasattr(v, "root"): + v.root = zarr.open(v.store) + group_path = "/".join(v.path.split("/")[:-1]) + if group_path == "": + return v.root else: - return store[parent_group_path] + return v.root[group_path] def get_time_axis(v): @@ -25,5 +26,5 @@ def get_time_axis(v): def get_var_parent_group(v): var_group = get_var_group(v) parent_var_path = var_group.attrs["hiopy::parent"] - parent_group = zarr.open(v.store)[parent_var_path] + parent_group = v.root[parent_var_path] return parent_group diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index badc732..de186ac 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -15,10 +15,7 @@ def add_time(dataset, startdate, enddate, dt, name="time"): time_data = (np.arange(startdate + dt, enddate + dt, dt) - startdate) // np.timedelta64(1, "s") for g in _collect_groups(dataset): - time = g.create_array( - name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong - ) - time.append(data=time_data) + time = g.create_dataset(name, data=time_data, fill_value=None, shape=time_data.shape) time.attrs["_ARRAY_DIMENSIONS"] = (name,) time.attrs["axis"] = "T" time.attrs["calendar"] = 
"proleptic_gregorian" diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 1c0f027..8a4d193 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -37,16 +37,18 @@ def add_coordinates( lat_list, lon_list = zip(*coordinates) lon = dataset.create_array( - name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),) + name=coord_names[0], dtype=np.float32, shape=(len(coordinates),) ) + lon[:] = np.array(lon_list) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degree" lon.attrs["standard_name"] = "grid_longitude" lat = dataset.create_array( - name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),) + name=coord_names[1], dtype=np.float32, shape=(len(coordinates),) ) + lat[:] = np.array(lat_list) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] lat.attrs["long_name"] = "latitude" lat.attrs["units"] = "degree" diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 6822d98..c5a82c9 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -13,7 +13,7 @@ from coyote import ( from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id -from ._zarr_utils import get_var_group, get_var_parent_group +from ._zarr_utils import get_var_group, get_var_parent_group, get_time_axis from .loco import LocoServer from argparse import ArgumentParser @@ -79,15 +79,15 @@ def main(): ) # find all variables considered to be written in the input datasets: - def collect_data_vars(group): + def collect_data_vars(group, root): for _name, item in group.arrays(): if "hiopy::enable" in item.attrs and item.attrs["hiopy::enable"]: + item.root = root yield item for _name, item in group.groups(): - item.parent = group - yield from collect_data_vars(item) + yield from collect_data_vars(item, root) - all_data_vars = list(chain(*[collect_data_vars(z) for 
z in args.datasets])) + all_data_vars = list(chain(*[collect_data_vars(z, z) for z in args.datasets])) logging.info(f"Found {len(all_data_vars)} variables") if len(all_data_vars) == 0: raise RuntimeError("No variables found by the hiopy worker.") @@ -134,7 +134,7 @@ def main(): - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :]) ) / np.timedelta64(1, "s") t0_idx = np.searchsorted(var_group["time"], t0) - assert var_group["time"][t0_idx] == t0, "start_datetime not found in time axis" + assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}" # see YAC_REDUCTION_TIME_NONE etc. (TODO: pass constants through coyote) time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4} -- GitLab From f8a0fc3079a60d251ad9ca9ae41701d20fd2a8ad Mon Sep 17 00:00:00 2001 From: Nils-Arne Dreier <dreier@dkrz.de> Date: Fri, 21 Mar 2025 16:14:57 +0100 Subject: [PATCH 18/20] fix: datetime shift --- apps/hiopy/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index c5a82c9..24b999b 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -132,7 +132,7 @@ def main(): t0 = ( np.datetime64(start_datetime()) - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :]) - ) / np.timedelta64(1, "s") + ) / np.timedelta64(1, "s") + dt t0_idx = np.searchsorted(var_group["time"], t0) assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}" -- GitLab From b269ec2fae675305281033566f3e7a9a14057db9 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Mon, 17 Mar 2025 19:12:10 +0100 Subject: [PATCH 19/20] reconsolidating zmetadata after pulling metadata for fields via yac --- apps/hiopy/_zarr_utils.py | 1 + apps/hiopy/configure/create_dataset.py | 8 ++------ 
apps/hiopy/tests/CMakeLists.txt | 2 +- apps/hiopy/tests/check_hierarchy.py | 4 +++- apps/hiopy/worker.py | 10 +++++++--- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/apps/hiopy/_zarr_utils.py b/apps/hiopy/_zarr_utils.py index a46b8cb..4010274 100644 --- a/apps/hiopy/_zarr_utils.py +++ b/apps/hiopy/_zarr_utils.py @@ -23,6 +23,7 @@ def get_time_axis(v): else: return None + def get_var_parent_group(v): var_group = get_var_group(v) parent_var_path = var_group.attrs["hiopy::parent"] diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 8a4d193..dd84adf 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -36,18 +36,14 @@ def add_coordinates( lat_list, lon_list = zip(*coordinates) - lon = dataset.create_array( - name=coord_names[0], dtype=np.float32, shape=(len(coordinates),) - ) + lon = dataset.create_array(name=coord_names[0], dtype=np.float32, shape=(len(coordinates),)) lon[:] = np.array(lon_list) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degree" lon.attrs["standard_name"] = "grid_longitude" - lat = dataset.create_array( - name=coord_names[1], dtype=np.float32, shape=(len(coordinates),) - ) + lat = dataset.create_array(name=coord_names[1], dtype=np.float32, shape=(len(coordinates),)) lat[:] = np.array(lat_list) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] lat.attrs["long_name"] = "latitude" diff --git a/apps/hiopy/tests/CMakeLists.txt b/apps/hiopy/tests/CMakeLists.txt index f1ad72b..6f96015 100644 --- a/apps/hiopy/tests/CMakeLists.txt +++ b/apps/hiopy/tests/CMakeLists.txt @@ -29,7 +29,7 @@ set_tests_properties(hiopy.create_simple_dataset_4threads PROPERTIES add_test( NAME hiopy.check_hierarchy - COMMAND ${Python_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr" + COMMAND "/usr/bin/env" "python3" "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" 
"simple_dataset.zarr" ) set_tests_properties(hiopy.check_hierarchy PROPERTIES diff --git a/apps/hiopy/tests/check_hierarchy.py b/apps/hiopy/tests/check_hierarchy.py index 4b6905a..fbfbcb3 100755 --- a/apps/hiopy/tests/check_hierarchy.py +++ b/apps/hiopy/tests/check_hierarchy.py @@ -21,7 +21,9 @@ def check_interpolation(source_var, target_var): def check_metadata(var): - assert "hiopy::copy_metadata" not in var.attrs + assert ( + "hiopy::copy_metadata" not in var.attrs + ), f"Attributes of {var.name} {var.attrs.asdict()} is not cleaned" def _collect_groups(dataset, parent=None): diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 24b999b..ebf7419 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -134,7 +134,8 @@ def main(): - np.datetime64(var_group["time"].attrs["units"][len("seconds since ") :]) ) / np.timedelta64(1, "s") + dt t0_idx = np.searchsorted(var_group["time"], t0) - assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis at index {t0_idx} which has value {var_group['time'][t0_idx]}" + assert var_group["time"][t0_idx] == t0, f"start_datetime {t0} not found in time axis \ + at index {t0_idx} which has value {var_group['time'][t0_idx]}" # see YAC_REDUCTION_TIME_NONE etc. 
(TODO: pass constants through coyote) time_methods2yac = {"point": 0, "sum": 1, "mean": 2, "min": 3, "max": 4} @@ -203,13 +204,16 @@ def main(): if "hiopy::copy_metadata" in v.attrs: comp, grid, field = get_source_triple(v) md_str = get_field_metadata(comp, grid, field) - print(md_str) metadata = json.loads(md_str) - print(metadata) + logging.debug(f"Found metadata for {field}: {metadata}") for key, value in metadata["cf"].items(): v.attrs[key] = value del v.attrs["hiopy::copy_metadata"] # copy only once + # re-consolidate the newly updated metadata + for ds in args.datasets: + zarr.consolidate_metadata(ds.store) + run() for dh in data_handlers: -- GitLab From 9a8307db4378ae21592cf200b50d4fa348d57ec7 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Fri, 4 Apr 2025 10:41:27 +0200 Subject: [PATCH 20/20] ci: added python3.11 to project work accessible across developers (to be replaced once there is a system installation on levante) --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6f0042d..6f41b7d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -80,7 +80,7 @@ hiopy-levante: script: - | module load git - /home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv venv + /work/bk1414/m301120/sw-spack-common/python-3.11.2-sk474k/bin/python -m venv venv . venv/bin/activate ICON_DIR=`pwd -P`/icon ( -- GitLab