From 4605bfb837962e68b67292f2e3aa0a68b971199f Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg-de> Date: Fri, 14 Feb 2025 16:01:25 +0200 Subject: [PATCH 1/7] updated the way to create array in groups as per zarr 3 --- apps/hiopy/configure/configure.py | 20 +++++++++++--------- apps/hiopy/configure/create_dataset.py | 13 +++++++------ requirements-dev.txt | 2 +- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 9ed4ba0..d1b1ffc 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -40,7 +40,8 @@ def _collect_groups(dataset): def add_height(dataset, name, n): for g in _collect_groups(dataset): - height = g.create_dataset(name, data=np.arange(n)) + height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape) + height.append(data=np.arange(n)) height.attrs["_ARRAY_DIMENSIONS"] = [name] height.attrs["axis"] = "Z" height.attrs["long_name"] = "generalized_height" @@ -62,31 +63,32 @@ def add_variable( **kwargs, ): for g in _collect_groups(dataset): - ntime = g.time.shape[0] + ntime = g.get('time').shape[0] - grid_mapping_name = g.crs.attrs["grid_mapping_name"] + grid_mapping_name = g.get('crs').attrs["grid_mapping_name"] spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell" crs_len = 0 if grid_mapping_name == "healpix": - crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"]) + crs_len = healpy.nside2npix(g.get('crs').attrs["healpix_nside"]) else: if "clon" not in g or "clat" not in g: raise Exception("Coordinates not defined appropriately in the dataset.") - assert g.clon.shape[0] == g.clat.shape[0] - crs_len = g.clon.shape[0] + assert g.get('clon').shape[0] == g.get('clat').shape[0] + crs_len = g.get('clon').shape[0] _attributes = attributes or {} if zaxis is None: shape = (ntime, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None + _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1)) if chunk_shape is not None else () _attributes["_ARRAY_DIMENSIONS"] = ("time", spatial_attr) else: nheight = g[zaxis].shape[0] shape = (ntime, nheight, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None + _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1), np.minimum(chunk_shape, shape).item(2)) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = ("time", zaxis, spatial_attr) _attributes["grid_mapping"] = "crs" - v = g.create_dataset( + + v = g.create_array( name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs ) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 453e213..0e47c6f 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 import numpy as np +import zarr def add_timedata(dataset, startdate, enddate, dt): time_data = ( np.arange(startdate, enddate, np.timedelta64(dt, "s")) - startdate ) // np.timedelta64(1, "s") - time = dataset.create_dataset("time", data=time_data, fill_value=None, shape=time_data.shape) + time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong) + time.append(data=time_data) time.attrs["_ARRAY_DIMENSIONS"] = ("time",) time.attrs["axis"] = "T" time.attrs["calendar"] = "proleptic_gregorian" @@ -16,8 +18,7 @@ def add_timedata(dataset, startdate, enddate, dt): def create_dataset_icon(dataset, startdate, enddate, dt, grid_name="icon_atmo"): assert grid_name in ["icon_atmo", "icon_ocean", "point_cloud"] - # TODO: update create_dataset() calls to adhrer to zarr 3.0 recommendations - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32)) + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = grid_name crs.attrs["coordinates"] = "clon clat" @@ -26,8 +27,7 @@ def create_dataset_icon(dataset, startdate, enddate, dt, grid_name="icon_atmo"): def create_dataset_healpix(dataset, startdate, enddate, dt, order): - print(dataset) - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32), shape=(1,)) + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "healpix" crs.attrs["healpix_nside"] = 2**order @@ -39,7 +39,8 @@ def create_dataset_healpix(dataset, startdate, enddate, dt, order): # TODO: For the future, allow hierarchy also in other dataset types def create_dataset_healpix_hierarchic(dataset, startdate, enddate, dt, order, prefix="healpix_"): for o in range(order + 1): - zg = dataset.create_group(f"{prefix}{o}") + zg = dataset.create_group(name=f"{prefix}{o}") create_dataset_healpix(zg, startdate, enddate, dt, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" + diff --git a/requirements-dev.txt b/requirements-dev.txt index 01b1586..bfb1d4d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,5 +3,5 @@ wheel ruff pre-commit healpy -zarr<3.0 +zarr>=2,<3 aiohttp -- GitLab From f6f32c33262c13ae9cd3d0025783b81c23b29ab1 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg-de> Date: Mon, 17 Feb 2025 19:03:44 +0200 Subject: [PATCH 2/7] added argument to allow chunks-per-shard while configuring the dataset --- apps/hiopy/configure/configure.py | 47 ++++++++++++++++++++------ apps/hiopy/configure/create_dataset.py | 8 ++--- apps/hiopy/worker.py | 8 ++--- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index d1b1ffc..86d0202 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -40,8 +40,8 @@ def _collect_groups(dataset): def add_height(dataset, name, n): for g in _collect_groups(dataset): - height = g.create_array(name, fill_value=None, dtype=np.int32, shape=np.arange(n).shape) - height.append(data=np.arange(n)) + height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape) + height[:] = np.arange(n) height.attrs["_ARRAY_DIMENSIONS"] = [name] height.attrs["axis"] = "Z" height.attrs["long_name"] = "generalized_height" @@ -60,36 +60,60 @@ def add_variable( frac_mask=None, yac_name=None, attributes=None, + chunks_per_shard=None, **kwargs, ): for g in _collect_groups(dataset): - ntime = g.get('time').shape[0] + ntime = g.get("time").shape[0] - grid_mapping_name = g.get('crs').attrs["grid_mapping_name"] + grid_mapping_name = g.get("crs").attrs["grid_mapping_name"] spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell" crs_len = 0 if grid_mapping_name == "healpix": - crs_len = healpy.nside2npix(g.get('crs').attrs["healpix_nside"]) + crs_len = healpy.nside2npix(g.get("crs").attrs["healpix_nside"]) else: if "clon" not in g or "clat" not in g: raise Exception("Coordinates not defined appropriately in the dataset.") - assert g.get('clon').shape[0] == g.get('clat').shape[0] - crs_len = g.get('clon').shape[0] + assert g.get("clon").shape[0] == g.get("clat").shape[0] + crs_len = g.get("clon").shape[0] _attributes = attributes or {} + if zaxis is None: shape = (ntime, crs_len) - _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1)) if chunk_shape is not None else () + _chunk_shape = ( + (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1)) + if chunk_shape is not None + else () + ) _attributes["_ARRAY_DIMENSIONS"] = ("time", spatial_attr) else: nheight = g[zaxis].shape[0] shape = (ntime, nheight, crs_len) - _chunk_shape = (np.minimum(chunk_shape, shape).item(0), np.minimum(chunk_shape, shape).item(1), np.minimum(chunk_shape, shape).item(2)) if chunk_shape is not None else None + _chunk_shape = ( + ( + np.minimum(chunk_shape, shape).item(0), + nheight, + np.minimum(chunk_shape, shape).item(2), + ) + if chunk_shape is not None + else None + ) _attributes["_ARRAY_DIMENSIONS"] = ("time", zaxis, spatial_attr) _attributes["grid_mapping"] = "crs" + _shard_shape = None + if chunks_per_shard is not None: + _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) + v = g.create_array( - name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs + name, + shape=shape, + dtype=np.float32, + fill_value=np.nan, + chunks=_chunk_shape, + shards=_shard_shape, + **kwargs, ) # TODO: Use a generic name instead of hiopy such that it represents arbitrary grid too @@ -119,7 +143,7 @@ def extend_time(dataset, enddate): def consolidate(dataset): - zarr.convenience.consolidate_metadata(dataset.store) + zarr.consolidate_metadata(dataset.store) def info(dataset, tree): @@ -193,6 +217,7 @@ def main(): "--frac-mask", type=str, default=None, help="Name of the frac_mask array in the same group" ) add_variable_parser.add_argument("--yac-name", type=str, help="name of the yac field") + add_variable_parser.add_argument("--chunks-per-shard", type=int) extend_time_parser = subparsers.add_parser("extend-time") extend_time_parser.set_defaults(func=extend_time) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 0e47c6f..bb048ac 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -1,15 +1,16 @@ #!/usr/bin/env python3 import numpy as np -import zarr def add_timedata(dataset, startdate, enddate, dt): time_data = ( np.arange(startdate, enddate, np.timedelta64(dt, "s")) - startdate ) // np.timedelta64(1, "s") - time = dataset.create_array(name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong) - time.append(data=time_data) + time = dataset.create_array( + name="time", fill_value=None, shape=time_data.shape, dtype=np.longlong + ) + time[:] = time_data time.attrs["_ARRAY_DIMENSIONS"] = ("time",) time.attrs["axis"] = "T" time.attrs["calendar"] = "proleptic_gregorian" @@ -43,4 +44,3 @@ def create_dataset_healpix_hierarchic(dataset, startdate, enddate, dt, order, pr create_dataset_healpix(zg, startdate, enddate, dt, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" - diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 794f769..c031239 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -56,7 +56,7 @@ def main(): assert len(args.datasets) == 1, "Loco only supports reading from 1 dataset" loco_store = zarr.MemoryStore() zarr.copy_store(args.datasets[0].store, loco_store) - zarr.convenience.consolidate_metadata(loco_store) + zarr.consolidate_metadata(loco_store) loco_server = LocoServer(loco_store, args.loco_host, args.loco_port) args.datasets = [zarr.open(store=loco_store)] @@ -126,10 +126,10 @@ def main(): # compute time start index t0 = ( np.datetime64(start_datetime()) - - np.datetime64(v.group.time.attrs["units"][len("seconds since ") :]) + - np.datetime64(v.group["time"].attrs["units"][len("seconds since ") :]) ) / np.timedelta64(1, "s") - t0_idx = np.searchsorted(v.group.time, t0) - assert v.group.time[t0_idx] == t0, "start_datetime not found in time axis" + t0_idx = np.searchsorted(v.group["time"], t0) + assert v.group["time"][t0_idx] == t0, "start_datetime not found in time axis" dt = time_coordinate[t0_idx + 1] - time_coordinate[t0_idx] -- GitLab From 3d0a938be375716c07db1a842b14f806a08fe892 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 16:40:46 +0100 Subject: [PATCH 3/7] added some documentation to the create-dataset helper functions --- apps/hiopy/configure/create_dataset.py | 77 ++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 51b4bdb..ca976b3 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -1,8 +1,33 @@ #!/usr/bin/env python3 import numpy as np +import zarr + + +def add_coordinates( + dataset: zarr.Dataset, + coordinates: list[tuple[float, float]], + coord_names: tuple[str, str] = ("lon", "lat"), +) -> None: + """ + Add longitude and latitude coordinates to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the coordinates will be added. + coordinates : list[tuple[float, float]] + A list of tuples containing the (longitude, latitude) values for each point. + coord_names : tuple[str, str], optional + The names to use for the longitude and latitude arrays. Defaults to ("lon", "lat"). + + Notes + ----- + This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude. + The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system. + Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)]) + """ -def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "point_cloud" @@ -28,15 +53,59 @@ def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" -def add_healpix_grid(dataset, order): + +def add_healpix_grid(dataset: zarr.Dataset, order: int): + """ + Add a HealPix grid to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the HealPix grid will be added to the crs. + order : int + The order of the HealPix grid. This corresponds to 2^order for the NSIDE. + + Notes + ----- + The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set + accordingly. No values are added to it + """ crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "healpix" crs.attrs["healpix_nside"] = 2**order crs.attrs["healpix_order"] = "nest" -def add_healpix_hierarchy(dataset, order, prefix="healpix_"): - for o in range(order + 1): + +def add_healpix_hierarchy( + dataset: zarr.Dataset, + order: int, + nr_of_coarsenings: int = 4, + prefix: str = "healpix_", +) -> None: + """ + Add a hierarchical structure to the specified Zarr dataset for a given Healpix order. + + This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid. + The `add_healpix_grid` function is used to create the actual grid arrays within each group. + + Parameters + ---------- + dataset : zarr.Dataset + The Zarr dataset where the hierarchy will be added. + order : int + The maximum level in the hierarchy. + nr_of_coarsenings : int + Number of coarsening aggregation levels needed + prefix : str, optional + The prefix to use for naming each group. Defaults to "healpix_". + + Notes + ----- + This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid. + The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation. + """ + for o in range(order, order - nr_of_coarsenings, -1): zg = dataset.create_group(name=f"{prefix}{o}") add_healpix_grid(zg, o) if o < order: -- GitLab From 3403df03b6f4e45fb2a2b9159981b6ea5da860f6 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 18:06:22 +0100 Subject: [PATCH 4/7] bumped zarr required version to 3 and updated the scripts to use python3.11 --- pyproject.toml | 2 +- requirements-dev.txt | 3 ++- scripts/setup_devenv/build_dependencies.sh | 6 +++--- scripts/setup_devenv/levante_omp412.sh | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a8f2422..9153da4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ version = "0.0.1" dependencies = [ "numpy", "pybind11", - "zarr<3.0", + "zarr>=3.0", "healpy", "aiohttp", "regex_engine" diff --git a/requirements-dev.txt b/requirements-dev.txt index bfb1d4d..751652f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,5 +3,6 @@ wheel ruff pre-commit healpy -zarr>=2,<3 +zarr>=3 aiohttp +rich \ No newline at end of file diff --git a/scripts/setup_devenv/build_dependencies.sh b/scripts/setup_devenv/build_dependencies.sh index a6b50a8..33a6022 100755 --- a/scripts/setup_devenv/build_dependencies.sh +++ b/scripts/setup_devenv/build_dependencies.sh @@ -220,9 +220,9 @@ function install_yac { function install_all { - echo "========================" - echo "== building HEALPIX & Co" - check_and_install healpix_cxx + # echo "========================" + # echo "== building HEALPIX & Co" + # check_and_install healpix_cxx echo "========================" echo "== building YAC & Co" check_and_install yac diff --git a/scripts/setup_devenv/levante_omp412.sh b/scripts/setup_devenv/levante_omp412.sh index 0a10e26..92054b2 100755 --- a/scripts/setup_devenv/levante_omp412.sh +++ b/scripts/setup_devenv/levante_omp412.sh @@ -36,7 +36,7 @@ INSTALL_PATH=$BUILD_PATH/install mkdir -p "$BUILD_PATH" pushd "$BUILD_PATH" -eval `spack load --sh python@3.10.10%gcc@=11.2.0` +eval `spack load --sh python@3.11.2%gcc@=11.2.0` # recommended to use a compute node for the build process with > 8 threads THREADS=64 @@ -61,7 +61,7 @@ echo "=== Building coyote ===" CC="${CC}" CXX="${CXX}" FC="${FC}" cmake $ABSOLUTE_coyote_ROOT -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug cmake --build . -j $THREADS -cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.10/site-packages/ +cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.11/site-packages/ export PYTHONPATH=${BUILD_PATH}/python:${ABSOLUTE_coyote_ROOT}/apps echo $PYTHONPATH -- GitLab From 4160449a7a2d84c464f1063b45e20a6f0d72e837 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Wed, 26 Feb 2025 18:09:39 +0100 Subject: [PATCH 5/7] fixed the right value to be passed for chunk-shape in zarr 3 --- apps/hiopy/configure/configure.py | 12 ++++++----- apps/hiopy/configure/create_dataset.py | 28 ++++++++++++-------------- requirements-dev.txt | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 6a9e4d7..602003b 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -75,18 +75,20 @@ def add_variable( if zaxis is None: shape = (*ntime, crs_len) - _chunk_shape = (np.min(chunk_shape, shape),) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr) else: nheight = g.get(zaxis).shape[0] shape = (*ntime, nheight, crs_len) - _chunk_shape = (np.min(chunk_shape, shape),) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr) - _attributes["grid_mapping"] = "crs" + _attributes["grid_mapping"] = "crs" + _chunk_shape = "auto" _shard_shape = None - if chunks_per_shard is not None: - _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) + + if chunk_shape is not None: + _chunk_shape = (np.min(chunk_shape, shape),) + if chunks_per_shard is not None: + _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) v = g.create_array( name, diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index ca976b3..097d4f7 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -5,7 +5,7 @@ import zarr def add_coordinates( - dataset: zarr.Dataset, + dataset: zarr.Group, coordinates: list[tuple[float, float]], coord_names: tuple[str, str] = ("lon", "lat"), ) -> None: @@ -14,8 +14,8 @@ def add_coordinates( Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the coordinates will be added. + dataset : zarr.Group + The Zarr group where the coordinates will be added. coordinates : list[tuple[float, float]] A list of tuples containing the (longitude, latitude) values for each point. coord_names : tuple[str, str], optional @@ -35,33 +35,31 @@ def add_coordinates( lat_list, lon_list = zip(*coordinates) - lon = dataset.create_array( - name=coord_names[0], fill_value=None, shape=lon_list.shape, dtype=np.float32 + lon = dataset.create_dataset( + name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),) ) - lon[:] = np.array(lon_list) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degree" lon.attrs["standard_name"] = "grid_longitude" - lat = dataset.create_array( - name=coord_names[1], fill_value=None, shape=lat_list.shape, dtype=np.float32 + lat = dataset.create_dataset( + name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),) ) - lat[:] = np.array(lat_list) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] lat.attrs["long_name"] = "latitude" lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" -def add_healpix_grid(dataset: zarr.Dataset, order: int): +def add_healpix_grid(dataset: zarr.Group, order: int): """ Add a HealPix grid to the specified Zarr dataset. Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the HealPix grid will be added to the crs. + dataset : zarr.Group + The Zarr group where the HealPix grid will be added to the crs. order : int The order of the HealPix grid. This corresponds to 2^order for the NSIDE. @@ -78,7 +76,7 @@ def add_healpix_grid(dataset: zarr.Dataset, order: int): def add_healpix_hierarchy( - dataset: zarr.Dataset, + dataset: zarr.Group, order: int, nr_of_coarsenings: int = 4, prefix: str = "healpix_", @@ -91,8 +89,8 @@ def add_healpix_hierarchy( Parameters ---------- - dataset : zarr.Dataset - The Zarr dataset where the hierarchy will be added. + dataset : zarr.Group + The Zarr group where the hierarchy will be added. order : int The maximum level in the hierarchy. nr_of_coarsenings : int diff --git a/requirements-dev.txt b/requirements-dev.txt index 751652f..873ff3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ pre-commit healpy zarr>=3 aiohttp -rich \ No newline at end of file +rich -- GitLab From e04afefd7927940a644c6987143925c495521adc Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 11 Mar 2025 15:01:29 +0100 Subject: [PATCH 6/7] fixed chunk-shape calculation --- apps/hiopy/configure/configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index 602003b..5ceb9b0 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -86,7 +86,7 @@ def add_variable( _shard_shape = None if chunk_shape is not None: - _chunk_shape = (np.min(chunk_shape, shape),) + _chunk_shape = tuple(min(chunk_shape, shape)) if chunks_per_shard is not None: _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) -- GitLab From 9edd63422ae7aaba265ce08b6ba8be41d6796660 Mon Sep 17 00:00:00 2001 From: Siddhant Tibrewal <siddhant.tibrewal@mpimet.mpg.de> Date: Tue, 11 Mar 2025 15:36:18 +0100 Subject: [PATCH 7/7] changed the order of imports in the worker as the numpy (built with different compiler) is causing unexpected behaviour if imported first --- apps/hiopy/worker.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index c031239..a0d8bca 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -1,18 +1,18 @@ #!/usr/bin/env python3 -import logging -from argparse import ArgumentParser -from itertools import chain, groupby - -import numpy as np -import zarr from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime - from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id from .loco import LocoServer +import numpy as np +import zarr +import logging +from argparse import ArgumentParser +from itertools import chain, groupby + + def main(): parser = ArgumentParser() -- GitLab