diff --git a/apps/hiopy/configure/configure.py b/apps/hiopy/configure/configure.py index f53774aad0a6aee62934cc458888d4294bb1ca1c..5ceb9b0599ea4d55202980dd4e233e97e3c2375a 100755 --- a/apps/hiopy/configure/configure.py +++ b/apps/hiopy/configure/configure.py @@ -31,7 +31,8 @@ def _collect_groups(dataset): def add_height(dataset, name, n): for g in _collect_groups(dataset): - height = g.create_dataset(name, data=np.arange(n)) + height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape) + height[:] = np.arange(n) height.attrs["_ARRAY_DIMENSIONS"] = [name] height.attrs["axis"] = "Z" height.attrs["long_name"] = "generalized_height" @@ -51,37 +52,52 @@ def add_variable( frac_mask=None, yac_name=None, attributes=None, + chunks_per_shard=None, **kwargs, ): for g in _collect_groups(dataset): taxis_tuple = tuple() if taxis is None else (taxis,) - ntime = tuple() if taxis is None else (g[taxis].shape[0],) - grid_mapping_name = g.crs.attrs["grid_mapping_name"] + ntime = tuple() if taxis is None else (g.get(taxis).shape[0],) + grid_mapping_name = g.get("crs").attrs["grid_mapping_name"] spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell" crs_len = 0 if grid_mapping_name == "healpix": - crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"]) + crs_len = healpy.nside2npix(g.get("crs").attrs["healpix_nside"]) elif grid_mapping_name == "point_cloud": - lon_coord, lat_coord = g.crs.attrs["coordinates"].split(" ") + lon_coord, lat_coord = g.get("crs").attrs["coordinates"].split(" ") assert lon_coord in g and lat_coord in g - assert g[lon_coord].shape[0] == g[lat_coord].shape[0] - crs_len = g[lat_coord].shape[0] + assert g.get(lon_coord).shape[0] == g.get(lat_coord).shape[0] + crs_len = g.get(lat_coord).shape[0] else: raise Exception("Unknown crs.") _attributes = attributes or {} + if zaxis is None: shape = (*ntime, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr) else: - nheight = g[zaxis].shape[0] + nheight = g.get(zaxis).shape[0] shape = (*ntime, nheight, crs_len) - _chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None _attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr) + _attributes["grid_mapping"] = "crs" - v = g.create_dataset( - name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs + _chunk_shape = "auto" + _shard_shape = None + + if chunk_shape is not None: + _chunk_shape = tuple(min(chunk_shape, shape)) + if chunks_per_shard is not None: + _shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape) + + v = g.create_array( + name, + shape=shape, + dtype=np.float32, + fill_value=np.nan, + chunks=_chunk_shape, + shards=_shard_shape, + **kwargs, ) # TODO: Use a generic name instead of hiopy such that it represents arbitrary grid too diff --git a/apps/hiopy/configure/create_dataset.py b/apps/hiopy/configure/create_dataset.py index 36e336e860beb49bb55003c60c3d395ba3f897ca..097d4f71de59d2a2a73976b5e1c5a562a10564f5 100644 --- a/apps/hiopy/configure/create_dataset.py +++ b/apps/hiopy/configure/create_dataset.py @@ -1,41 +1,110 @@ #!/usr/bin/env python3 import numpy as np +import zarr -def add_coordinates(dataset, coordinates, coord_names=("lon", "lat")): - # TODO: update create_dataset() calls to adhrer to zarr 3.0 recommendations - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32)) +def add_coordinates( + dataset: zarr.Group, + coordinates: list[tuple[float, float]], + coord_names: tuple[str, str] = ("lon", "lat"), +) -> None: + """ + Add longitude and latitude coordinates to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Group + The Zarr group where the coordinates will be added. + coordinates : list[tuple[float, float]] + A list of tuples containing the (longitude, latitude) values for each point. + coord_names : tuple[str, str], optional + The names to use for the longitude and latitude arrays. Defaults to ("lon", "lat"). + + Notes + ----- + This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude. + The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system. + Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)]) + """ + + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "point_cloud" crs.attrs["coordinates"] = f"{coord_names[0]} {coord_names[1]}" lat_list, lon_list = zip(*coordinates) - lon = dataset.create_dataset(coord_names[0], data=np.array(lon_list)) + lon = dataset.create_dataset( + name=coord_names[0], data=np.array(lon_list), shape=(len(coordinates),) + ) lon.attrs["_ARRAY_DIMENSIONS"] = [coord_names[0]] lon.attrs["long_name"] = "longitude" lon.attrs["units"] = "degree" lon.attrs["standard_name"] = "grid_longitude" - lat = dataset.create_dataset(coord_names[1], data=np.array(lat_list)) + lat = dataset.create_dataset( + name=coord_names[1], data=np.array(lat_list), shape=(len(coordinates),) + ) lat.attrs["_ARRAY_DIMENSIONS"] = [coord_names[1]] lat.attrs["long_name"] = "latitude" lat.attrs["units"] = "degree" lat.attrs["standard_name"] = "grid_latitude" -def add_healpix_grid(dataset, order): - crs = dataset.create_dataset("crs", data=np.array([np.nan], dtype=np.float32), shape=(1,)) +def add_healpix_grid(dataset: zarr.Group, order: int): + """ + Add a HealPix grid to the specified Zarr dataset. + + Parameters + ---------- + dataset : zarr.Group + The Zarr group where the HealPix grid will be added to the crs. + order : int + The order of the HealPix grid. This corresponds to 2^order for the NSIDE. + + Notes + ----- + The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set + accordingly. No values are added to it + """ + crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,)) crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",) crs.attrs["grid_mapping_name"] = "healpix" crs.attrs["healpix_nside"] = 2**order crs.attrs["healpix_order"] = "nest" -def add_healpix_hierarchy(dataset, order, prefix="healpix_"): - for o in range(order + 1): - zg = dataset.create_group(f"{prefix}{o}") +def add_healpix_hierarchy( + dataset: zarr.Group, + order: int, + nr_of_coarsenings: int = 4, + prefix: str = "healpix_", +) -> None: + """ + Add a hierarchical structure to the specified Zarr dataset for a given Healpix order. + + This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid. + The `add_healpix_grid` function is used to create the actual grid arrays within each group. + + Parameters + ---------- + dataset : zarr.Group + The Zarr group where the hierarchy will be added. + order : int + The maximum level in the hierarchy. + nr_of_coarsenings : int + Number of coarsening aggregation levels needed + prefix : str, optional + The prefix to use for naming each group. Defaults to "healpix_". + + Notes + ----- + This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid. + The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation. + """ + for o in range(order, order - nr_of_coarsenings, -1): + zg = dataset.create_group(name=f"{prefix}{o}") add_healpix_grid(zg, o) if o < order: zg.attrs["hiopy::parent"] = f"{prefix}{o+1}" diff --git a/apps/hiopy/worker.py b/apps/hiopy/worker.py index 794f769237e6c859a01fd7e69dff2fa41ee67975..a0d8bcaae6941af26e9bfc682cc0e845d745c33d 100755 --- a/apps/hiopy/worker.py +++ b/apps/hiopy/worker.py @@ -1,18 +1,18 @@ #!/usr/bin/env python3 -import logging -from argparse import ArgumentParser -from itertools import chain, groupby - -import numpy as np -import zarr from coyote import Coyote, group_comm_rank, group_comm_size, init, run, start_datetime - from ._data_handler import DataHandler from ._distribute_work import distribute_work from ._grids import def_grid, grid_id from .loco import LocoServer +import numpy as np +import zarr +import logging +from argparse import ArgumentParser +from itertools import chain, groupby + + def main(): parser = ArgumentParser() @@ -56,7 +56,7 @@ def main(): assert len(args.datasets) == 1, "Loco only supports reading from 1 dataset" loco_store = zarr.MemoryStore() zarr.copy_store(args.datasets[0].store, loco_store) - zarr.convenience.consolidate_metadata(loco_store) + zarr.consolidate_metadata(loco_store) loco_server = LocoServer(loco_store, args.loco_host, args.loco_port) args.datasets = [zarr.open(store=loco_store)] @@ -126,10 +126,10 @@ def main(): # compute time start index t0 = ( np.datetime64(start_datetime()) - - np.datetime64(v.group.time.attrs["units"][len("seconds since ") :]) + - np.datetime64(v.group["time"].attrs["units"][len("seconds since ") :]) ) / np.timedelta64(1, "s") - t0_idx = np.searchsorted(v.group.time, t0) - assert v.group.time[t0_idx] == t0, "start_datetime not found in time axis" + t0_idx = np.searchsorted(v.group["time"], t0) + assert v.group["time"][t0_idx] == t0, "start_datetime not found in time axis" dt = time_coordinate[t0_idx + 1] - time_coordinate[t0_idx] diff --git a/pyproject.toml b/pyproject.toml index a8f24220ff29cc1e19262aac7925bc8d980dc897..9153da4e38731a17feb88930c40fba148fe47027 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ version = "0.0.1" dependencies = [ "numpy", "pybind11", - "zarr<3.0", + "zarr>=3.0", "healpy", "aiohttp", "regex_engine" diff --git a/requirements-dev.txt b/requirements-dev.txt index 01b1586255b459bf7135b583c6bc9a5ca9114c54..873ff3b324fe9d1a4124b5f011fcae30034562c3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,5 +3,6 @@ wheel ruff pre-commit healpy -zarr<3.0 +zarr>=3 aiohttp +rich diff --git a/scripts/setup_devenv/build_dependencies.sh b/scripts/setup_devenv/build_dependencies.sh index a6b50a8173578dadd9348309b2309c94587d3c92..33a60222ba012e719f1aeec167c21f2d9a47f4ed 100755 --- a/scripts/setup_devenv/build_dependencies.sh +++ b/scripts/setup_devenv/build_dependencies.sh @@ -220,9 +220,9 @@ function install_yac { function install_all { - echo "========================" - echo "== building HEALPIX & Co" - check_and_install healpix_cxx + # echo "========================" + # echo "== building HEALPIX & Co" + # check_and_install healpix_cxx echo "========================" echo "== building YAC & Co" check_and_install yac diff --git a/scripts/setup_devenv/levante_omp412.sh b/scripts/setup_devenv/levante_omp412.sh index 0a10e26a61fc711c9718b63e1f6da88891b24806..92054b2280297b0721956f254ba482a59390b96f 100755 --- a/scripts/setup_devenv/levante_omp412.sh +++ b/scripts/setup_devenv/levante_omp412.sh @@ -36,7 +36,7 @@ INSTALL_PATH=$BUILD_PATH/install mkdir -p "$BUILD_PATH" pushd "$BUILD_PATH" -eval `spack load --sh python@3.10.10%gcc@=11.2.0` +eval `spack load --sh python@3.11.2%gcc@=11.2.0` # recommended to use a compute node for the build process with > 8 threads THREADS=64 @@ -61,7 +61,7 @@ echo "=== Building coyote ===" CC="${CC}" CXX="${CXX}" FC="${FC}" cmake $ABSOLUTE_coyote_ROOT -DCMAKE_PREFIX_PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug cmake --build . -j $THREADS -cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.10/site-packages/ +cp $BUILD_PATH/python/coyote.*.so $VENV_PATH/lib/python3.11/site-packages/ export PYTHONPATH=${BUILD_PATH}/python:${ABSOLUTE_coyote_ROOT}/apps echo $PYTHONPATH