Skip to content
Snippets Groups Projects
Commit f9519845 authored by Siddhant Tibrewal's avatar Siddhant Tibrewal
Browse files

Merge branch 'global_hackathon' into global_hackathon_ex

parents 54a1b000 11e1380d
No related branches found
No related tags found
No related merge requests found
Pipeline #101329 waiting for manual action
......@@ -80,7 +80,7 @@ hiopy-levante:
script:
- |
module load git
/sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv venv
/home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv venv
. venv/bin/activate
ICON_DIR=`pwd -P`/icon
(
......
......@@ -13,7 +13,7 @@ python -m pip install git+https://gitlab.dkrz.de/nils/coyote.git
## Installation with ICON on levante
```bash
/sw/spack-levante/python-3.9.9-fwvsvi/bin/python -m venv ./venv --prompt icon
/home/m/m301120/sw/spack-levante/python-3.11.2-sk474k/bin/python -m venv ./venv --prompt icon
. ./venv/bin/activate
git clone --recursive git@gitlab.dkrz.de:icon/icon.git icon
pushd icon
......
......@@ -8,3 +8,10 @@ def get_var_group(v):
return z
else:
return z[parent_group_path]
def get_var_parent_group(v):
    """Return the group named as parent by the group that contains `v`.

    The group of `v` is looked up with `get_var_group`. Its `hiopy::parent`
    attribute is read as a path, and that path is resolved against the
    object returned by `zarr.open(v.store)`.
    """
    parent_path = get_var_group(v).attrs["hiopy::parent"]
    root = zarr.open(v.store)
    return root[parent_path]
......@@ -29,7 +29,7 @@ def _collect_groups(dataset):
yield from _collect_groups(g)
def add_height(dataset, name, positive_direction: str = "up", values: list[int] = []):
def add_height(dataset, name, positive_direction: str, values: list[int]):
for g in _collect_groups(dataset):
height = g.create_array(name, fill_value=None, dtype=np.int64, shape=(len(values),))
height[:] = np.asarray(values, dtype=np.int64)
......
......@@ -23,8 +23,9 @@ def add_coordinates(
Notes
-----
This function creates two new arrays in the dataset: `coord_names[0]` for longitude and `coord_names[1]` for latitude.
The `crs` array is also created, with its attributes set to indicate that it's a "point_cloud" coordinate reference system.
This function creates two new arrays in the dataset: `coord_names[0]` for longitude and
`coord_names[1]` for latitude. The `crs` array is also created, with its attributes set
to indicate that it's a "point_cloud" coordinate reference system.
Example: add_coordinates(dataset, [(10.2, 45.3), (20.4, 50.5)])
"""
......@@ -54,19 +55,20 @@ def add_coordinates(
def add_healpix_grid(dataset: zarr.Group, order: int):
"""
Add a HealPix grid to the specified Zarr dataset.
Parameters
----------
dataset : zarr.Group
The Zarr group where the HealPix grid will be added to the crs.
order : int
The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
Notes
-----
The HealPix grid is stored as a single array named "crs" in the dataset, with the healpix_nside and healpix_order attributes set
accordingly. No values are added to it
Add a HealPix grid to the specified Zarr dataset.
Parameters
----------
dataset : zarr.Group
The Zarr group where the HealPix grid will be added to the crs.
order : int
The order of the HealPix grid. This corresponds to 2^order for the NSIDE.
Notes
-----
The HealPix grid is stored as a single array named "crs" in the dataset, with
the healpix_nside and healpix_order attributes set accordingly.
No values are added to it
"""
crs = dataset.create_array(name="crs", dtype=np.float32, shape=(1,))
crs.attrs["_ARRAY_DIMENSIONS"] = ("crs",)
......@@ -84,8 +86,9 @@ def add_healpix_hierarchy(
"""
Add a hierarchical structure to the specified Zarr dataset for a given Healpix order.
This function creates a group hierarchy with each level representing a specific resolution of the Healpix grid.
The `add_healpix_grid` function is used to create the actual grid arrays within each group.
This function creates a group hierarchy with each level representing a specific
resolution of the Healpix grid. The `add_healpix_grid` function is used to create the
actual grid arrays within each group.
Parameters
----------
......@@ -100,12 +103,15 @@ def add_healpix_hierarchy(
Notes
-----
This function sets up a hierarchical structure with each level representing a specific resolution of the Healpix grid.
The `hiopy::parent` attribute is used to link each group to its parent in the hierarchy, allowing for efficient navigation.
This function sets up a hierarchical structure with each level representing a
specific resolution of the Healpix grid. The `hiopy::parent` attribute is used
to link each group to its parent in the hierarchy, allowing for efficient navigation.
"""
for o in range(order, order - nr_of_coarsenings, -1):
zg = dataset.create_group(name=f"{prefix}{o}")
add_healpix_grid(zg, o)
if o < order:
parent_name = f"{dataset.basename}/{prefix}{o+1}" if dataset.basename is not "" else f"{prefix}{o+1}"
parent_name = (
f"{dataset.basename}/{prefix}{o+1}" if dataset.basename != "" else f"{prefix}{o+1}"
)
zg.attrs["hiopy::parent"] = parent_name
......@@ -29,8 +29,9 @@ set_tests_properties(hiopy.create_simple_dataset_4threads PROPERTIES
add_test(
NAME hiopy.check_hierarchy
COMMAND "/usr/bin/env" "python3" "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
COMMAND ${Python_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/check_hierarchy.py" "simple_dataset.zarr"
)
set_tests_properties(hiopy.check_hierarchy PROPERTIES
FIXTURES_REQUIRED simple_dataset.zarr
ENVIRONMENT "PYTHONPATH=${CMAKE_BINARY_DIR}/python:${CMAKE_SOURCE_DIR}/apps:$ENV{PYTHONPATH}"
......
......@@ -12,9 +12,9 @@ z = zarr.open("${dataset}")
hc.add_healpix_hierarchy(z, order=3)
hc.add_time(z, "2000-01-01", "2000-01-01T00:01:00", 3)
hc.add_variable(z, "x", "simple_source", "simple_grid")
hc.add_variable(z, "y", "simple_source", "simple_grid")
hc.add_variable(z, "z", "simple_source", "simple_grid")
hc.add_variable(z, "x", "simple_source", "simple_grid", attributes={"hiopy::copy_metadata": True})
hc.add_variable(z, "y", "simple_source", "simple_grid", attributes={"hiopy::copy_metadata": True})
hc.add_variable(z, "z", "simple_source", "simple_grid", attributes={"hiopy::copy_metadata": True})
hc.add_variable(z, "clock", "simple_source", "simple_grid", attributes={"hiopy::copy_metadata": True})
zarr.consolidate_metadata(z.store)
......
#!/usr/bin/env python3
import zarr
from coyote import (
Coyote,
ensure_enddef,
......@@ -14,16 +13,16 @@ from coyote import (
from ._data_handler import DataHandler
from ._distribute_work import distribute_work
from ._grids import def_grid, grid_id
from ._zarr_utils import get_var_group
from ._zarr_utils import get_var_group, get_var_parent_group
from .loco import LocoServer
import numpy as np
import json
import zarr
import logging
from argparse import ArgumentParser
from itertools import chain, groupby
import json
import logging
import numpy as np
import zarr
def main():
......@@ -87,16 +86,16 @@ def main():
for _name, item in group.groups():
yield from collect_data_vars(item)
data_vars = list(chain(*[collect_data_vars(z) for z in args.datasets]))
logging.info(f"Found {len(data_vars)} variables")
if len(data_vars) == 0:
all_data_vars = list(chain(*[collect_data_vars(z) for z in args.datasets]))
logging.info(f"Found {len(all_data_vars)} variables")
if len(all_data_vars) == 0:
raise RuntimeError("No variables found by the hiopy worker.")
# group the variables by the crs grid_mapping.
# This is used to distribute them through the processes and create the coyote instances
grouped_data_vars = {
gid: list(variables)
for gid, variables in groupby(sorted(data_vars, key=grid_id), key=grid_id)
for gid, variables in groupby(sorted(all_data_vars, key=grid_id), key=grid_id)
}
distributed_data_vars = distribute_work(grouped_data_vars, group_comm_size())
......@@ -154,8 +153,8 @@ def main():
src_comp, src_grid = v.attrs["hiopy::yac_source"]
else:
assert "hiopy::parent" in var_group.attrs, f"No source for field {v.name} specified"
parent_var_path = var_group.attrs["hiopy::parent"] + "/" + v.basename
source_var = zarr.open(store=v.store)[parent_var_path]
parent_group = get_var_parent_group(v)
source_var = parent_group[v.basename]
src_name = source_var.name
source_var_gid = grid_id(source_var)
src_comp = src_grid = f"{args.process_group}_{source_var_gid}"
......@@ -197,7 +196,11 @@ def main():
)
def get_source_triple(v):
if "hiopy::yac_source" in v.attrs:
var_group = get_var_group(v)
if "hiopy::parent" in var_group.attrs:
pgroup = get_var_parent_group(v)
return get_source_triple(pgroup[v.basename])
elif "hiopy::yac_source" in v.attrs:
src_comp, src_grid = v.attrs["hiopy::yac_source"]
src_field = v.attrs.get("hiopy::src_name", v.basename)
return src_comp, src_grid, src_field
......@@ -206,10 +209,8 @@ def main():
ensure_enddef()
if group_comm_rank() == 0:
for v in data_vars:
if "hiopy::yac_source" not in v.attrs:
continue
else:
for v in all_data_vars:
if "hiopy::copy_metadata" in v.attrs:
comp, grid, field = get_source_triple(v)
md_str = get_field_metadata(comp, grid, field)
metadata = json.loads(md_str)
......@@ -220,7 +221,7 @@ def main():
# re-consolidate the newly updated metadata
for ds in args.datasets:
zarr.consolidate_metadata(ds.store)
zarr.consolidate_metadata(ds.store)
run()
......
FROM python:3.9-slim
FROM python:3.11-slim
LABEL maintainer="dreier@dkrz.de"
#Remove some warnings
......
......@@ -8,10 +8,11 @@ version = "0.0.1"
dependencies = [
"numpy",
"pybind11",
"zarr>=3.0",
"zarr>=3.0.6",
"healpy",
"aiohttp",
"regex_engine"
"regex_engine",
"rich"
]
[tool.scikit-build]
......@@ -43,7 +44,5 @@ lint.select = [
"B",
# flake8-simplify
"SIM",
# isort
"I",
]
line-length = 100 # accommodate any libc++ motivated requirements of over 80 characters
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment