Skip to content
Snippets Groups Projects

Migrate to Zarr 3

Merged Siddhant Tibrewal requested to merge 6-zarr3-migration into main
All threads resolved!
Files
13
@@ -31,7 +31,8 @@ def _collect_groups(dataset):
def add_height(dataset, name, n):
for g in _collect_groups(dataset):
height = g.create_dataset(name, data=np.arange(n))
height = g.create_array(name, fill_value=None, dtype=np.int64, shape=np.arange(n).shape)
height[:] = np.arange(n)
height.attrs["_ARRAY_DIMENSIONS"] = [name]
height.attrs["axis"] = "Z"
height.attrs["long_name"] = "generalized_height"
@@ -51,18 +52,19 @@ def add_variable(
frac_mask=None,
yac_name=None,
attributes=None,
chunks_per_shard=None,
**kwargs,
):
for g in _collect_groups(dataset):
taxis_tuple = tuple() if taxis is None else (taxis,)
ntime = tuple() if taxis is None else (g[taxis].shape[0],)
grid_mapping_name = g.crs.attrs["grid_mapping_name"]
grid_mapping_name = g["crs"].attrs["grid_mapping_name"]
spatial_attr = "point" if (grid_mapping_name == "point_cloud") else "cell"
crs_len = 0
if grid_mapping_name == "healpix":
crs_len = healpy.nside2npix(g.crs.attrs["healpix_nside"])
crs_len = healpy.nside2npix(g["crs"].attrs["healpix_nside"])
elif grid_mapping_name == "point_cloud":
lon_coord, lat_coord = g.crs.attrs["coordinates"].split(" ")
lon_coord, lat_coord = g["crs"].attrs["coordinates"].split(" ")
assert lon_coord in g and lat_coord in g
assert g[lon_coord].shape[0] == g[lat_coord].shape[0]
crs_len = g[lat_coord].shape[0]
@@ -70,18 +72,32 @@ def add_variable(
raise Exception("Unknown crs.")
_attributes = attributes or {}
if zaxis is None:
shape = (*ntime, crs_len)
_chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
_attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, spatial_attr)
else:
nheight = g[zaxis].shape[0]
shape = (*ntime, nheight, crs_len)
_chunk_shape = np.minimum(chunk_shape, shape) if chunk_shape is not None else None
_attributes["_ARRAY_DIMENSIONS"] = (*taxis_tuple, zaxis, spatial_attr)
_attributes["grid_mapping"] = "crs"
v = g.create_dataset(
name, shape=shape, dtype=np.float32, fill_value=np.nan, chunks=_chunk_shape, **kwargs
_chunk_shape = "auto"
_shard_shape = None
if chunk_shape is not None:
    # Clamp every chunk dimension to the array extent. A bare
    # min(chunk_shape, shape) compares the two tuples lexicographically and
    # returns one of them whole, which is not the element-wise minimum that
    # the previous np.minimum(chunk_shape, shape) produced.
    _chunk_shape = tuple(min(c, s) for c, s in zip(chunk_shape, shape))
if chunks_per_shard is not None:
_shard_shape = tuple(i * chunks_per_shard for i in _chunk_shape)
v = g.create_array(
name,
shape=shape,
dtype=np.float32,
fill_value=np.nan,
chunks=_chunk_shape,
shards=_shard_shape,
**kwargs,
)
# TODO: Use a generic name instead of hiopy so that it also covers arbitrary grids
Loading