Skip to content
Snippets Groups Projects
Commit 3d787859 authored by Martin Bergemann's avatar Martin Bergemann :speech_balloon:
Browse files

Merge branch 'use-netcdf4' into 'main'

Use netcdf4 as default engine

See merge request !13
parents a0d439c6 2398e787
No related branches found
No related tags found
1 merge request!13Use netcdf4 as default engine
Pipeline #27985 passed
......@@ -48,13 +48,21 @@ test_39:
script:
- /tmp/test/bin/python -m pytest -vv
test_310:
<< : *py_test
before_script:
- conda create -q -p /tmp/test python=3.10 pip dask -y
- /tmp/test/bin/python -m pip install -e .[test]
script:
- /tmp/test/bin/python -m pytest -vv
pages:
stage: report
needs: []
tags:
- conda
before_script:
- conda create -c conda-forge -q -p /tmp/test python=3.10 pip dask -y
- conda create -c conda-forge -q -p /tmp/test python=3.11 pip dask hdf5 -y
- /tmp/test/bin/python -m pip install -e .[test]
script:
- /tmp/test/bin/coverage run -m pytest
......
......@@ -10,7 +10,7 @@ from ._rechunk import (
logger,
)
__version__ = "2208.0.1"
__version__ = "2301.0.0"
PROGRAM_NAME = "rechunk-data"
......@@ -48,7 +48,7 @@ def parse_args(argv: Optional[List[str]]) -> argparse.Namespace:
"--netcdf-engine",
help=("The netcdf engine used to create the new netcdf file."),
choices=("h5netcdf", "netcdf4"),
default="h5netcdf",
default="netcdf4",
type=str,
)
parser.add_argument(
......
......@@ -118,12 +118,13 @@ def _rechunk_dataset(
encoding[data_var] = {
str(k): v for k, v in dset[var].encoding.items() if str(k) in _keywords
}
encoding[data_var]["chunksizes"] = new_chunks
if engine != "netcdf4" or encoding[data_var].get("contiguous", False) is False:
encoding[data_var]["chunksizes"] = new_chunks
return dset, encoding
def rechunk_dataset(
dset: xr.Dataset, engine: Literal["h5netcdf", "netcdf4"] = "h5netcdf"
dset: xr.Dataset, engine: Literal["h5netcdf", "netcdf4"] = "netcdf4"
) -> xr.Dataset:
"""Rechunk a xarray dataset.
......@@ -131,7 +132,7 @@ def rechunk_dataset(
----------
dset: xarray.Dataset
Input dataset that is going to be rechunked
engine: str, default: h5netcdf
engine: str, default: netcdf4
The netcdf engine used to create the new netcdf file.
Returns
......@@ -145,7 +146,7 @@ def rechunk_dataset(
def rechunk_netcdf_file(
input_path: os.PathLike,
output_path: Optional[os.PathLike] = None,
engine: Literal["h5netcdf", "netcdf4"] = "h5netcdf",
engine: Literal["h5netcdf", "netcdf4"] = "netcdf4",
) -> None:
"""Rechunk netcdf files.
......@@ -158,7 +159,7 @@ def rechunk_netcdf_file(
Output file/directory of the chunked netcdf file(s). Note: If ``input``
is a directory output should be a directory. If None given (default)
the ``input`` is overridden.
engine: str, default: h5netcdf
engine: str, default: netcdf4
The netcdf engine used to create the new netcdf file.
"""
input_path = Path(input_path).expanduser().absolute()
......@@ -173,16 +174,21 @@ def rechunk_netcdf_file(
output_file = Path(output_path)
output_file.parent.mkdir(exist_ok=True, parents=True)
try:
with xr.open_mfdataset(str(input_file), parallel=True) as nc_data:
with xr.open_mfdataset(
str(input_file), decode_cf=True, parallel=True
) as nc_data:
new_data, encoding = _rechunk_dataset(nc_data, engine)
if encoding:
logger.debug(
"Loading data into memory (%s).", format_bytes(new_data.nbytes)
"Loading data into memory (%s).",
format_bytes(new_data.nbytes),
)
new_data = new_data.load()
except Exception as error:
logger.error(
"Error while processing file %s: %s", str(input_file), str(error)
"Error while processing file %s: %s",
str(input_file),
str(error),
)
continue
_save_dataset(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment