Skip to content
Snippets Groups Projects
Commit 0285ffb2 authored by Martin Bergemann's avatar Martin Bergemann :speech_balloon:
Browse files

Merge branch 'cf-conventions' into 'main'

Check make cf conventions configurable

See merge request !14
parents 3d787859 cb50c91b
No related branches found
No related tags found
1 merge request!14Check make cf conventions configurable
Pipeline #46074 passed
......@@ -29,20 +29,22 @@ new_data = rechunk_dataset(dset)
```bash
rechunk-data --help
usage: rechunk-data [-h] [--output OUTPUT] [--netcdf-engine {h5netcdf,netcdf4}] [-v] [-V] input
usage: rechunk-data [-h] [--output OUTPUT] [--netcdf-engine {h5netcdf,netcdf4}] [--skip-cf-convention] [-v] [-V] input
Rechunk input netcdf data to optimal chunk-size. approx. 126 MB per chunk
positional arguments:
input Input file/directory. If a directory is given all ``.nc`` files in all sub directories will be processed
optional arguments:
options:
-h, --help show this help message and exit
--output OUTPUT Output file/directory of the chunked netcdf file(s). Note: If ``input`` is a directory output should be a
directory. If None given (default) the ``input`` is overidden. (default: None)
--output OUTPUT Output file/directory of the chunked netcdf file(s).
Note: If ``input`` is a directory output should be a directory.
If None given (default) the ``input`` is overridden. (default: None)
--netcdf-engine {h5netcdf,netcdf4}
The netcdf engine used to create the new netcdf file. (default: h5netcdf)
-v
The netcdf engine used to create the new netcdf file. (default: netcdf4)
--skip-cf-convention Do not assume data variables follow CF conventions. (default: False)
-v Increase verbosity (default: 0)
-V, --version show program's version number and exit
```
......
......@@ -10,7 +10,7 @@ from ._rechunk import (
logger,
)
__version__ = "2301.0.0"
__version__ = "2309.0.0"
PROGRAM_NAME = "rechunk-data"
......@@ -51,10 +51,17 @@ def parse_args(argv: Optional[List[str]]) -> argparse.Namespace:
default="netcdf4",
type=str,
)
parser.add_argument(
"--skip-cf-convention",
help="Do not assume assume data variables follow CF conventions.",
action="store_true",
default=False,
)
parser.add_argument(
"-v",
action="count",
default=0,
help="Increase verbosity",
)
parser.add_argument(
"-V",
......@@ -63,11 +70,16 @@ def parse_args(argv: Optional[List[str]]) -> argparse.Namespace:
version=f"%(prog)s {__version__}",
)
args = parser.parse_args(argv)
logger.setLevel(max(logging.ERROR - (10 + args.v * 10), 0))
logger.setLevel(max(logging.ERROR - (10 + args.v * 10), 10))
return args
def cli(argv: Optional[List[str]] = None) -> None:
    """Command line interface calling the rechunking method.

    Parameters
    ----------
    argv: list of str, optional
        Command line arguments; if None, argparse falls back to
        ``sys.argv[1:]``.
    """
    args = parse_args(argv)
    rechunk_netcdf_file(
        args.input,
        args.output,
        engine=args.netcdf_engine,
        # ``--skip-cf-convention`` is a store_true flag: decode CF
        # metadata unless the user explicitly asked to skip it.
        decode_cf=not args.skip_cf_convention,
    )
......@@ -92,7 +92,11 @@ def _rechunk_dataset(
) from error
for data_var in dset.data_vars:
var = str(data_var)
if not isinstance(dset[var].data, Array):
if (
not isinstance(dset[var].data, Array)
or "bnds" in var
or "rotated_pole" in var
):
logger.debug("Skipping rechunking variable %s", var)
continue
logger.debug("Rechunking variable %s", var)
......@@ -146,6 +150,7 @@ def rechunk_dataset(
def rechunk_netcdf_file(
input_path: os.PathLike,
output_path: Optional[os.PathLike] = None,
decode_cf: bool = True,
engine: Literal["h5netcdf", "netcdf4"] = "netcdf4",
) -> None:
"""Rechunk netcdf files.
......@@ -159,6 +164,9 @@ def rechunk_netcdf_file(
Output file/directory of the chunked netcdf file(s). Note: If ``input``
is a directory output should be a directory. If None given (default)
the ``input`` is overridden.
decode_cf: bool, default: True
Whether to decode the data variables, assuming they were saved
according to CF conventions.
engine: str, default: netcdf4
The netcdf engine used to create the new netcdf file.
"""
......@@ -175,7 +183,9 @@ def rechunk_netcdf_file(
output_file.parent.mkdir(exist_ok=True, parents=True)
try:
with xr.open_mfdataset(
str(input_file), decode_cf=True, parallel=True
str(input_file),
parallel=True,
decode_cf=decode_cf,
) as nc_data:
new_data, encoding = _rechunk_dataset(nc_data, engine)
if encoding:
......
......@@ -67,6 +67,5 @@ def test_wrong_or_format(small_chunk_data, caplog) -> None:
def test_wrong_engine(small_chunk_data) -> None:
    """Passing an unsupported netcdf engine name must raise a ValueError."""
    with pytest.raises(ValueError):
        # "foo" is not one of the accepted engines ("h5netcdf"/"netcdf4").
        rechunk_dataset(small_chunk_data, engine="foo")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment