Skip to content
Snippets Groups Projects
Commit 5063872f authored by Martin Bergemann's avatar Martin Bergemann :speech_balloon:
Browse files

Create python module

parent 4e33d965
No related branches found
No related tags found
1 merge request!5Create python module
Pipeline #19330 failed
......@@ -14,7 +14,16 @@ pip install (--user) https://gitlab.dkrz.de/ch1187/rechunk-data/-/archive/2206.0
Use the `--user` flag if you do not have superuser rights and are not using `anaconda`, `pipenv` or `virtualenv`.
## Usage
Basic usage:
### Using the python module
```python
from rechunk_data import rechunk_dataset
import xarray as xr
dset = xr.open_mfdataset("/data/*", parallel=True, combine="by_coords")
new_data = rechunk_dataset(dset)
```
### Using the command line interface:
```bash
rechunk-data --help
......
......@@ -6,13 +6,11 @@ from pathlib import Path
from typing import List, Optional
from ._rechunk import (
rechunk_netcdf_file,
_search_for_nc_files,
_save_dataset,
_rechunk_dataset,
rechunk_dataset,
logger,
)
__version__ = "2206.0.2"
__version__ = "2206.0.3"
PROGRAM_NAME = "rechunk-data"
......
......@@ -11,6 +11,7 @@ def _save_dataset(
dset: xr.Dataset, file_name: Path, encoding: Dict[str, Any], engine: str
) -> None: ...
def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]: ...
# Public wrapper stub: returns only the rechunked dataset (an xr.Dataset).
def rechunk_dataset(dset: xr.Dataset) -> xr.Dataset: ...
def rechunk_netcdf_file(
input_path: os.PathLike,
output_path: Optional[os.PathLike] = ...,
......
......@@ -90,6 +90,22 @@ def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]:
return dset, encoding
def rechunk_dataset(dset: xr.Dataset) -> xr.Dataset:
    """Rechunk an xarray dataset.

    Thin public wrapper around ``_rechunk_dataset``: that helper returns a
    ``(dataset, encoding)`` pair, of which only the dataset is handed back
    to the caller.

    Parameters
    ----------
    dset: xarray.Dataset
        Input dataset that is going to be rechunked

    Returns
    -------
    xarray.Dataset: rechunked dataset
    """
    # The second tuple element (the encoding mapping) is not needed here.
    rechunked, _encoding = _rechunk_dataset(dset)
    return rechunked
def rechunk_netcdf_file(
input_path: os.PathLike,
output_path: Optional[os.PathLike] = None,
......
......@@ -4,7 +4,8 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
from pathlib import Path
import dask
from rechunk_data import rechunk_netcdf_file, _save_dataset
from rechunk_data import rechunk_netcdf_file, rechunk_dataset
from rechunk_data._rechunk import _save_dataset
def test_rechunk_data_dir_with_overwrite(data_dir: Path) -> None:
......@@ -38,6 +39,13 @@ def test_rechunk_single_data_file(data_file: Path) -> None:
assert Path(temp_file.name).exists()
def test_rechunk_dataset(small_chunk_data) -> None:
    """Rechunking a dataset must keep the same data variables."""
    chunk_settings = {"array.chunk-size": "1MiB"}
    with dask.config.set(chunk_settings):
        rechunked = rechunk_dataset(small_chunk_data)
    # Compare variable names (and their order) before and after rechunking.
    names_after = list(rechunked.data_vars)
    names_before = list(small_chunk_data.data_vars)
    assert names_after == names_before
def test_wrong_or_format(small_chunk_data, caplog) -> None:
"""Testing wrong file format."""
caplog.clear()
......
......@@ -2,7 +2,7 @@
import xarray as xr
from rechunk_data import _rechunk_dataset
from rechunk_data._rechunk import _rechunk_dataset
def test_rechunking_small_data(
......
"""Unit tests for searching for files."""
from pathlib import Path
from rechunk_data import _search_for_nc_files
from rechunk_data._rechunk import _search_for_nc_files
def test_search_directory(data_dir: Path) -> None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment