Skip to content
Snippets Groups Projects
Commit 5063872f authored by Martin Bergemann's avatar Martin Bergemann :speech_balloon:
Browse files

Create python module

parent 4e33d965
No related branches found
No related tags found
1 merge request!5Create python module
Pipeline #19330 failed
...@@ -14,7 +14,16 @@ pip install (--user) https://gitlab.dkrz.de/ch1187/rechunk-data/-/archive/2206.0 ...@@ -14,7 +14,16 @@ pip install (--user) https://gitlab.dkrz.de/ch1187/rechunk-data/-/archive/2206.0
Use the `--user` flag if you do not have super user rights and are not using `anaconda`, `pipenv` or `virtual env` Use the `--user` flag if you do not have super user rights and are not using `anaconda`, `pipenv` or `virtual env`
## Usage ## Usage
Basic usage: ### Using the python module
```python
from rechunk_data import rechunk_dataset
import xarray as xr
dset = xr.open_mfdataset("/data/*", parallel=True, combine="by_coords")
new_data = rechunk_dataset(dset)
```
### Using the command line interface:
```bash ```bash
rechunk-data --help rechunk-data --help
......
...@@ -6,13 +6,11 @@ from pathlib import Path ...@@ -6,13 +6,11 @@ from pathlib import Path
from typing import List, Optional from typing import List, Optional
from ._rechunk import ( from ._rechunk import (
rechunk_netcdf_file, rechunk_netcdf_file,
_search_for_nc_files, rechunk_dataset,
_save_dataset,
_rechunk_dataset,
logger, logger,
) )
__version__ = "2206.0.2" __version__ = "2206.0.3"
PROGRAM_NAME = "rechunk-data" PROGRAM_NAME = "rechunk-data"
......
...@@ -11,6 +11,7 @@ def _save_dataset( ...@@ -11,6 +11,7 @@ def _save_dataset(
dset: xr.Dataset, file_name: Path, encoding: Dict[str, Any], engine: str dset: xr.Dataset, file_name: Path, encoding: Dict[str, Any], engine: str
) -> None: ... ) -> None: ...
def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]: ... def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]: ...
# Public API: takes an xarray dataset and returns the rechunked dataset.
def rechunk_dataset(dset: xr.Dataset) -> xr.Dataset: ...
def rechunk_netcdf_file( def rechunk_netcdf_file(
input_path: os.PathLike, input_path: os.PathLike,
output_path: Optional[os.PathLike] = ..., output_path: Optional[os.PathLike] = ...,
......
...@@ -90,6 +90,22 @@ def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]: ...@@ -90,6 +90,22 @@ def _rechunk_dataset(dset: xr.Dataset) -> Tuple[xr.Dataset, Dict[str, Any]]:
return dset, encoding return dset, encoding
def rechunk_dataset(dset: xr.Dataset) -> xr.Dataset:
    """Return a rechunked version of an xarray dataset.

    Thin public wrapper around ``_rechunk_dataset``: the helper returns a
    ``(dataset, encoding)`` pair, of which only the dataset is handed back
    to the caller.

    Parameters
    ----------
    dset: xarray.Dataset
        Dataset that should be rechunked.

    Returns
    -------
    xarray.Dataset: the rechunked dataset
    """
    # The encoding dictionary is not part of the public return value.
    rechunked, _encoding = _rechunk_dataset(dset)
    return rechunked
def rechunk_netcdf_file( def rechunk_netcdf_file(
input_path: os.PathLike, input_path: os.PathLike,
output_path: Optional[os.PathLike] = None, output_path: Optional[os.PathLike] = None,
......
...@@ -4,7 +4,8 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory ...@@ -4,7 +4,8 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
from pathlib import Path from pathlib import Path
import dask import dask
from rechunk_data import rechunk_netcdf_file, _save_dataset from rechunk_data import rechunk_netcdf_file, rechunk_dataset
from rechunk_data._rechunk import _save_dataset
def test_rechunk_data_dir_with_overwrite(data_dir: Path) -> None: def test_rechunk_data_dir_with_overwrite(data_dir: Path) -> None:
...@@ -38,6 +39,13 @@ def test_rechunk_single_data_file(data_file: Path) -> None: ...@@ -38,6 +39,13 @@ def test_rechunk_single_data_file(data_file: Path) -> None:
assert Path(temp_file.name).exists() assert Path(temp_file.name).exists()
def test_rechunk_dataset(small_chunk_data) -> None:
    """Rechunking a dataset must keep the same data variables."""
    # Run the rechunking with dask's "array.chunk-size" option set to 1MiB.
    with dask.config.set({"array.chunk-size": "1MiB"}):
        rechunked = rechunk_dataset(small_chunk_data)
    actual_names = list(rechunked.data_vars)
    expected_names = list(small_chunk_data.data_vars)
    assert actual_names == expected_names
def test_wrong_or_format(small_chunk_data, caplog) -> None: def test_wrong_or_format(small_chunk_data, caplog) -> None:
"""Testing wrong file format.""" """Testing wrong file format."""
caplog.clear() caplog.clear()
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import xarray as xr import xarray as xr
from rechunk_data import _rechunk_dataset from rechunk_data._rechunk import _rechunk_dataset
def test_rechunking_small_data( def test_rechunking_small_data(
......
"""Unit tests for searching for files.""" """Unit tests for searching for files."""
from pathlib import Path from pathlib import Path
from rechunk_data import _search_for_nc_files from rechunk_data._rechunk import _search_for_nc_files
def test_search_directory(data_dir: Path) -> None: def test_search_directory(data_dir: Path) -> None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment