From 0035b6084afe6602ad5e80728930c8327f7d3830 Mon Sep 17 00:00:00 2001 From: Fabian Wachsmann <wachsmann@dkrz.de> Date: Wed, 5 Jul 2023 09:47:04 +0000 Subject: [PATCH] Setup for ci --- environment.yml | 6 +- .../demo/tutorial_compression_netcdf.ipynb | 115 +++++++++++++++++- 2 files changed, 113 insertions(+), 8 deletions(-) diff --git a/environment.yml b/environment.yml index c236558..8379ea2 100644 --- a/environment.yml +++ b/environment.yml @@ -11,14 +11,15 @@ dependencies: - ipywidgets - intake - intake-esm + - pydantic<2 - xarray - cf_xarray - netcdf4 - numpy - pandas - # - cartopy>0.20 + - cartopy>=0.21.1 - scipy - - conda-forge/label/dev::cdo + # - conda-forge/label/dev::cdo - python-cdo - geopy - folium @@ -62,6 +63,7 @@ dependencies: #for compression - hdf5plugin - h5netcdf + - xbitinfo-python - pip: - sphinx-book-theme - pangeo-xesmf #for raster data diff --git a/notebooks/demo/tutorial_compression_netcdf.ipynb b/notebooks/demo/tutorial_compression_netcdf.ipynb index 1cdac19..fd74dbd 100644 --- a/notebooks/demo/tutorial_compression_netcdf.ipynb +++ b/notebooks/demo/tutorial_compression_netcdf.ipynb @@ -174,14 +174,14 @@ " zstd=dict(\n", " engine=\"h5netcdf\",\n", " #from python 3.11:\n", - " #compr=dict(**hdf5plugin.Bitshuffle(cname=\"zstd\"))\n", - " compr=dict(**hdf5plugin.Zstd())\n", + " compr=dict(**hdf5plugin.Bitshuffle(cname=\"zstd\"))\n", + " #compr=dict(**hdf5plugin.Zstd())\n", " ),\n", " lz4=dict(\n", " engine=\"h5netcdf\",\n", " #from python 3.11:\n", - " #compr=dict(**hdf5plugin.Bitshuffle(cname=\"lz4\"))\n", - " compr=dict(**hdf5plugin.Bitshuffle(lz4=True))\n", + " compr=dict(**hdf5plugin.Bitshuffle(cname=\"lz4\"))\n", + " #compr=dict(**hdf5plugin.Bitshuffle(lz4=True))\n", " ),\n", " blosc=dict(\n", " engine=\"h5netcdf\",\n", @@ -292,8 +292,8 @@ "source": [ "## Lossy\n", "\n", - "1. Direct `BitRound`ing with 16 bits to be kept.\n", - "1. TBD: Calculate number of bits with information level 0.99 via *xbitinfo*" + "1. Direct `BitRound`ing with 16 bits to be kept. This precision can be considered as similar to e.g. ERA5 data (24 bit Integer space).\n", + "1. Calculate number of bits with information level 0.99 via *xbitinfo*." ] }, { @@ -356,6 +356,109 @@ " f.write(f\"{k}_lossy,{sourcesize},{v['speed']},{v['ratio']}\\n\")" ] }, + { + "cell_type": "markdown", + "id": "6cece0ed-0ab0-4fd1-a515-93f36b6753a8", + "metadata": {}, + "source": [ + "### Xbitinfo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ff2e4f1-10ae-4b0b-9d4f-5ffbf871dbdb", + "metadata": {}, + "outputs": [], + "source": [ + "omon2d=xr.open_mfdataset(\n", + " source_uncompressed,\n", + " engine=\"h5netcdf\",\n", + " parallel=False\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09e45692-8bdd-4a78-82eb-9bead71875bb", + "metadata": {}, + "outputs": [], + "source": [ + "import xbitinfo as xb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c021ba4-7172-4035-9305-ae61e92bb684", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "bitinfostart=time.time()\n", + "for var in omon2d.data_vars:\n", + " if \"bnds\" in var:\n", + " continue\n", + " dims=[dim for dim in omon2d[[var]].dims.keys() if \"ncell\" in dim]\n", + " print(dims)\n", + " if dims:\n", + " bitinfo = xb.get_bitinformation(omon2d[[var]], dim=dims, implementation=\"python\")\n", + " keepbits = xb.get_keepbits(bitinfo, inflevel=0.99)\n", + " print(keepbits)\n", + " if keepbits[var][0] > 0 :\n", + " print(keepbits[var][0])\n", + " omon2d[var] = xb.xr_bitround(omon2d[[var]], keepbits+2)[var] # this one wraps around numcodecs.bitround\n", + "bitinfoend=time.time()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d2def50-c209-41ee-8604-5fdb8554f640", + "metadata": {}, + "outputs": [], + "source": [ + "resultdir={}\n", + "for compr,config in tqdm.tqdm(comprdict.items()):\n", + " enc=dict()\n", + " for var in omon2d.data_vars:\n", + " enc[var]=config[\"compr\"]\n", + " start=time.time()\n", + " \n", + " omon2d.to_netcdf(f\"{pwd}/test_{compr}_compression_lossy_xbit.nc\",\n", + " mode=\"w\",\n", + " engine=config[\"engine\"],\n", + " unlimited_dims=\"time\",\n", + " encoding=enc,\n", + " )\n", + " end=time.time()\n", + " resultdir[compr]=dict(\n", + " speed=sourcesize/(end-start+bitinfoend-bitinfostart)/1024/1024,\n", + " ratio=fs.filesystem(\"file\").du(f\"{pwd}/test_{compr}_compression_lossy_xbit.nc\")/sourcesize\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f95632f-ac07-4192-8a96-b3bb98ebfde2", + "metadata": {}, + "outputs": [], + "source": [ + "with open(f\"results_{str(times)}.csv\",\"a\") as f:\n", + " for k,v in resultdir.items():\n", + " f.write(f\"{k}_lossy_xbit,{sourcesize},{v['speed']},{v['ratio']}\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "3d000b88-f995-4369-b87d-98e910c57e9a", + "metadata": {}, + "source": [ + "### Write the results" + ] + }, { "cell_type": "code", "execution_count": null, -- GitLab