From 0035b6084afe6602ad5e80728930c8327f7d3830 Mon Sep 17 00:00:00 2001
From: Fabian Wachsmann <wachsmann@dkrz.de>
Date: Wed, 5 Jul 2023 09:47:04 +0000
Subject: [PATCH] Setup for ci

---
 environment.yml                               |   6 +-
 .../demo/tutorial_compression_netcdf.ipynb    | 115 +++++++++++++++++-
 2 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/environment.yml b/environment.yml
index c236558..8379ea2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,14 +11,15 @@ dependencies:
   - ipywidgets
   - intake
   - intake-esm
+  - pydantic<2
   - xarray
   - cf_xarray
   - netcdf4
   - numpy
   - pandas
-    #  - cartopy>0.20
+  - cartopy>=0.21.1
   - scipy
-  - conda-forge/label/dev::cdo
+    #  - conda-forge/label/dev::cdo
   - python-cdo
   - geopy
   - folium
@@ -62,6 +63,7 @@ dependencies:
 #for compression
   - hdf5plugin
   - h5netcdf
+  - xbitinfo-python
   - pip:
     - sphinx-book-theme
     - pangeo-xesmf #for raster data
diff --git a/notebooks/demo/tutorial_compression_netcdf.ipynb b/notebooks/demo/tutorial_compression_netcdf.ipynb
index 1cdac19..fd74dbd 100644
--- a/notebooks/demo/tutorial_compression_netcdf.ipynb
+++ b/notebooks/demo/tutorial_compression_netcdf.ipynb
@@ -174,14 +174,14 @@
     "    zstd=dict(\n",
     "        engine=\"h5netcdf\",\n",
     "        #from python 3.11:\n",
-    "        #compr=dict(**hdf5plugin.Bitshuffle(cname=\"zstd\"))\n",
-    "        compr=dict(**hdf5plugin.Zstd())\n",
+    "        compr=dict(**hdf5plugin.Bitshuffle(cname=\"zstd\"))\n",
+    "        #compr=dict(**hdf5plugin.Zstd())\n",
     "    ),\n",
     "    lz4=dict(\n",
     "        engine=\"h5netcdf\",\n",
     "        #from python 3.11:\n",
-    "        #compr=dict(**hdf5plugin.Bitshuffle(cname=\"lz4\"))\n",
-    "        compr=dict(**hdf5plugin.Bitshuffle(lz4=True))\n",
+    "        compr=dict(**hdf5plugin.Bitshuffle(cname=\"lz4\"))\n",
+    "        #compr=dict(**hdf5plugin.Bitshuffle(lz4=True))\n",
     "    ),\n",
     "    blosc=dict(\n",
     "        engine=\"h5netcdf\",\n",
@@ -292,8 +292,8 @@
    "source": [
     "## Lossy\n",
     "\n",
-    "1. Direct `BitRound`ing with 16 bits to be kept.\n",
-    "1. TBD: Calculate number of bits with information level 0.99 via *xbitinfo*"
+    "1. Direct `BitRound`ing with 16 bits to be kept. This precision is comparable to that of, e.g., ERA5 data (24-bit integer space).\n",
+    "1. Calculate the number of bits to keep at an information level of 0.99 via *xbitinfo*."
    ]
   },
   {
@@ -356,6 +356,109 @@
     "        f.write(f\"{k}_lossy,{sourcesize},{v['speed']},{v['ratio']}\\n\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "6cece0ed-0ab0-4fd1-a515-93f36b6753a8",
+   "metadata": {},
+   "source": [
+    "### Xbitinfo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ff2e4f1-10ae-4b0b-9d4f-5ffbf871dbdb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "omon2d=xr.open_mfdataset(\n",
+    "    source_uncompressed,\n",
+    "    engine=\"h5netcdf\",\n",
+    "    parallel=False\n",
+    ") "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "09e45692-8bdd-4a78-82eb-9bead71875bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xbitinfo as xb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c021ba4-7172-4035-9305-ae61e92bb684",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "bitinfostart=time.time()\n",
+    "for var in omon2d.data_vars:\n",
+    "    if \"bnds\" in var:\n",
+    "        continue\n",
+    "    dims=[dim for dim in omon2d[[var]].dims.keys() if \"ncell\" in dim]\n",
+    "    print(dims)\n",
+    "    if dims:\n",
+    "        bitinfo = xb.get_bitinformation(omon2d[[var]], dim=dims, implementation=\"python\")\n",
+    "        keepbits = xb.get_keepbits(bitinfo, inflevel=0.99)\n",
+    "        print(keepbits)\n",
+    "        if keepbits[var][0] > 0 :\n",
+    "            print(keepbits[var][0])\n",
+    "            omon2d[var] = xb.xr_bitround(omon2d[[var]], keepbits+2)[var] # this one wraps around numcodecs.bitround\n",
+    "bitinfoend=time.time()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d2def50-c209-41ee-8604-5fdb8554f640",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "resultdir={}\n",
+    "for compr,config in tqdm.tqdm(comprdict.items()):\n",
+    "    enc=dict()\n",
+    "    for var in omon2d.data_vars:\n",
+    "        enc[var]=config[\"compr\"]\n",
+    "    start=time.time()\n",
+    "        \n",
+    "    omon2d.to_netcdf(f\"{pwd}/test_{compr}_compression_lossy_xbit.nc\",\n",
+    "                 mode=\"w\",\n",
+    "                 engine=config[\"engine\"],\n",
+    "                 unlimited_dims=\"time\",\n",
+    "                 encoding=enc,\n",
+    "                )\n",
+    "    end=time.time()\n",
+    "    resultdir[compr]=dict(\n",
+    "        speed=sourcesize/(end-start+bitinfoend-bitinfostart)/1024/1024,\n",
+    "        ratio=fs.filesystem(\"file\").du(f\"{pwd}/test_{compr}_compression_lossy_xbit.nc\")/sourcesize\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f95632f-ac07-4192-8a96-b3bb98ebfde2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(f\"results_{str(times)}.csv\",\"a\") as f:\n",
+    "    for k,v in resultdir.items():\n",
+    "        f.write(f\"{k}_lossy_xbit,{sourcesize},{v['speed']},{v['ratio']}\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3d000b88-f995-4369-b87d-98e910c57e9a",
+   "metadata": {},
+   "source": [
+    "### Write the results"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-- 
GitLab