Commit 2ca0bdb2 authored by Fabian Wachsmann

Now cdo works

parent e584d3d7
%% Cell type:markdown id:13ba06b9-5f2e-4de3-90da-511557166bfe tags:
# Cloudify
This notebook series guides you through the *cloudify* service: serving Xarray datasets as Zarr datasets with xpublish, with server-side processing enabled through Dask. It introduces the basic concepts with some examples. It was designed to work on DKRZ's HPC.
%% Cell type:markdown id:a56b764b-23a7-4d86-84b2-bf419d989cb2 tags:
## 1. Start an app
In the following, you will learn how to start and control the cloudify service.
**Is there any other reason to run cloudify on DKRZ's HPC, which is only accessible internally?**
If you *cloudify* a virtual dataset prepared as a highly aggregated, analysis-ready dataset, clients can subset from this *one* large aggregated dataset instead of searching the file system, as the client example below shows.
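A minimal client-side sketch, assuming the app from this notebook is already running and serving a dataset named `example` on port `9010` (host, port, dataset name and the variable `tas` are placeholders for your own setup; xpublish exposes each dataset under `/datasets/<name>/zarr`):
```
import xarray as xr

# with our self-signed certificate, SSL verification may need to be disabled
ds = xr.open_dataset(
    "https://localhost:9010/datasets/example/zarr",
    engine="zarr",
    chunks={},  # keep the access lazy
)
# only the chunks needed for this subset are transferred over the network
ds["tas"].isel(time=0).mean().compute()
```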
%% Cell type:markdown id:0b17b450-716d-49b3-bbfa-aec943e47120 tags:
1. Install a kernel for jupyterhub
```bash
source activate /work/bm0021/conda-envs/cloudify
python -m ipykernel install --user --name cloudify_env
```
- Choose the kernel
%% Cell type:markdown id:8cfb6129-aea7-4d87-a016-e04cee5bf084 tags:
2. To allow secure *https* access, we need an SSL certificate. For testing purposes and on Levante, we can use a self-signed one. Note that, right now, some applications only allow access through https. We can create the certificate like this:
%% Cell type:code id:0a748c3e-2a25-40ea-aefc-ae40bc13f664 tags:
``` python
cn=!echo $HOSTNAME
cn=cn[0]
cn
```
%% Output
'l40356.lvt.dkrz.de'
%% Cell type:code id:d5e47e26-93ac-465f-90a4-8d84762b1f80 tags:
``` python
!openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=Hamburg/L=Hamburg/O=Test/OU=Test/CN="{cn}
```
%% Output
[openssl key-generation progress output omitted]
%% Cell type:markdown id:190bde7f-6f36-4c87-a9f2-a82ee840302e tags:
3. We write a cloudify script for data serving and start to host an example dataset in a background process. We need to consider some settings:
**Port**
The resulting service listens on a specific *port*. In case we share a node, we can only use ports that are not already allocated. To enable all of us to run our own app, we agree to use port `90XX`, where XX are the last two digits of our account.
**Dask Cluster**
Dask is necessary for lazy access to the data. Additionally, a dask cluster can help us do server-side processing like uniform encoding. When starting the imported predefined dask cluster, it will use the following resources:
```python
n_workers=2,
threads_per_worker=8,
memory_limit="16GB"
```
which should be sufficient for at least two clients in parallel. We store the cluster's address in an environment variable so that xpublish can find it (see the sketch below). We furthermore have to align the two event loops of dask and xpublish's asyncio with `nest_asyncio.apply()`. Event loops can be seen as *while* loops for a permanently running main worker.
**Plug-ins**
Xpublish finds pre-installed plugins like the intake-plugin by itself. Custom plugins need to be registered.
Further settings will be discussed later.
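A minimal sketch of what the predefined cluster roughly corresponds to, assuming it wraps a plain dask `LocalCluster` with the resources quoted above (the actual helper used below is `get_dask_cluster` from `cloudify.utils.daskhelper`):
```
from dask.distributed import LocalCluster
import os

# roughly the resources listed above
cluster = LocalCluster(
    n_workers=2,
    threads_per_worker=8,
    memory_limit="16GB",
)
# the script below stores the scheduler address here so xpublish can find it
os.environ["ZARR_ADDRESS"] = cluster.scheduler_address
```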
%% Cell type:code id:e11d309f-c893-401a-ba5f-9f3f0046e039 tags:
``` python
xpublish_example_script="xpublish_example.py"
```
%% Cell type:code id:571a82ea-d7bc-42e3-8169-ae22ef999065 tags:
``` python
%%writefile {xpublish_example_script}
port=9010
ssl_keyfile="/work/bm0021/k204210/cloudify/workshop/key.pem"
ssl_certfile="/work/bm0021/k204210/cloudify/workshop/cert.pem"
from cloudify.plugins.stacer import *
from cloudify.plugins.geoanimation import *
from cloudify.utils.daskhelper import *
import xarray as xr
import xpublish as xp
import asyncio
import nest_asyncio
import sys
import os
nest_asyncio.apply()
chunks={}
for coord in ["lon","lat"]:
    chunk_size=os.environ.get(f"XPUBLISH_{coord.upper()}_CHUNK_SIZE",None)
    if chunk_size:
        chunks[coord]=int(chunk_size)
l_lossy=os.environ.get("L_LOSSY",False)
def lossy_compress(partds):
    import numcodecs
    rounding = numcodecs.BitRound(keepbits=12)
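    # BitRound keeps the 12 most significant mantissa bits and zeroes the rest,
    # which loses precision but makes the data much more compressible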
    return rounding.decode(rounding.encode(partds))
if __name__ == "__main__": # This avoids infinite subprocess creation
    import dask
    zarrcluster = asyncio.get_event_loop().run_until_complete(get_dask_cluster())
    os.environ["ZARR_ADDRESS"]=zarrcluster.scheduler._address
    dsname=sys.argv[1]
    glob_inp=sys.argv[2:]
    dsdict={}
    ds=xr.open_mfdataset(
        glob_inp,
        compat="override",
        coords="minimal",
        chunks=chunks,
    )
if "height" in ds:
del ds["height"]
for dv in ds.variables:
if "time" in dv:
ds[dv]=ds[dv].load()
ds[dv].encoding["dtype"] = "float64"
ds[dv].encoding["compressor"] = None
    ds=ds.set_coords([a for a in ds.data_vars if "bnds" in a])
    if l_lossy:
        ds = xr.apply_ufunc(
            lossy_compress,
            ds,
            dask="parallelized",
            keep_attrs="drop_conflicts"
        )
    dsdict[dsname]=ds
    collection = xp.Rest(dsdict)
    collection.register_plugin(Stac())
    collection.register_plugin(PlotPlugin())
    collection.serve(
        host="0.0.0.0",
        port=port,
        ssl_keyfile=ssl_keyfile,
        ssl_certfile=ssl_certfile
    )
```
%% Output
Overwriting xpublish_example.py
%% Cell type:markdown id:ca64c11f-0846-4ddd-9e60-4b22dba8b32c tags:
You can run this app e.g. for:
```
dsname="example"
glob_inp="/work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/*.nc"
```
by applying:
%% Cell type:code id:5da13d6b-05f1-4b3b-aecd-1ac3bb635526 tags:
``` python
%%bash --bg
# Cannot use variables from the python script here, so it is all hard-coded
source activate /work/bm0021/conda-envs/cloudify
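# Optional tuning via the environment variables read by xpublish_example.py
# (the values below are hypothetical examples):
# export XPUBLISH_LON_CHUNK_SIZE=192
# export XPUBLISH_LAT_CHUNK_SIZE=96
# export L_LOSSY=1   # any non-empty value enables lossy BitRound compression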
python xpublish_example.py \
    example \
    /work/ik1017/CMIP6/data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/*.nc
```
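%% Cell type:markdown tags:
Once the background process is up (give it a few seconds to start), you can verify that the app is serving. A minimal check, assuming port `9010` from the script above; `-k` is needed because our certificate is self-signed:
%% Cell type:code tags:
``` python
!curl -k https://localhost:9010/datasets
```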
%% Cell type:markdown id:634d1952-43a9-40a7-b7c3-9bbff5f07081 tags:
### Stop a running app
Let us try to run only **one** app at a time. Otherwise, we would have multiple ports and dask clusters, which would not end well.
You can check for the main *cloudify* processes by finding the dask workers. In a next step, you can *kill* them by ID, or use the pattern-based sketch below.
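A quick alternative to killing by PID, assuming the script name `xpublish_example.py` from above (`pkill -f` matches against the full command line):
```
!pkill -f xpublish_example.py
```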
%% Cell type:code id:9a43c4ce-be08-4493-8dd5-a3789f8c0647 tags:
``` python
!ps -ef | grep cloudify
```
%% Output
k204210 1885374 1878744 0 09:31 ? 00:00:00 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-449f1aed-0c01-4339-8e8e-3391add9c830.json
k204210 1885397 1878744 0 09:31 ? 00:00:00 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-59fedbd8-7d7f-424a-8226-3980eadf7fc6.json
k204210 1886037 1878744 1 09:35 ? 00:00:21 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-1a921a3d-65a6-4b07-9c29-d17f990ab11b.json
k204210 1894380 1878744 17 09:58 ? 00:00:00 /work/bm0021/conda-envs/cloudify/bin/python -Xfrozen_modules=off -m ipykernel_launcher -f /home/k/k204210/.local/share/jupyter/runtime/kernel-1d363456-8c8d-40f5-91ea-1e931e364b72.json
k204210 1894407 1894380 0 09:58 pts/3 00:00:00 /bin/bash -c ps -ef | grep cloudify
k204210 1894411 1894410 0 09:58 ? 00:00:00 /sw/spack-levante/jupyterhub/jupyterhub/bin/python /sw/spack-levante/jupyterhub/jupyterhub/bin/conda shell.posix activate /work/bm0021/conda-envs/cloudify
k204210 1894413 1894407 0 09:58 pts/3 00:00:00 grep cloudify
%% Cell type:markdown id:5b505b0a-2b48-4eb3-8c91-fb6b7fcdc54b tags:
**Important note:**
If you plan to continue with another notebook, do not stop the app now.
%% Cell type:code id:af33c134-f4ba-42f7-9687-7bb9948d5dfe tags:
``` python
!kill 1882536
```
%% Output
/bin/bash: line 0: kill: (1882536) - No such process
%% Cell type:code id:feacaed0-df8d-4e52-af8c-acd094cac6f4 tags:
``` python
```