diff --git a/.gitignore b/.gitignore index 43871650ed7c45caf2bef3a7073e5a9fe788cbff..1ee30200347a98067da3c2c40d136c735823d755 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.nc *.grb *.grib +notebooks/dask-worker-space/* +*png diff --git a/notebooks/use-case_climate_station_data.ipynb b/notebooks/use-case_climate_station_data.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..1bd6684f9f67faefd26b4b2b3de328ffd7fa208b --- /dev/null +++ b/notebooks/use-case_climate_station_data.ipynb @@ -0,0 +1,400 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Import Meteostat library and dependencies\n", + "from datetime import datetime\n", + "import matplotlib.pyplot as plt\n", + "from meteostat import Stations, Daily\n", + "import pandas as pd\n", + "\n", + "# Set coordinates of Hamburg Fuhlsbüttel\n", + "lat = 53.5510\n", + "lon = 9.9936\n", + "\n", + "# Set time period\n", + "start = datetime(1891, 1, 1)\n", + "end = datetime(2018, 12, 31)\n", + "\n", + "# Get closest weather station to the coordinates set above\n", + "stations = Stations()\n", + "stations = stations.nearby(lat, lon)\n", + "stations = stations.inventory('daily', (start, end))\n", + "station = stations.fetch(1)\n", + "\n", + "# Get daily data for the selected time period at the selected weather station\n", + "data = Daily(station, start, end)\n", + "data = data.fetch()\n", + "\n", + "# Calc yearly mean\n", + "yearly_mean_df = data.groupby(pd.Grouper(freq='1Y')).mean()\n", + "\n", + "# Plot line chart of yearly mean minimum and maximum temperature\n", +
"yearly_mean_df.plot(y=['tmin', 'tmax'], ylabel = 'Degrees C', color=['green', 'red'])\n", + "plt.ylim(10., 15.)\n", + "plt.show()\n", + "\n", + "# Plot line chart including average, minimum and maximum temperature\n", + "yearly_mean_df.plot(y=['tavg'], ylabel = 'Degrees C')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>name</th>\n", + " <th>country</th>\n", + " <th>region</th>\n", + " <th>wmo</th>\n", + " <th>icao</th>\n", + " <th>latitude</th>\n", + " <th>longitude</th>\n", + " <th>elevation</th>\n", + " <th>timezone</th>\n", + " <th>hourly_start</th>\n", + " <th>hourly_end</th>\n", + " <th>daily_start</th>\n", + " <th>daily_end</th>\n", + " <th>distance</th>\n", + " </tr>\n", + " <tr>\n", + " <th>id</th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " <th></th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>10147</th>\n", + " <td>Hamburg Airport</td>\n", + " <td>DE</td>\n", + " <td>HH</td>\n", + " <td>10147</td>\n", + " <td>EDDH</td>\n", + " <td>53.6333</td>\n", + " <td>10.0000</td>\n", + " <td>16.0</td>\n", + " <td>Europe/Berlin</td>\n", + " <td>1949-01-01</td>\n", + " <td>2020-12-15</td>\n", + " <td>1891-01-01</td>\n", + " <td>2020-12-03</td>\n", + " <td>9.161085e+03</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10162</th>\n", + " 
<td>Schwerin</td>\n", + " <td>DE</td>\n", + " <td>MV</td>\n", + " <td>10162</td>\n", + " <td><NA></td>\n", + " <td>53.6500</td>\n", + " <td>11.3833</td>\n", + " <td>59.0</td>\n", + " <td>Europe/Berlin</td>\n", + " <td>1950-01-01</td>\n", + " <td>2020-12-15</td>\n", + " <td>1890-01-01</td>\n", + " <td>2020-12-04</td>\n", + " <td>9.235691e+04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10224</th>\n", + " <td>Bremen</td>\n", + " <td>DE</td>\n", + " <td>HB</td>\n", + " <td>10224</td>\n", + " <td>EDDW</td>\n", + " <td>53.0500</td>\n", + " <td>8.8000</td>\n", + " <td>3.0</td>\n", + " <td>Europe/Berlin</td>\n", + " <td>1926-01-01</td>\n", + " <td>2020-12-15</td>\n", + " <td>1890-01-01</td>\n", + " <td>2020-12-03</td>\n", + " <td>9.692615e+04</td>\n", + " </tr>\n", + " <tr>\n", + " <th>D2578</th>\n", + " <td>Kirchdorf/Poel</td>\n", + " <td>DE</td>\n", + " <td>MV</td>\n", + " <td><NA></td>\n", + " <td><NA></td>\n", + " <td>53.9995</td>\n", + " <td>11.4341</td>\n", + " <td>12.0</td>\n", + " <td>Europe/Berlin</td>\n", + " <td>2004-07-01</td>\n", + " <td>2020-12-02</td>\n", + " <td>1865-01-01</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.069908e+05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10361</th>\n", + " <td>Magdeburg</td>\n", + " <td>DE</td>\n", + " <td>ST</td>\n", + " <td>10361</td>\n", + " <td>EDBM</td>\n", + " <td>52.1333</td>\n", + " <td>11.6000</td>\n", + " <td>79.0</td>\n", + " <td>Europe/Berlin</td>\n", + " <td>1951-01-01</td>\n", + " <td>2020-12-15</td>\n", + " <td>1881-01-01</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.910266e+05</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94964</th>\n", + " <td>Bushy Park</td>\n", + " 
<td>AU</td>\n", + " <td>TAS</td>\n", + " <td>94964</td>\n", + " <td><NA></td>\n", + " <td>-42.7000</td>\n", + " <td>146.8833</td>\n", + " <td>35.0</td>\n", + " <td>Australia/Hobart</td>\n", + " <td>NaT</td>\n", + " <td>NaT</td>\n", + " <td>1889-04-03</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.855174e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>95974</th>\n", + " <td>Fingal</td>\n", + " <td>AU</td>\n", + " <td>TAS</td>\n", + " <td>95974</td>\n", + " <td><NA></td>\n", + " <td>-41.6333</td>\n", + " <td>147.9667</td>\n", + " <td>233.0</td>\n", + " <td>Australia/Hobart</td>\n", + " <td>NaT</td>\n", + " <td>NaT</td>\n", + " <td>1888-10-03</td>\n", + " <td>2020-11-29</td>\n", + " <td>1.857037e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94979</th>\n", + " <td>Lake Leake</td>\n", + " <td>AU</td>\n", + " <td>TAS</td>\n", + " <td>94979</td>\n", + " <td><NA></td>\n", + " <td>-42.0000</td>\n", + " <td>147.7833</td>\n", + " <td>580.0</td>\n", + " <td>Australia/Hobart</td>\n", + " <td>NaT</td>\n", + " <td>NaT</td>\n", + " <td>1890-01-05</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.858111e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94970</th>\n", + " <td>Hobart Regional Office</td>\n", + " <td>AU</td>\n", + " <td>TAS</td>\n", + " <td>94970</td>\n", + " <td><NA></td>\n", + " <td>-42.8833</td>\n", + " <td>147.3167</td>\n", + " <td>51.0</td>\n", + " <td>Australia/Hobart</td>\n", + " <td>NaT</td>\n", + " <td>NaT</td>\n", + " <td>1882-04-01</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.860456e+07</td>\n", + " </tr>\n", + " <tr>\n", + " <th>94967</th>\n", + " <td>Cape Bruny</td>\n", + " <td>AU</td>\n", + " <td>TAS</td>\n", + " <td>94967</td>\n", + " <td><NA></td>\n", + " <td>-43.5000</td>\n", + " <td>147.1500</td>\n", + " <td>55.0</td>\n", + " <td>Australia/Hobart</td>\n", + " <td>NaT</td>\n", + " <td>NaT</td>\n", + " <td>1880-01-03</td>\n", + " <td>2020-12-04</td>\n", + " <td>1.863516e+07</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>229 rows × 14 
columns</p>\n", + "</div>" + ], + "text/plain": [ + " name country region wmo icao latitude \\\n", + "id \n", + "10147 Hamburg Airport DE HH 10147 EDDH 53.6333 \n", + "10162 Schwerin DE MV 10162 <NA> 53.6500 \n", + "10224 Bremen DE HB 10224 EDDW 53.0500 \n", + "D2578 Kirchdorf/Poel DE MV <NA> <NA> 53.9995 \n", + "10361 Magdeburg DE ST 10361 EDBM 52.1333 \n", + "... ... ... ... ... ... ... \n", + "94964 Bushy Park AU TAS 94964 <NA> -42.7000 \n", + "95974 Fingal AU TAS 95974 <NA> -41.6333 \n", + "94979 Lake Leake AU TAS 94979 <NA> -42.0000 \n", + "94970 Hobart Regional Office AU TAS 94970 <NA> -42.8833 \n", + "94967 Cape Bruny AU TAS 94967 <NA> -43.5000 \n", + "\n", + " longitude elevation timezone hourly_start hourly_end \\\n", + "id \n", + "10147 10.0000 16.0 Europe/Berlin 1949-01-01 2020-12-15 \n", + "10162 11.3833 59.0 Europe/Berlin 1950-01-01 2020-12-15 \n", + "10224 8.8000 3.0 Europe/Berlin 1926-01-01 2020-12-15 \n", + "D2578 11.4341 12.0 Europe/Berlin 2004-07-01 2020-12-02 \n", + "10361 11.6000 79.0 Europe/Berlin 1951-01-01 2020-12-15 \n", + "... ... ... ... ... ... \n", + "94964 146.8833 35.0 Australia/Hobart NaT NaT \n", + "95974 147.9667 233.0 Australia/Hobart NaT NaT \n", + "94979 147.7833 580.0 Australia/Hobart NaT NaT \n", + "94970 147.3167 51.0 Australia/Hobart NaT NaT \n", + "94967 147.1500 55.0 Australia/Hobart NaT NaT \n", + "\n", + " daily_start daily_end distance \n", + "id \n", + "10147 1891-01-01 2020-12-03 9.161085e+03 \n", + "10162 1890-01-01 2020-12-04 9.235691e+04 \n", + "10224 1890-01-01 2020-12-03 9.692615e+04 \n", + "D2578 1865-01-01 2020-12-04 1.069908e+05 \n", + "10361 1881-01-01 2020-12-04 1.910266e+05 \n", + "... ... ... ... 
\n", + "94964 1889-04-03 2020-12-04 1.855174e+07 \n", + "95974 1888-10-03 2020-11-29 1.857037e+07 \n", + "94979 1890-01-05 2020-12-04 1.858111e+07 \n", + "94970 1882-04-01 2020-12-04 1.860456e+07 \n", + "94967 1880-01-03 2020-12-04 1.863516e+07 \n", + "\n", + "[229 rows x 14 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stations.fetch(0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "test_env", + "language": "python", + "name": "test_env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/use-case_count_summer_days_cmip6.ipynb b/notebooks/use-case_count_summer_days_cmip6.ipynb index 0b6b25a148967d303f32ede0a82e8b2aa1b20ef1..60a10c8a6a5987a6f6101e45f109b6f616461bc9 100644 --- a/notebooks/use-case_count_summer_days_cmip6.ipynb +++ b/notebooks/use-case_count_summer_days_cmip6.ipynb @@ -197,7 +197,7 @@ "source": [ "# Store the name of the model we chose in a variable named \"climate_model\"\n", "\n", - "climate_model = \"MPI-ESM1-2-LR\" # here we choose Max-Plack Institute's Earth Sytem Model in high resolution\n", + "climate_model = \"MPI-ESM1-2-HR\" # here we choose Max-Plack Institute's Earth Sytem Model in high resolution\n", "\n", "# This is how we tell intake what data we want\n", "\n", @@ -258,7 +258,7 @@ "outputs": [], "source": [ "# Select the file that contains the year we selected in the drop down menu above, e.g. 
2015\n", - "selected_file = query_result_df_m[(year_box.value >= query_result_df[\"start_year\"]) & (\n", + "selected_file = query_result_df[(year_box.value >= query_result_df[\"start_year\"]) & (\n", " year_box.value <= query_result_df[\"end_year\"])]\n", "\n", "# Path of the file that contains the selected year \n", diff --git a/notebooks/use-case_dask_for_climate_data.ipynb b/notebooks/use-case_dask_for_climate_data.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e66fa9a0fdda6cee3b5fff0d15dc73f7ca16e23d --- /dev/null +++ b/notebooks/use-case_dask_for_climate_data.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use Dask for Climate Data Processing?\n", + "\n", + "This tutorial builds on the skills you have learned in the summer days tutorial (provide link)\n", + "\n", + "1. What is Dask?\n", + " 1. Parallelism\n", + " 1. Overview\n", + " 1. `dask.delayed`\n", + "2. Process climate data with dask?\n", + "3. Common mistakes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. What is Dask?\n", + "Dask is an open source library for parallel computing written in Python. It is used to process larger-than-memory datasets (e.g. large climate data sets). All information can be found here: https://docs.dask.org" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1A. Parallelism\n", + "- use Maria's metaphor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1B. Dask Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1C. 
`dask.delayed`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us start with an easy example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dask.distributed import Client\n", + "\n", + "client = Client(n_workers=4)\n", + "\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dask import delayed\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Not Parallel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "\n", + "def inc(x):\n", + " time.sleep(0.5)\n", + " return x + 1\n", + "\n", + "def double(x):\n", + " time.sleep(0.5)\n", + " return 2 * x\n", + "\n", + "def add(x, y):\n", + " time.sleep(0.5)\n", + " return x + y\n", + "\n", + "data = list(range(4))\n", + "\n", + "output = []\n", + "for x in data:\n", + " a = inc(x)\n", + " b = double(x)\n", + " c = add(a, b)\n", + " output.append(c)\n", + "\n", + "total = sum(output)\n", + "total" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parallel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@delayed\n", + "def inc(x):\n", + " time.sleep(0.5)\n", + " return x + 1\n", + "\n", + "@delayed\n", + "def double(x):\n", + " time.sleep(0.5)\n", + " return 2 * x\n", + "\n", + "#@delayed\n", + "def add(x, y):\n", + " time.sleep(0.5)\n", + " return x + y\n", + "\n", + "data = list(range(4))\n", + "\n", + "output = []\n", + "for x in data:\n", + " a = inc(x)\n", + " b = double(x)\n", + " c = add(a, b)\n", + " output.append(c)\n", + "\n", + "total_delayed = sum(output) #also delay sum because it is a function\n", + "#%time total_delayed.compute()\n", + "total_delayed.visualize()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "total_delayed.visualize()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looks good in theory, right? Now, let us apply our knowledge to climate model data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Process Climate Data with Dask\n", + "- load with intake\n", + "- think about processing\n", + "- compare conventional with dask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import intake\n", + "import xarray as xr\n", + "#from import Basemap, cm\n", + "import matplotlib.pyplot as plt\n", + "from netCDF4 import Dataset as open_ncfile\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Path to catalog descriptor on the DKRZ server\n", + "col_url = \"/work/ik1017/Catalogs/mistral-cmip6.json\"\n", + "\n", + "# Open the catalog with the intake package and name it \"col\" as short for \"collection\"\n", + "col = intake.open_esm_datastore(col_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Store the name of the model we chose in a variable named \"climate_model\"\n", + "\n", + "climate_model = \"MPI-ESM1-2-HR\" # here we choose Max-Planck Institute's Earth System Model in high resolution\n", + "\n", + "# This is how we tell intake what data we want\n", + "\n", + "query = dict(\n", + " source_id = \"MPI-ESM1-2-HR\", # the model \n", + " variable_id = \"tasmax\", # temperature at surface, maximum\n", + " table_id = \"day\", # daily maximum\n", + " experiment_id = \"historical\", # what we selected in the drop down menu,e.g. 
SSP2.4-5 2015-2100\n", + " member_id = \"r10i1p1f1\", # \"r\" realization, \"i\" initialization, \"p\" physics, \"f\" forcing\n", + " time_range =\"20100101-20141231\",\n", + ")\n", + "\n", + "# Intake looks for the query we just defined in the catalog of the CMIP6 data pool at DKRZ\n", + "cat = col.search(**query)\n", + "\n", + "# Show query results\n", + "cat.df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.open_dataset(cat.df['path'][0])\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "var = ds.variables['tasmax'][0,:,:]\n", + "lat = ds.variables['lat'][:]\n", + "lon = ds.variables['lon'][:]\n", + "\n", + "#-- create figure and axes instances\n", + "dpi = 100\n", + "fig = plt.figure(figsize=(1100/dpi, 1100/dpi), dpi=dpi)\n", + "ax = fig.add_axes([0.1,0.1,0.8,0.9])\n", + "\n", + "#-- create map\n", + "map = Basemap(projection='cyl',llcrnrlat= -90.,urcrnrlat= 90.,\\\n", + " resolution='c', llcrnrlon=-180.,urcrnrlon=180.)\n", + "\n", + "#-- draw coastlines, state and country boundaries, edge of map\n", + "map.drawcoastlines()\n", + "map.drawstates()\n", + "map.drawcountries()\n", + "\n", + "#-- create and draw meridians and parallels grid lines\n", + "map.drawparallels(np.arange( -90., 90.,30.),labels=[1,0,0,0],fontsize=10)\n", + "map.drawmeridians(np.arange(-180.,180.,30.),labels=[0,0,0,1],fontsize=10)\n", + "\n", + "#-- convert latitude/longitude values to plot x/y values\n", + "x, y = map(*np.meshgrid(lon,lat))\n", + "\n", + "#-- contour levels\n", + "clevs = np.arange(210,320,5)\n", + "\n", + "#-- draw filled contours\n", + "cnplot = map.contourf(x,y,var,clevs,cmap=plt.cm.jet)\n", + "\n", + "#-- add colorbar\n", + "cbar = map.colorbar(cnplot,location='bottom',pad=\"10%\") #-- pad: distance between map and colorbar\n", + "cbar.set_label('deg K') #-- add colorbar title string\n", + "\n", + "#-- add plot 
title\n", + "plt.title('Temperature')\n", + "\n", + "#-- display on screen\n", + "#plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# <font color='darkred'>Now use Gradient Operator in parallel</font>" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# <font color='darkgreen'>Take home message</font>\n", + "\n", + "Parallelism brings extra complexity and often it is not necessary for your problems. Before using Dask you may want to try alternatives:\n", + "- use better algorithms or data structures\n", + "- better file formats\n", + "- compiled code\n", + "- sampling\n", + "- profile your code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 unstable (using the module python3/unstable)", + "language": "python", + "name": "python3_unstable" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/use-case_plot-unstructured_psyplot_cmip6.ipynb b/notebooks/use-case_plot-unstructured_psyplot_cmip6.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3904b570cf8b2bed96f0e1994834a83bc462bd2c --- /dev/null +++ b/notebooks/use-case_plot-unstructured_psyplot_cmip6.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plot Earth System Model data on unstructured grids with psyplot\n", + "\n", + "This notebook introduces you to the `mapplot` function of the package `psyplot`.\n", + "It is suitable to plot maps from data on unstructured grids like the ones from ICON and FESOM.\n", + "\n", + "We therefore search for the corresponding data in 
the CMIP6 data pool with intake-esm.\n", + "Afterwards, we open a file with `xarray` and configure the opened xarray dataset as well as psyplot for a map plot.\n", + "\n", + "In the end, we discuss the functions of `psyplot.project.plot.mapplot`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import psyplot.project as psy\n", + "import matplotlib as mpl\n", + "import xarray as xr\n", + "import intake" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We open a swift catalog from dkrz cloud which is accessible without authentication." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col_url = \"https://swift.dkrz.de/v1/dkrz_a44962e3ba914c309a7421573a6949a6/intake-esm/mistral-cmip6.json\"\n", + "col = intake.open_esm_datastore(col_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we aim at plotting the Sea Surface Temperature of the upper boundary of the liquid ocean, including temperatures below sea-ice and floating ice shelves from AWI.\n", + "We therefore search for `tos` in the catalog for monthly frequency. We use 1 realization of 1 experiment only." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tos=col.search(source_id=\"AWI-CM-1-1-MR\",\n", + " experiment_id=\"ssp370\",\n", + " variable_id=\"tos\",\n", + " table_id=\"Omon\",\n", + " member_id=\"r1i1p1f1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now open the file on the mistral file system." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset = xr.open_dataset(tos.df[\"path\"].to_list()[0])\n", + "#dset = xr.open_mfdataset(tos.df[\"path\"].to_list())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to make `tos` plottable, we set the following configuration.\n", + "- The `CDI_grid_type` is a keyword for `psyplot`.\n", + "- Coordinates are not fully recognized by `xarray` so that we have to add some manually (version from Dec 2020)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset[\"tos\"][\"CDI_grid_type\"]=\"unstructured\"\n", + "coordlist=[\"vertices_latitude\", \"vertices_longitude\", \"lat_bnds\", \"lon_bnds\"]\n", + "dset=dset.set_coords([coord for coord in dset.data_vars if coord in coordlist])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is based on the example from:\n", + "https://psyplot.readthedocs.io/projects/psy-maps/en/latest/examples/example_ugrid.html#gallery-examples-example-ugrid-ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "psy.rcParams['plotter.maps.xgrid'] = False\n", + "psy.rcParams['plotter.maps.ygrid'] = False\n", + "mpl.rcParams['figure.figsize'] = [10, 8.]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iconplot11=psy.plot.mapplot(\n", + " dset, name=\"tos\", cmap='rainbow',\n", + " clabel=dset[\"tos\"].description,\n", + " stock_img=True, lsm='50m')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now do the same with a smaller subset to see the fine resolution of the AWI ocean model FESOM.\n", + "The subsetting is required because the plotting takes too long otherwise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset2 = dset.isel(time=slice(1,3)).where( (dset.lon > -10. ) &\n", + " (dset.lon < 50. ) &\n", + " (dset.lat > 40. ) &\n", + " (dset.lat < 70. ), drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset2.to_netcdf(\"/home/dkrz/k204210/test.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset2=xr.open_dataset(\"/home/dkrz/k204210/test.nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dset2[\"tos\"][\"CDI_grid_type\"]=\"unstructured\"\n", + "coordlist=[\"vertices_latitude\", \"vertices_longitude\", \"lat_bnds\", \"lon_bnds\"]\n", + "dset2=dset2.set_coords([coord for coord in dset2.data_vars if coord in coordlist])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iconplot12=psy.plot.mapplot(\n", + " dset2, name=\"tos\", cmap='rainbow',\n", + " lonlatbox='Ireland',\n", + " clabel=dset[\"tos\"].description,\n", + " stock_img=True,\n", + " lsm='50m',\n", + " datagrid=dict(c='b', lw=0.2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 unstable (using the module python3/unstable)", + "language": "python", + "name": "python3_unstable" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}