Commit 88b3ef4c authored by Fabian Wachsmann's avatar Fabian Wachsmann
Browse files

updated two notebooks

parent b0026bad
Pipeline #19415 passed with stage
in 3 minutes and 9 seconds
......@@ -70,7 +70,7 @@
"outputs": [],
"source": [
"# Open the catalog with the intake package and name it \"col\" as short for \"collection\"\n",
"cols=dkrz_catalog.metadata[\"parameters\"][\"cmip6_columns\"][\"default\"]+[\"opendap_url\"]\n",
"cols=dkrz_catalog._entries[\"dkrz_cmip6_disk\"]._open_args[\"csv_kwargs\"][\"usecols\"]+[\"opendap_url\"]\n",
"col=dkrz_catalog.dkrz_cmip6_disk(csv_kwargs=dict(usecols=cols))\n",
"cat = col.search(variable_id = \"thetaot\",\n",
" experiment_id = \"historical\",\n",
......
%% Cell type:markdown id: tags:
# Prepare modeling data for GIS analyses
The most common way to work with climate modeling data sets is storing them in NetCDF or GRIB format and processing them using Xarray. Raster data sets used in GIS applications are usually stored in raster formats such as GeoTIFF, BIL, or IMG and being processed mainly with the help of GDAL-based python libraries. While modeling data sets are normally georeferenced using grids explicitly described by coordinates and conforming to e.g., CF sandard, geographic reference of GIS data sets is described by a coordinate reference system (CRS). This diferences imply some steps that need to be undertaken in order to convert data sets from one format to another.
In this notebook we create an aggregated map of pre-industrial Vertically Averaged Sea Water Potential Temperature (thetaot) from CMIP6 and save it as GeoTIFF.
First, we import all necessary libraries.
%% Cell type:code id: tags:
``` python
%matplotlib inline
import intake, requests, aiohttp
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import xarray as xr
import cf_xarray as cfxr
import xesmf as xe
import scipy.sparse as sps
# Rioxarray is an experimental interface between Xarray and Rasterio libraries
# made for loading GDAL-readable rasters as xarray datasets and saving xarrays as raster files.
import rioxarray
#print("Using roocs/clisops in version %s" % cl.__version__)
print("Using xESMF in version %s" % xe.__version__)
xr.set_options(display_style = "html");
import warnings
warnings.simplefilter("ignore")
```
%% Cell type:markdown id: tags:
## 1. Find and load the data set
Then we find the data set we need in the DKRZ open catalogue and print its metadata.
%% Cell type:code id: tags:
``` python
# Path to master catalog on the DKRZ server
#dkrz_catalog=intake.open_catalog(["https://dkrz.de/s/intake"])
#
#only for the web page we need to take the original link:
dkrz_catalog=intake.open_catalog(["https://gitlab.dkrz.de/data-infrastructure-services/intake-esm/-/raw/master/esm-collections/cloud-access/dkrz_catalog.yaml"])
# Print DKRZ open catalogues
list(dkrz_catalog)
```
%% Cell type:code id: tags:
``` python
# Open the catalog with the intake package and name it "col" as short for "collection"
cols=dkrz_catalog.metadata["parameters"]["cmip6_columns"]["default"]+["opendap_url"]
cols=dkrz_catalog._entries["dkrz_cmip6_disk"]._open_args["csv_kwargs"]["usecols"]+["opendap_url"]
col=dkrz_catalog.dkrz_cmip6_disk(csv_kwargs=dict(usecols=cols))
cat = col.search(variable_id = "thetaot",
experiment_id = "historical",
time_range = "185001-194912",
member_id = "r1i1p1f3"
)
cat.df
```
%% Cell type:code id: tags:
``` python
dset = xr.open_dataset(cat.df.uri.values[0], chunks = {"time": 6})
dset["thetaot"]
```
%% Cell type:markdown id: tags:
As we can see, coordinates are stored as arrays of grid cell indices, which is not a common format in the GIS community.
## 2. Calculate pre-industrail mean
We make a subset limiting the data set to pre-industrial era, calculate the mean values per grid cell, and plot the result.
%% Cell type:code id: tags:
``` python
dset["time"] = dset["time"].dt.strftime("%Y%m%d")
pre_industrial = dset["thetaot"].sel(time = slice("1850", "1880"))
pre_industrial_mean = pre_industrial.mean(dim = "time", keep_attrs = True)
pre_industrial_mean.plot()
```
%% Cell type:markdown id: tags:
If we plot the coastlines from cartopy over the dataset, we will see that the coordinate systems of these datasets are... a bit different.
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize = [30, 13])
# Set the projection to use for plotting
ax = plt.subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.coastlines()
# Pass ax as an argument when plotting. Here we assume data is in the same coordinate reference system than the projection chosen for plotting
# isel allows to select by indices instead of the time values
pre_industrial_mean.plot.pcolormesh(ax = ax, cmap = "coolwarm")
```
%% Cell type:markdown id: tags:
If we visualize our data set's grid, it becomes obvious that it is far from a standard grid.
%% Cell type:code id: tags:
``` python
import textwrap
def plot_curv_grid(ds, var = "tos"):
lat = cfxr.accessor._get_with_standard_name(ds, "latitude")[0]
lon = cfxr.accessor._get_with_standard_name(ds, "longitude")[0]
if any([i == None for i in [lat, lon]]):
print(ds.attrs["source_id"], ": Cannot identify latitude/longitude.")
return
plt.figure(figsize = (16, 9), dpi=120)
plt.scatter(x = ds[lon], y = ds[lat], s = 0.01)
# x, y = np.meshgrid(ds[lon], ds[lat])
# plt.scatter(x = x, y = y, s = 0.01)
try:
plt.title("\n".join(textwrap.wrap(ds.attrs["source_id"] + "(" + ds.attrs["source"].split("ocean:")[-1].split("\n")[0] + ")", 120)))
except (KeyError, IndexError):
plt.title(ds.attrs["source_id"])
plot_curv_grid(dset)
```
%% Cell type:markdown id: tags:
## 3. Reformat the grid and regridd the data set.
As we mentioned before, the coordinates need to be re-defined.
%% Cell type:code id: tags:
``` python
# Later one can just install and use clisops, once functions like this are merged into the master branch
def grid_reformat(ds):
# Define lat, lon, lat_bnds, lon_bnds
lat = ds.lat[:, 0]
lon = ds.lon[0, :]
lat_bnds = np.zeros((lat.shape[0], 2), dtype = "double")
lon_bnds = np.zeros((lon.shape[0], 2), dtype = "double")
# From (N + 1, M + 1) shaped bounds to (N, M, 4) shaped vertices
lat_vertices = cfxr.vertices_to_bounds(
ds.lat_b, ("bnds", "lat", "lon")
).values
lon_vertices = cfxr.vertices_to_bounds(
ds.lon_b, ("bnds", "lat", "lon")
).values
lat_vertices = np.moveaxis(lat_vertices, 0, -1)
lon_vertices = np.moveaxis(lon_vertices, 0, -1)
# From (N, M, 4) shaped vertices to (N, 2) and (M, 2) shaped bounds
lat_bnds[:, 0] = np.min(lat_vertices[:, 0, :], axis = 1)
lat_bnds[:, 1] = np.max(lat_vertices[:, 0, :], axis = 1)
lon_bnds[:, 0] = np.min(lon_vertices[0, :, :], axis = 1)
lon_bnds[:, 1] = np.max(lon_vertices[0, :, :], axis = 1)
# Create dataset
ds_ref = xr.Dataset(
coords = {
"lat": (["lat"], lat.data),
"lon": (["lon"], lon.data),
"lat_bnds": (["lat", "bnds"], lat_bnds.data),
"lon_bnds": (["lon", "bnds"], lon_bnds.data),
},
)
# Add variable attributes to the coordinate variables
ds_ref["lat"].attrs = {
"bounds": "lat_bnds",
"units": "degrees_north",
"long_name": "latitude",
"standard_name": "latitude",
"axis": "Y",
}
ds_ref["lon"].attrs = {
"bounds": "lon_bnds",
"units": "degrees_east",
"long_name": "longitude",
"standard_name": "longitude",
"axis": "X",
}
ds_ref["lat_bnds"].attrs = {
"long_name": "latitude_bounds",
"units": "degrees_north",
}
ds_ref["lon_bnds"].attrs = {
"long_name": "longitude_bounds",
"units": "degrees_east",
}
return ds_ref
```
%% Cell type:code id: tags:
``` python
# Specify a global 1deg target grid
# With clisops
#ds_out = clore.Grid(grid_instructor=(1., )).ds
# Without clisops
ds_out = grid_reformat(xe.util.grid_global(1., 1.))
ds_out
```
%% Cell type:markdown id: tags:
And now we regrid the data set to a normal 1-degree global grid.
%% Cell type:code id: tags:
``` python
# In case of problems, activate ESMF verbose mode
import ESMF
ESMF.Manager(debug = True)
# Regridding methods
#method_list = ["bilinear", "nearest_s2d", "patch", "conservative", "conservative_normed"]
method_list = ["bilinear", "nearest_s2d", "patch"]
# Function to generate the weights
# If grids have problems of degenerated cells near the poles there is the ignore_degenerate option
def regrid(ds_in, ds_out, method, periodic, unmapped_to_nan = True, ignore_degenerate = None):
"""Convenience function for calculating regridding weights"""
ds_in["lat"] = ds_in["latitude"]
ds_in["lon"] = ds_in["longitude"]
return xe.Regridder(ds_in, ds_out, method, periodic = periodic,
ignore_degenerate = ignore_degenerate)
```
%% Cell type:code id: tags:
``` python
%time regridder = regrid(pre_industrial_mean, ds_out, "bilinear", periodic = True, unmapped_to_nan = True, ignore_degenerate = None)
```
%% Cell type:code id: tags:
``` python
regridder
```
%% Cell type:code id: tags:
``` python
regridded_ds = regridder(pre_industrial_mean, keep_attrs = True)
regridded_ds
```
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize = [30, 13])
# Set the projection to use for plotting
ax = plt.subplot(1, 1, 1, projection = ccrs.PlateCarree())
ax.coastlines()
# Pass ax as an argument when plotting. Here we assume data is in the same coordinate reference system than the projection chosen for plotting
# isel allows to select by indices instead of the time values
regridded_ds.plot.pcolormesh(ax = ax, cmap = "coolwarm")
```
%% Cell type:markdown id: tags:
As we can see, the data has a correct CRS now.
## 4. Write a CRS and save the data set as a GeoTIFF
It is pretty straitforward to write a CRS to an xarray with rioxarray. Here we used the World Geodetic System 1984 CRS which is quite common and used by default, for example in QGIS.
%% Cell type:code id: tags:
``` python
#You might need to rename the axes before
transformed = regridded_ds.rename({"lon":"x", "lat":"y"})
transformed = transformed.rio.write_crs("EPSG:4326")
```
%% Cell type:markdown id: tags:
If we want to re-project the data set to e.g., Web Mercator, wich is used by default in Google Maps and ArcGIS, we do it as follows:
%% Cell type:code id: tags:
``` python
transformed = transformed.rio.reproject("EPSG:3857")
```
%% Cell type:markdown id: tags:
And finally, saving our dataset as a GeoTIFF is as easy as this:
%% Cell type:code id: tags:
``` python
transformed.rio.to_raster("regridded_3857.tif")
```
......
......@@ -15,7 +15,7 @@
"metadata": {},
"outputs": [],
"source": [
"filename = '/pool/data/CMIP6/data/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/tas/gn/v20190308/tas_Amon_CESM2_historical_r1i1p1f1_gn_185001-201412.nc'"
"filename = '/pool/data/CMIP6/data/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/tas/gn/v20190308/tas_Amon_CESM2_historical_r1i1p1f1_gn_185001-201412.nc'"
]
},
{
......@@ -289,7 +289,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
......@@ -303,9 +303,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
\ No newline at end of file
}
%% Cell type:markdown id: tags:
# Simple visualisation of a CMIP6 dataset with Python Xarray
- this is an adoption of [NordicESMhub example](https://nordicesmhub.github.io/NEGI-Abisko-2019/training/CMIP6_example.html) to use CMIP6 data stored in the DKRZ data pool
%% Cell type:code id: tags:
``` python
filename = '/pool/data/CMIP6/data/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/tas/gn/v20190308/tas_Amon_CESM2_historical_r1i1p1f1_gn_185001-201412.nc'
filename = '/pool/data/CMIP6/data/CMIP/NCAR/CESM2/historical/r1i1p1f1/Amon/tas/gn/v20190308/tas_Amon_CESM2_historical_r1i1p1f1_gn_185001-201412.nc'
```
%% Cell type:markdown id: tags:
### Import python packages
%% Cell type:code id: tags:
``` python
import xarray as xr
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import cftime
```
%% Cell type:markdown id: tags:
## Open dataset
- Use `xarray` python package to analyze netCDF dataset
- `open_dataset` allows to get all the metadata without loading data into memory.
- with `xarray`, we only load into memory what is needed.
%% Cell type:code id: tags:
``` python
dset = xr.open_dataset(filename, decode_times=True, use_cftime=True)
print(dset)
```
%% Cell type:markdown id: tags:
### Get metadata corresponding to near-surface air temperature (tas)
%% Cell type:code id: tags:
``` python
print(dset['tas'])
```
%% Cell type:code id: tags:
``` python
dset.time.values
```
%% Cell type:markdown id: tags:
### Select time
- Select a specific time
%% Cell type:code id: tags:
``` python
dset['tas'].sel(time=cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0, 2, 15)).plot(cmap = 'coolwarm')
```
%% Cell type:markdown id: tags:
- select the nearest time. Here from 1st April 1950
%% Cell type:code id: tags:
``` python
dset['tas'].sel(time=cftime.DatetimeNoLeap(1850, 4, 1), method='nearest').plot(cmap='coolwarm')
```
%% Cell type:markdown id: tags:
## Customize plot
### Set the size of the figure and add coastlines
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize=[30,13])
# Set the projection to use for plotting
ax = plt.subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.coastlines()
# Pass ax as an argument when plotting. Here we assume data is in the same coordinate reference system than the projection chosen for plotting
# isel allows to select by indices instead of the time values
dset['tas'].isel(time=0).plot.pcolormesh(ax=ax, cmap='coolwarm')
```
%% Cell type:markdown id: tags:
### Change plotting projection
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize=[10,10])
# We're using cartopy and are plotting in Orthographic projection
# (see documentation on cartopy)
ax = plt.subplot(1, 1, 1, projection=ccrs.Orthographic(0, 90))
ax.coastlines()
# We need to project our data to the new Orthographic projection and for this we use `transform`.
# we set the original data projection in transform (here PlateCarree)
dset['tas'].isel(time=0).plot(ax=ax, transform=ccrs.PlateCarree(), cmap='coolwarm')
# One way to customize your title
plt.title(dset.time.values[0].strftime("%B %Y"), fontsize=18)
```
%% Cell type:markdown id: tags:
### Choose the extent of values
- Fix your minimum and maximum values in your plot and
- Use extend so values below the minimum and max
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize=[10,10])
ax = plt.subplot(1, 1, 1, projection=ccrs.Orthographic(0, 90))
ax.coastlines()
# Fix extent
minval = 240
maxval = 300
# pass extent with vmin and vmax parameters
dset['tas'].isel(time=0).plot(ax=ax, vmin=minval, vmax=maxval, transform=ccrs.PlateCarree(), cmap='coolwarm')
# One way to customize your title
plt.title(dset.time.values[0].strftime("%B %Y"), fontsize=18)
```
%% Cell type:markdown id: tags:
## Multiplots
### Faceting
%% Cell type:code id: tags:
``` python
proj_plot = ccrs.Orthographic(0, 90)
p = dset['tas'].sel(time = dset.time.dt.year.isin([1850, 2014])).plot(x='lon', y='lat',
transform=ccrs.PlateCarree(),
aspect=dset.dims["lon"] / dset.dims["lat"], # for a sensible figsize
subplot_kws={"projection": proj_plot},
col='time', col_wrap=6, robust=True, cmap='PiYG')
# We have to set the map's options on all four axes
for ax,i in zip(p.axes.flat, dset.time.sel(time = dset.time.dt.year.isin([1850, 2014])).values):
ax.coastlines()
ax.set_title(i.strftime("%B %Y"), fontsize=18)
```
%% Cell type:markdown id: tags:
### Combine plots with different projections
%% Cell type:code id: tags:
``` python
fig = plt.figure(1, figsize=[20,10])
# Fix extent
minval = 240
maxval = 300
# Plot 1 for Northern Hemisphere subplot argument (nrows, ncols, nplot)
# here 1 row, 2 columns and 1st plot
ax1 = plt.subplot(1, 2, 1, projection=ccrs.Orthographic(0, 90))
# Plot 2 for Southern Hemisphere
# 2nd plot
ax2 = plt.subplot(1, 2, 2, projection=ccrs.Orthographic(180, -90))
tsel = 0
for ax,t in zip([ax1, ax2], ["Northern", "Southern"]):
map = dset['tas'].isel(time=tsel).plot(ax=ax, vmin=minval, vmax=maxval,
transform=ccrs.PlateCarree(),
cmap='coolwarm',
add_colorbar=False)
ax.set_title(t + " Hemisphere \n" , fontsize=15)
ax.coastlines()
ax.gridlines()
# Title for both plots
fig.suptitle('Near Surface Temperature\n' + dset.time.values[tsel].strftime("%B %Y"), fontsize=20)
cb_ax = fig.add_axes([0.325, 0.05, 0.4, 0.04])
cbar = plt.colorbar(map, cax=cb_ax, extend='both', orientation='horizontal', fraction=0.046, pad=0.04)
cbar.ax.tick_params(labelsize=25)
cbar.ax.set_ylabel('K', fontsize=25)
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment