Skip to content
Snippets Groups Projects
Commit 60b12023 authored by Aaron Spring's avatar Aaron Spring
Browse files

output and cdo_post

parent 2c5808cb
No related branches found
No related tags found
No related merge requests found
[run]
omit =
pymistral/tests/*.py
setup.py
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.2.3
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- id: check-yaml
- id: double-quote-string-fixer
- id: no-commit-to-branch
- id: debug-statements
- id: check-merge-conflict
- repo: https://github.com/ambv/black
rev: 19.3b0
hooks:
- id: black
args: ["--line-length", "79", "--skip-string-normalization"]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.2.3
hooks:
- id: flake8
args: ["--max-line-length=79", "--exclude=__init__.py", "--ignore=C901,W605,W503,F722"]
- repo: https://github.com/asottile/blacken-docs
rev: v1.0.0
hooks:
- id: blacken-docs
---
Markdown:
beautifiers:
- Prettier
indent_size: 4
indent_char: " "
wrap_line_length: 80
Python:
beautifiers:
- ["Black"]
indent_size: 2
indent_char: " "
indent_space: "space"
wrap_line_length: 80
This diff is collapsed.
......@@ -189,13 +189,13 @@ sources:
version:
description: version name
type: str
default: 's85_v4.2'
allowed: ['prior_v4.2','s76_v4.2','s85_v4.2','s93_v4.2']
default: s85_v4.2
allowed: [sEXTocNEET_v4.3, prior_v4.2, s76_v4.2, s81oc_v4.3, s85_v4.2, s93_v4.2, s99oc_v4.3, s04oc_v4.3, s10oc_v4.3]
t_res:
description: temporal resolution
type: str
default: 'monmean'
allowed: ['daily','monmean','yearmonmean']
default: yearmonmean
allowed: [daily, monmean, yearmonmean]
args:
urlpath: '/work/mh0727/data/Jena_CarboScope/atmospheric_CO2_inversion/{{version}}_{{t_res}}.nc'
......@@ -566,6 +566,7 @@ by Grigory Monterey, Sydney Levitus'
allowed: [3,3v]
args:
urlpath: /pool/data/ICDC/atmosphere/hadcrut/DATA/hadcrut{{version}}/HadCRUT{{version}}.nc
HadCRUT4:
driver: netcdf
description: HadCRUT Surface (Air) Temperature Anomalies
......
......@@ -32,8 +32,9 @@ dependencies:
- xrviz
- xhistogram
- pip:
- git+https://github.com/aaronspring/cmip6_preprocessing.git
- git+https://github.com/jbusecke/cmip6_preprocessing.git
- git+https://github.com/xgcm/xgcm.git
- git+https://github.com/xgcm/xrft.git
- git+https://github.com/mathause/regionmask.git
- git+https://github.com/ncar/intake-esm.git
- pre-commit
......@@ -4,11 +4,15 @@ pymistral
A package for analyzing, processing, and mapping ESM output on mistral.
Available Modules:
Available Modules
-----------------
- setup
- plot: Cartopy Mapping for MPIOM curvilinear grids
- hamocc: HAMOCC-specific helper functions
- slurm_post: write python code to file and send to SLURM (experimental)
- testing: FDR for xarray masked
- cdo_post: postprocessing with CDO into xarray
"""
from . import hamocc, plot, setup, slurm_post
from . import cdo_post, hamocc, plot, setup, slurm_post, testing
from .setup import cdo
import glob
import os
import warnings
import pandas as pd
import xarray as xr
import cdo
warnings.simplefilter('ignore')
xr.set_options(keep_attrs=True)
cdo = cdo.Cdo()
cdo.forceOutput = True
expid = 'asp_esmControl_PMassim_3014_TSDICALK_over_3170'
exppath = '/work/bm1124/m300524/experiments'
year = 3171 # year to get output labels from
outpath = os.path.expanduser('~/pymistral/') # folder to save output_df to
def find_all_outdatatypes_in_exp(expid=expid, exppath=exppath, year=year):
"""Find all outdatatypes (tracer,atm,data_2d,...) from experiment `expid`
from path `exppath` of a given `year`."""
outdatatypes = []
for model in ['echam6', 'jsbach', 'mpiom', 'hamocc']:
path = f'{exppath}/{expid}/outdata/{model}/{expid}_{model}_*_{year}*'
paths = glob.glob(path)
for f in paths:
file = os.path.basename(f)
# remove expid
for s in expid.split('_'):
file = file.strip(s + '_')
file = file.strip(str(year))
# print(file)
parts = file.split('_')
model = parts[0]
# print(parts)
if len(parts[1:]) < 2:
# print(parts[1:])
outdatatype = parts[1].split('.')[0]
else:
outdatatype = '_'.join(
('_'.join(parts[1:-1]), parts[-1].split('.')[0])
)
outdatatype = outdatatype.strip(str(year))
outdatatype = outdatatype.strip(f'{year}0101_{year}1231')
if outdatatype == 'co':
outdatatype = 'co2'
ending = parts[-1].split('.')[-1]
print(
f'Found file: model={model} outdatatype={outdatatype} '
f'ending={ending}.'
)
outdatatypes.append(f'{model}_{outdatatype}')
return outdatatypes
def read_all_outdatatype_files_to_ds(
outdatatypes, expid=expid, exppath=exppath, year=year
):
"""Read all outdatatypes from experiment `expid` from path `exppath` of a
given `year` and return xr.Dataset."""
ds_list = []
for outdatatype_id in outdatatypes:
print(f'Read {outdatatype_id} to xr.Dataset.')
parts = outdatatype_id.split('_')
model = parts[0]
outdatatype = '_'.join(parts[1:])
path = (
f'{exppath}/{expid}/outdata/{model}/'
+ f'{expid}_{model}_{outdatatype}_{year}*'
)
if model in ['jsbach', 'echam6']:
options = ' '
if (
'BOT' in outdatatype
or 'ATM' in outdatatype
or 'LOG' in outdatatype
):
options += ' -t echam6'
else:
table = (
f'{exppath}/{expid}/log/'
+ f'{expid}_{model}_{outdatatype[:2]}*.codes'
)
options += ' -t ' + table
else:
options = ''
ds = cdo.copy(input=path, options=options, returnXDataset=True)
if 'time' not in ds.dims:
ds = ds.expand_dims('time')
ds.to_netcdf(f'{exppath}/sample_files/{model}_{outdatatype}.nc')
# add outdatatype
for v in ds.data_vars:
ds[v].attrs['outdatatype'] = outdatatype
ds[v].attrs['model'] = model
ds[v].attrs['dims'] = list(ds[v].dims)
ds_list.append(ds.isel(time=0).mean())
return xr.merge(ds_list, compat='override')
def create_dataframe_of_output_info(ds, outpath=outpath):
"""Create pd.Dataframe about output from `ds` and save to `outpath`."""
df = pd.DataFrame(
index=ds.data_vars,
columns=[
'varname',
'long_name',
'code',
'table',
'units',
'model',
'outdatatype',
'dims',
],
)
for v in list(ds.data_vars):
for c in list(df.columns):
df[c].loc[v] = ds[v].attrs[c] if c in ds[v].attrs else ''
df['stream'] = df['model'] + '_' + df['outdatatype']
df.to_csv(outpath + 'MPI-ESM-1-2-LR_output.csv')
return df
def generate_output_df(
expid=expid, exppath=exppath, year=year, outpath=outpath, recalc=False
):
"""Combine all functions above to generate output from `expid` or just load
if not `recalc`."""
path = outpath + 'MPI-ESM-1-2-LR_output.csv'
if not recalc and os.path.exists(path):
print(f'Read df from path: {path}')
output_df = (
pd.read_csv(path, index_col='varname')
.rename(columns={'Unnamed: 0': 'varname'})
.set_index('varname')
)
else:
outdatatypes = find_all_outdatatypes_in_exp(
expid=expid, exppath=exppath, year=year
)
ds = read_all_outdatatype_files_to_ds(
outdatatypes, expid=expid, exppath=exppath, year=year
)
output_df = create_dataframe_of_output_info(ds, outpath=outpath)
return output_df
output_df = generate_output_df(recalc=False)
def get_model_outdatatype_from_var(
var, output_df=output_df, expid=expid, exppath=exppath
):
if isinstance(var, list): # if list check first
var = var[0]
if not isinstance(output_df, pd.DataFrame):
raise ValueError(
f'df needs to be pd.Dataframe, found {type(output_df)}'
)
model = output_df.T[var]['model']
outdatatype = output_df.T[var]['outdatatype']
if model in ['mpiom', 'hamocc']:
ending = 'nc'
options = ''
else:
ending = 'grb'
options = ' -f nc '
if (
'BOT' in outdatatype
or 'ATM' in outdatatype
or 'LOG' in outdatatype
):
options += ' -t echam6 '
else:
table = f'{exppath}/{expid}/log/{expid}_{model}_{outdatatype[:2]}*.codes'
options += '-t ' + table + ' '
return model, outdatatype, options, ending
def load_var_cdo(
var, expid=expid, exppath=exppath, output_df=output_df, timestr='', sel=''
):
"""Load variable `var` from experiment_id `expid` with cdo into xarray."""
model, outdatatype, options, ending = get_model_outdatatype_from_var(
var, output_df=output_df
)
def load_cdo(var, path, options, sel=sel):
print(f'cdo {options} -select,name={var}{sel} {path}')
return cdo.select(
f'name={var}{sel}',
input=path,
returnXDataset=True,
options=options,
)
path = f'{exppath}/{expid}/outdata/{model}/{expid}_{model}_{outdatatype}_{timestr}*'
ds = load_cdo(var, path, options, sel=sel).squeeze()
return ds
import cartopy as cp
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
from matplotlib.ticker import MaxNLocator
import cartopy as cp
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
@xr.register_dataarray_accessor('plot_map')
class CartopyMap(object):
"""
Plot the given 2D array on a cartopy axes with ('xc','lon','longitude') assumed as Longitude and ('yc','lat','latitude') assumed as Latitude.
Plot the given 2D array on a cartopy axes with
('xc','lon','longitude') assumed as Longitude and
('yc','lat','latitude') assumed as Latitude.
The default projection is PlateCarree, but can be:
cartopy.crs.<ProjectionName>()
If you would like to create a figure with multiple subplots
......@@ -27,10 +30,36 @@ class CartopyMap(object):
def __init__(self, xarray_obj):
self._obj = xarray_obj
def __call__(self, ax=None, proj=ccrs.PlateCarree(), plot_lon_lat_axis=True, feature='land', plot_type='pcolormesh', rm_cyclic=True, **kwargs):
return self._cartopy(ax=ax, proj=proj, feature=feature, plot_lon_lat_axis=plot_lon_lat_axis, plot_type=plot_type, rm_cyclic=rm_cyclic, **kwargs)
def _cartopy(self, ax=None, proj=ccrs.PlateCarree(), feature='land', plot_lon_lat_axis=True, plot_type='pcolormesh', rm_cyclic=True, **kwargs):
def __call__(
self,
ax=None,
proj=ccrs.PlateCarree(),
plot_lon_lat_axis=True,
feature='land',
plot_type='pcolormesh',
rm_cyclic=True,
**kwargs,
):
return self._cartopy(
ax=ax,
proj=proj,
feature=feature,
plot_lon_lat_axis=plot_lon_lat_axis,
plot_type=plot_type,
rm_cyclic=rm_cyclic,
**kwargs,
)
def _cartopy(
self,
ax=None,
proj=ccrs.PlateCarree(),
feature='land',
plot_lon_lat_axis=True,
plot_type='pcolormesh',
rm_cyclic=True,
**kwargs,
):
# TODO: CESM2 and GFDL-ESM4 have lon issue
......@@ -41,18 +70,26 @@ class CartopyMap(object):
xda = xda[xda.data_vars[0]]
else:
raise ValueError(
f'Please provide xr.DataArray, found {type(xda)}')
assert (xda.ndim == 2) or (xda.ndim == 3 and 'col' in kwargs or 'row' in kwargs) or (
xda.ndim == 4 and 'col' in kwargs and 'row' in kwargs)
f'Please provide xr.DataArray, found {type(xda)}'
)
assert (
(xda.ndim == 2)
or (xda.ndim == 3 and 'col' in kwargs or 'row' in kwargs)
or (xda.ndim == 4 and 'col' in kwargs and 'row' in kwargs)
)
single_plot = True if xda.ndim == 2 else False
stereo_maps = (ccrs.Stereographic,
ccrs.NorthPolarStereo,
ccrs.SouthPolarStereo)
stereo_maps = (
ccrs.Stereographic,
ccrs.NorthPolarStereo,
ccrs.SouthPolarStereo,
)
if isinstance(proj, stereo_maps):
raise ValueError(
'Not implemented, see https://github.com/luke-gregor/xarray_tools/blob/master/accessors.py#L222')
'Not implemented, see'
'https://github.com/luke-gregor/xarray_tools/accessors.py#L222'
)
# find whether curv or not
curv = False
......@@ -65,13 +102,13 @@ class CartopyMap(object):
lon = c
if c in ['yc', 'lat', 'latitude']:
lat = c
assert lon != None
assert lat != None
assert lon is not None
assert lat is not None
if proj in [ccrs.Robinson()]:
plot_lon_lat_axis = False
if plot_type is 'contourf':
if plot_type == 'contourf':
rm_cyclic = False
if curv and rm_cyclic:
xda = _rm_singul_lon(xda, lon=lon, lat=lat)
......@@ -79,18 +116,29 @@ class CartopyMap(object):
if 'robust' not in kwargs:
kwargs['robust'] = True
if 'cbar_kwargs' not in kwargs:
kwargs['cbar_kwargs'] = {'shrink': .6}
kwargs['cbar_kwargs'] = {'shrink': 0.6}
if ax is None:
if single_plot:
axm = getattr(xda.plot, plot_type)(
lon, lat, ax=plt.axes(projection=proj), transform=ccrs.PlateCarree(), **kwargs)
lon,
lat,
ax=plt.axes(projection=proj),
transform=ccrs.PlateCarree(),
**kwargs,
)
else:
axm = getattr(xda.plot, plot_type)(
lon, lat, subplot_kws={'projection': proj}, transform=ccrs.PlateCarree(), **kwargs)
lon,
lat,
subplot_kws={'projection': proj},
transform=ccrs.PlateCarree(),
**kwargs,
)
else:
axm = getattr(xda.plot, plot_type)(
lon, lat, ax=ax, transform=ccrs.PlateCarree(), **kwargs)
lon, lat, ax=ax, transform=ccrs.PlateCarree(), **kwargs
)
def work_on_axes(axes):
if 'coastline_color' in kwargs:
......@@ -99,11 +147,15 @@ class CartopyMap(object):
coastline_color = 'gray'
axes.coastlines(color=coastline_color, linewidth=1.5)
if feature is not None:
axes.add_feature(getattr(cp.feature, feature.upper()),
zorder=100, edgecolor='k')
axes.add_feature(
getattr(cp.feature, feature.upper()),
zorder=100,
edgecolor='k',
)
if plot_lon_lat_axis:
_set_lon_lat_axis(axes, proj)
if single_plot:
if ax is None:
ax = plt.gca()
......@@ -122,41 +174,12 @@ def _rm_singul_lon(ds, lon='lon', lat='lat'):
lons = ds[lon].values
fixed_lons = lons.copy()
for i, start in enumerate(np.argmax(np.abs(np.diff(lons)) > 180, axis=1)):
fixed_lons[i, start + 1:] += 360
fixed_lons[i, start + 1 :] += 360
lons_da = xr.DataArray(fixed_lons, ds[lat].coords)
ds = ds.assign_coords({lon: lons_da})
return ds
def my_facetgrid(da,
projection=ccrs.PlateCarree(),
coastline_color='gray',
curv=False,
col='year',
col_wrap=2,
**kwargs):
"""Wrap xr.facetgrid plot with cartopy"""
transform = ccrs.PlateCarree()
p = da.plot.pcolormesh(
'lon',
'lat',
transform=transform,
col=col,
col_wrap=col_wrap,
subplot_kws={'projection': projection},
**kwargs)
for ax in p.axes.flat:
if curv:
ax.add_feature(cp.feature.LAND, zorder=100, edgecolor='k')
if projection == ccrs.PlateCarree():
_set_lon_lat_axis(ax, projection)
ax.set_xlabel('')
ax.set_ylabel('')
ax.coastlines()
# ax.set_extent([-160, -30, 5, 75])
# ax.set_aspect('equal', 'box-forced')
def _set_lon_lat_axis(ax, projection, talk=False):
"""Add longitude and latitude coordinates to cartopy plots."""
ax.set_xticks([-180, -120, -60, 0, 60, 120, 180], crs=projection)
......
import numpy as np
from statsmodels.stats.multitest import multipletests
def xr_multipletest(p, alpha=0.05, method='fdr_bh', **multipletests_kwargs):
"""Apply statsmodels.stats.multitest.multipletests for multi-dimensional
xr.objects.
Args:
p (xr.object): uncorrected p-values.
alpha (type): FWER, family-wise error rate. Defaults to 0.05.
method (str): Method used for testing and adjustment of pvalues. Can be
either the full name or initial letters. Available methods are:
- bonferroni : one-step correction
- sidak : one-step correction
- holm-sidak : step down method using Sidak adjustments
- holm : step-down method using Bonferroni adjustments
- simes-hochberg : step-up method (independent)
- hommel : closed method based on Simes tests (non-negative)
- fdr_bh : Benjamini/Hochberg (non-negative)
- fdr_by : Benjamini/Yekutieli (negative)
- fdr_tsbh : two stage fdr correction (non-negative)
- fdr_tsbky : two stage fdr correction (non-negative)
Defaults to 'fdr_bh'.
**multipletests_kwargs (optional dict): is_sorted, returnsorted
see statsmodels.stats.multitest.multitest
Returns:
reject (xr.object): true for hypothesis that can be rejected for given
alpha
pvals_corrected (xr.object): p-values corrected for multiple tests
Example:
reject, xpvals_corrected = xr_multipletest(p)
"""
# stack all to 1d array
p_stacked = p.stack(s=p.dims)
# mask only where not nan:
# https://github.com/statsmodels/statsmodels/issues/2899
mask = np.isfinite(p_stacked)
pvals_corrected = np.full(p_stacked.shape, np.nan)
reject = np.full(p_stacked.shape, np.nan)
# apply test where mask
reject[mask] = multipletests(
p_stacked[mask], alpha=alpha, method=method, **multipletests_kwargs
)[0]
pvals_corrected[mask] = multipletests(
p_stacked[mask], alpha=alpha, method=method, **multipletests_kwargs
)[1]
def unstack(reject, p_stacked):
"""Exchange values from p_stacked w/ reject (1d array) and unstack."""
xreject = p_stacked.copy()
xreject.values = reject
xreject = xreject.unstack()
return xreject
reject = unstack(reject, p_stacked)
pvals_corrected = unstack(pvals_corrected, p_stacked)
return reject, pvals_corrected
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment