#!/bin/bash

# version: 8.3.2018
# contributions from Sebastian Milinski and Dian Putrasahan

# extract selected variables from a range of experiments
# merge all ensemble members into one file with dimensions (ens|time|lat|lon)

# Print the help text and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 0), so
#                 error paths can report failure while -h still succeeds.
# Outputs:   help text on stdout
usage ()
{
  # Quoted delimiter: the text is printed literally, so $USER appears as-is
  # in the help output instead of being expanded.
  cat <<'EOF'
This script gathers one or more variables from an experiment of the MPI-ESM-GE and creates a merged file that has an ensemble dimension.
By default, the historical ensemble is used and the variable is gathered from the ECHAM BOT files.
At least -v has to be specified, other arguments are optional.
Usage : gather_variables.sh -e <experiment>       (options: hist,onepct,rcp26,rcp45,rcp85,pictrl)
                            -v <variables>        (options: single variable name or comma separated list.
                                                   Make sure the variables are available in the selected filetype.)
                            -r0 <first member>    (options: Integer from 1 to 100, default is 1)
                            -rn <last member>     (options: Integer from 1 to 100, default is 100)
                            -f <echam filetype>   (options: ATM or BOT)
                            -o <output directory> (directory for output, default is /scratch/m/$USER/MPI-GE )
                            -P <parallel jobs>    NOT IMPLEMENTED! (options: any integer, default is 1)
                            -h  show help
EOF
  exit "${1:-0}"
}

# Make the NCO and CDO command-line tools available; the processing steps
# further down call cdo, ncecat and ncpdq.
module load nco
module load cdo

basedir=/work/mh1007/MPI-GE # directory with symlinks to all runs
workdir="/scratch/m/${USER}/MPI-GE" # root of the output tree; can be replaced with -o


# set default values (each can be overridden on the command line)
experiment=hist # hist | onepct | rcp26 | rcp45 | rcp85 | pictrl
#first run in sequence
run_0=1
#last run in sequence
run_n=100
# choose ATM or BOT file
echam_filetype=BOT
# choose variable(s); empty means "not given", which is rejected later
vars=
# Number of parallel jobs allowed
pjobs=1

# Report a command-line error, show the help text on stderr and stop with
# status 1.
# Arguments: $1 - error description
_parse_args_fail ()
{
    echo "Error: $1" >&2
    # usage terminates the shell it runs in; calling it in a subshell keeps
    # control here so the non-zero status below is what the caller sees.
    ( usage ) >&2
    exit 1
}

# Parse the command-line options into the global settings.
# Arguments: the script's positional parameters
# Globals written: experiment, vars, run_0, run_n, echam_filetype, pjobs, workdir
# Exits: via usage for -h/--help; with status 1 for an unknown option or an
#        option that is missing its value.
parse_args ()
{
    while [ $# -gt 0 ]; do
        case "$1" in
            -h | --help )
                usage
                exit
                ;;
            -e | --experiment | -v | --variables | -r0 | --run_0 | -rn | --run_n | \
            -f | --echam_filetype | -P | -o )
                # every remaining option consumes the following argument
                [ $# -ge 2 ] || _parse_args_fail "option $1 requires a value."
                ;;
            * )
                _parse_args_fail "unknown option: $1"
                ;;
        esac

        case "$1" in
            -e | --experiment )      experiment=$2 ;;
            -v | --variables )       vars=$2 ;;
            -r0 | --run_0 )          run_0=$2 ;;
            -rn | --run_n )          run_n=$2 ;;
            -f | --echam_filetype )  echam_filetype=$2 ;;
            -P )                     pjobs=$2 ;;
            -o )                     workdir=$2 ;;
        esac
        shift 2
    done
}

parse_args "$@"

# ATM processing is switched off (issue #4). Refuse the request up front and
# exit non-zero so callers can tell that nothing was produced.
if [[ $echam_filetype == ATM ]]; then
    echo "Sorry, processing of ATM files currently disabled because of bug (see issue #4)" >&2
    exit 1
fi

# Map the chosen experiment to the range of years to extract.
# Sets yearstart/yearend; for pictrl it also pins run_n to run_0 because the
# two control runs have different lengths and are processed one at a time.
# Any experiment (or pictrl member) not listed here aborts the script:
# without a year range the later file names and cdo selections are invalid.
case "$experiment" in
    onepct )
        yearstart=1850
        yearend=1999
        ;;
    hist )
        yearstart=1850
        yearend=2005
        ;;
    pictrl )
        case "$run_0" in
            1 )
                yearstart=1850
                yearend=3850
                run_n=1
                ;;
            2 )
                yearstart=1850
                yearend=2870
                run_n=2
                ;;
            * )
                echo "for pictrl, choose -r0 1 OR -r0 2 (-run_n option will be ignored, control runs can only be processed separately.))" >&2
                exit 1
                ;;
        esac
        ;;
    rcp26 | rcp45 | rcp85 )
        yearstart=2006
        yearend=2099
        ;;
    * )
        echo "Unknown experiment: $experiment. Please enter a valid experiment. Use -h to show help." >&2
        exit 1
        ;;
esac

# -v is the only mandatory option: without a variable list there is nothing
# to extract, so show the help text and stop with a failure status.
if [[ -z $vars ]]; then
    {
        echo ' '
        echo 'Error: no variable specified! Choose a variable using the -v option.'
        echo ' '
        # usage terminates the shell it runs in; the subshell keeps control
        # here so the script can exit non-zero afterwards.
        ( usage )
    } >&2
    exit 1
fi

# Show the effective settings before any processing starts (one line each).
printf '%s\n' \
    "Experiment: $experiment" \
    "Variables: $vars" \
    "Members: $run_0 to $run_n" \
    "Years: $yearstart - $yearend" \
    "Using ECHAM $echam_filetype files" \
    "running up to $pjobs parallel jobs"

# derive some variables to be used
outdir="${workdir}/${experiment}" # final output directory for this experiment
# NOTE(review): tmpdir depends on $SCRATCH from the environment; if it is
# unset this resolves to /tmp/<experiment> - confirm it is always defined on
# the target system.
tmpdir="${SCRATCH}/tmp/${experiment}" # per-member intermediate files
# replace , with _ so that variable list can be used in filename
# (parameter expansion: no word splitting or globbing of the list)
varstring=${vars//,/_}
filestring="${experiment}_${yearstart}-${yearend}_ens_${run_0}-${run_n}" # used for filenames

# Test if all requested variables have already been processed. If this is
# true, exit.
# for future version: do processing only for the missing variables

# create array from $vars by using , as a separator
IFS=',' read -r -a var_array <<< "$vars"
num_missfiles=0 # number of requested variables without an output file
for var in "${var_array[@]}"; do
    filename="${outdir}/${filestring}.${var}.nc"
    if [[ ! -f $filename ]]; then
        num_missfiles=$(( num_missfiles + 1 ))
        echo "$filename is missing."
    fi
done

if (( num_missfiles >= 1 )); then
    echo "At least one requested variable has not been processed. Starting processing..."
else
    echo "All requested variables have already been processed. Nothing left to be done."
    exit
fi

#============= Loop through each run
# For every member run_0..run_n: select the requested variables from the
# monthly ECHAM6 GRIB files and write them as one NetCDF file per member
# into the temporary directory. Members whose file already exists are
# skipped.

# Stop after a failed cdo call. The incomplete per-member file is removed
# first: the loop skips members whose merged file exists, so a leftover
# partial file would be taken for a finished one on the next run.
abort_member ()
{
    echo "Error: cdo failed while processing ${expid}, aborting." >&2
    rm -f -- "${merged}"
    exit 1
}

for rrr in $(seq "$run_0" "$run_n"); do

    expid=${experiment}$(printf "%04d" "$rrr")
    echo "${expid}"
    ###### I/O
    datdir="${basedir}/${experiment}/${expid}/outdata/echam6"
    curtmpdir="${tmpdir}/${expid}"

    mkdir -p "${curtmpdir}"
    mkdir -p "${outdir}"

    ####### processing
    merged="${curtmpdir}/${expid}_echam6_${varstring}_mm.nc"
    # vertsum=${curtmpdir}/${expid}_echam6_${varstring}_mm_vertsum.nc
    # if [ ! -f $merged ] && [ ! -f $vertsum ]; then
    if [[ -f $merged ]]; then
        # echo "processed file found, skipping processing step."
        continue
    fi

    # different number of variables in some BOT files does not allow use of cdo select (only applies to 1%CO2 69-100)
    if [[ $experiment == onepct ]] && (( rrr >= 69 )); then
        # loop over all years, extract variables and use mergetime in the end
        yearly_files=()
        for year in $(seq "$yearstart" "$yearend"); do
            ifile_year="${datdir}/${expid}_echam6_${echam_filetype}_mm_${year}.grb"
            tmpfile_year="${curtmpdir}/${expid}_echam6_${varstring}_mm_${year}.nc"
            cdo -t echam6 -f nc "selname,${vars}" "$ifile_year" "$tmpfile_year" || abort_member
            yearly_files+=("$tmpfile_year")
        done
        cdo mergetime "${yearly_files[@]}" "${merged}" || abort_member
        rm -- "${yearly_files[@]}"
    else
        # the glob is expanded here; one input file per year is expected
        ifiles=( "${datdir}/${expid}_echam6_${echam_filetype}_mm_"????.grb )
        cdo -t echam6 -f nc "select,name=${vars},year=${yearstart}/${yearend}" "${ifiles[@]}" "${merged}" || abort_member
    fi

    # if [[ $echam_filetype == ATM ]] && [[ $vars == q ]]; then
    #     if [ ! -f $vertsum ]; then
    #         #if input comes from 3D files and is specific humidity, calculate vertical sum
    #         cdo vertsum ${merged} ${vertsum}
    #         rm ${merged}
    #     else
    #         echo "Vertical sum file found, skipping processing step."
    #     fi
    # fi
done

# merge ensemble into one file
# (not done for pictrl: a control run is a single member)
if [[ $experiment != pictrl ]]; then
    ensfile="${outdir}/${varstring}_${experiment}_${yearstart}-${yearend}_ens_${run_0}-${run_n}.nc"
    if [[ ! -f $ensfile ]]; then
        # Collect exactly the requested members, in member order. A glob over
        # the whole temporary directory would also pick up members outside
        # run_0..run_n and files of other variable sets whose name merely
        # contains ${varstring}.
        member_files=()
        for member in $(seq "$run_0" "$run_n"); do
            member_id=${experiment}$(printf "%04d" "$member")
            member_file="${tmpdir}/${member_id}/${member_id}_echam6_${varstring}_mm.nc"
            if [[ ! -f $member_file ]]; then
                echo "Error: member file $member_file is missing, cannot build the ensemble file." >&2
                exit 1
            fi
            member_files+=("$member_file")
        done
        if (( ${#member_files[@]} == 0 )); then
            echo "Error: no member files to merge." >&2
            exit 1
        fi

        #concatenate the files and introduces a new record dimension called ens (order matters!)
        #Switch record and time dimension (so that time gets unlimited and record gets fixed)
        if ! { ncecat -O -u ens "${member_files[@]}" "$ensfile" \
               && ncpdq -O -a time,ens "$ensfile" "$ensfile"; }; then
            echo "Error: building $ensfile failed." >&2
            # do not leave a half-finished file that a later run would reuse
            rm -f -- "$ensfile"
            exit 1
        fi
    else
        echo "merged ensemble file found, skipping step"
    fi
    echo "file created: $ensfile"
fi

# Write one output file per variable: ${outdir}/${filestring}.<variable>.nc
echo "Splitting variables into files..."
if [[ $experiment != pictrl ]]; then
    split_input=$ensfile
else
    # Control runs are processed as a single member (run_n equals run_0), so
    # name that member's merged file directly instead of relying on a glob,
    # which could match several files.
    ctrl_id=${experiment}$(printf "%04d" "$run_0")
    split_input="${tmpdir}/${ctrl_id}/${ctrl_id}_echam6_${varstring}_mm.nc"
fi

if ! cdo splitname "$split_input" "${outdir}/${filestring}."; then
    echo "Error: splitting $split_input into per-variable files failed." >&2
    exit 1
fi

echo "Created output files in ${outdir}"