percentiles_hist.cc 12.4 KB
Newer Older
Uwe Schulzweida's avatar
Uwe Schulzweida committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
  This file is part of CDO. CDO is a collection of Operators to
  manipulate and analyse Climate model Data.

  Copyright (C) 2006 Brockmann Consult
  See COPYING file for copying and redistribution conditions.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
*/

18
#include <algorithm>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
19
#include <stdlib.h>
Uwe Schulzweida's avatar
Uwe Schulzweida committed
20

Ralf Mueller's avatar
Ralf Mueller committed
21
#include <cdi.h>
Oliver Heidmann's avatar
Oliver Heidmann committed
22

23
#include "cdo_output.h"
24
#include "percentiles.h"
25
#include "percentiles_hist.h"
Uwe Schulzweida's avatar
Uwe Schulzweida committed
26

27
28
#define NBINS_DEFAULT 101
#define NBINS_MINIMUM 11
Uwe Schulzweida's avatar
Uwe Schulzweida committed
29

30
31
#define DBL_CAPACITY(n) ((int) (((n) * sizeof(int)) / sizeof(double)))
#define DBL_PTR(p) ((double *) (p))
32
#define FLT_PTR(p) ((float *) (p))
33
#define INT_PTR(p) ((int *) (p))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
34

35
36
37
38
39
40
enum value_operations
{
  OP_ADD = 1,
  OP_SUB = 2
};

41
42
static int
histGetEnvNBins()
Uwe Schulzweida's avatar
Uwe Schulzweida committed
43
44
{
  const char *str = getenv("CDO_PCTL_NBINS");
45

46
  return str != nullptr ? std::max(atoi(str), NBINS_MINIMUM) : NBINS_DEFAULT;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
47
48
}

49
static void
Uwe Schulzweida's avatar
Uwe Schulzweida committed
50
histDefBounds(Histogram &hist, double a, double b)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
51
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
52
  assert(hist.nbins > 0);
53

54
55
  hist.min = std::min(a, b);
  hist.max = std::max(a, b);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
56
57
  hist.step = (hist.max - hist.min) / hist.nbins;
  hist.nsamp = 0;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
58

Uwe Schulzweida's avatar
Uwe Schulzweida committed
59
  for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
60
61
}

62
static void
Uwe Schulzweida's avatar
Uwe Schulzweida committed
63
histBinValue(Histogram &hist, double value)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
64
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
65
  assert(hist.step > 0);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
66

67
  const int bin = std::min((int) ((value - hist.min) / hist.step), hist.nbins - 1);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
68
  if (bin >= 0 && bin < hist.nbins) INT_PTR(hist.ptr)[bin]++;
69
}
Uwe Schulzweida's avatar
Uwe Schulzweida committed
70

71
72
73
74
75
static void
histBinSubValue(Histogram &hist, double value)
{
  assert(hist.step > 0);

Fabian Wachsmann's avatar
Fabian Wachsmann committed
76
  const int bin = std::min((int) ((value - hist.min) / hist.step), hist.nbins - 1);
77
78
79
  if (bin >= 0 && bin < hist.nbins && INT_PTR(hist.ptr)[bin] > 0) INT_PTR(hist.ptr)[bin]--;
}

80
template <typename T>
81
static void
Uwe Schulzweida's avatar
Uwe Schulzweida committed
82
histBin(Histogram &hist)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
83
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
84
  assert(hist.nsamp == DBL_CAPACITY(hist.nbins));
85

Uwe Schulzweida's avatar
Uwe Schulzweida committed
86
  std::vector<double> values(hist.nsamp);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
87

88
  for (int i = 0; i < hist.nsamp; i++) values[i] = ((T *) (hist.ptr))[i];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
89
90
  for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0;
  for (int i = 0; i < hist.nsamp; i++) histBinValue(hist, values[i]);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
91
92
}

93
template <typename T>
94
95
96
97
98
99
100
static int
histReset(Histogram &hist)
{
  assert(hist.nbins > 0);

  if (hist.nsamp < DBL_CAPACITY(hist.nbins))
    {
101
      for (int i = 0; i < hist.nsamp; i++) ((T *) (hist.ptr))[i] = 0.;
102
103
104
105
106
107
108
109
110
111
    }
  else
    {
      for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0;
    }
  hist.nsamp = 0;

  return 0;
}

112
template <typename T>
113
static int
114
histAddValue(Histogram &hist, T value)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
115
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
116
  assert(hist.nbins > 0);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
117

Uwe Schulzweida's avatar
Uwe Schulzweida committed
118
119
120
  // 2011-08-01 Uwe Schulzweida: added check for rounding errors
  if (value < hist.min && (hist.min - value) < 1e5) value = hist.min;
  if (value > hist.max && (value - hist.max) < 1e5) value = hist.max;
121

Uwe Schulzweida's avatar
Uwe Schulzweida committed
122
123
  if (IS_EQUAL(hist.min, hist.max)) return 0;
  if (value < hist.min || value > hist.max) return 1;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
124

Uwe Schulzweida's avatar
Uwe Schulzweida committed
125
  if (hist.nsamp < DBL_CAPACITY(hist.nbins))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
126
    {
127
      ((T *) (hist.ptr))[hist.nsamp] = value;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
128
      hist.nsamp++;
129
    }
Uwe Schulzweida's avatar
Uwe Schulzweida committed
130
  else if (hist.nsamp > DBL_CAPACITY(hist.nbins))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
131
132
    {
      histBinValue(hist, value);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
133
      hist.nsamp++;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
134
135
136
    }
  else
    {
137
      histBin<T>(hist);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
138
      histBinValue(hist, value);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
139
      hist.nsamp++;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
140
    }
141

Uwe Schulzweida's avatar
Uwe Schulzweida committed
142
143
144
  return 0;
}

145
template <typename T>
146
static void
147
histRemoveValue(Histogram &hist, T value)
148
149
150
151
{
  int i = 0;
  for ( i = 0; i < hist.nsamp; i++ )
    {
152
      if ( IS_EQUAL(((T *) (hist.ptr))[i], value) )
153
        {
154
          if ( i != hist.nsamp-1 ) ((T *) (hist.ptr))[i] = ((T *) (hist.ptr))[hist.nsamp-1];
155
156
157
158
159
160
161
162
163
          break;
        }
    }
  if ( i == hist.nsamp )
    cdoWarning("'%f' not found in histogram!", value);
  else
    hist.nsamp--;
}

164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
static int
histSubValue(Histogram &hist, double value)
{
  assert(hist.nbins > 0);

  // 2011-08-01 Uwe Schulzweida: added check for rounding errors
  if (value < hist.min && (hist.min - value) < 1e5) value = hist.min;
  if (value > hist.max && (value - hist.max) < 1e5) value = hist.max;

  if (IS_EQUAL(hist.min, hist.max)) return 0;
  if (value < hist.min || value > hist.max) return 1;

  if (hist.nsamp < DBL_CAPACITY(hist.nbins))
    {
      histRemoveValue(hist, value);
    }
  else if (hist.nsamp > DBL_CAPACITY(hist.nbins))
    {
      histBinSubValue(hist, value);
      hist.nsamp--;
    }
185
  else
186
187
188
189
190
    return 1;

  return 0;
}

191
static double
192
histGetPercentile(const Histogram &hist, double p, int ptype)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
193
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
194
195
  assert(hist.nsamp > 0);
  assert(hist.nbins > 0);
196
197
198
  assert(p >= 0);
  assert(p <= 100);

Uwe Schulzweida's avatar
Uwe Schulzweida committed
199
  if (hist.nsamp > DBL_CAPACITY(hist.nbins))
200
    {
Uwe Schulzweida's avatar
Uwe Schulzweida committed
201
      double s = hist.nsamp * (p / 100.0);
202
      int i = 0, count = 0;
203

204
      do
Uwe Schulzweida's avatar
Uwe Schulzweida committed
205
        count += INT_PTR(hist.ptr)[i++];
206
207
208
      while (count < s);

      assert(i > 0);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
209
210
211
      assert(i - 1 < hist.nbins);
      assert(INT_PTR(hist.ptr)[i - 1] > 0);
      assert(hist.step > 0.0);
212

Uwe Schulzweida's avatar
Uwe Schulzweida committed
213
214
      double t = (count - s) / INT_PTR(hist.ptr)[i - 1];
      return hist.min + (i - t) * hist.step;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
215
216
217
    }
  else
    {
218
      if ((ptype & FIELD_FLT))
219
        {
220
          std::vector<double> values(hist.nsamp);
221
          for (int i = 0; i < hist.nsamp; i++) values[i] = (double)FLT_PTR(hist.ptr)[i];
222
          return percentile(values.data(), hist.nsamp, p);
223
224
225
        }
      else
        return percentile(DBL_PTR(hist.ptr), hist.nsamp, p);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
226
227
228
    }
}

229
void
Uwe Schulzweida's avatar
Uwe Schulzweida committed
230
HistogramSet::createVarLevels(int varID, int nlevels, size_t nhists)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
231
{
Uwe Schulzweida's avatar
Uwe Schulzweida committed
232
  const int nbins = histGetEnvNBins();
233
234

  assert(nbins > 0);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
235
  assert(nlevels > 0);
236
237
  assert(nhists > 0);

Uwe Schulzweida's avatar
Uwe Schulzweida committed
238
  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
239

Uwe Schulzweida's avatar
Uwe Schulzweida committed
240
241
242
  this->var_nlevels[varID] = nlevels;
  this->var_nhists[varID] = nhists;
  this->histograms[varID].resize(nlevels);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
243

244
  for (int levelID = 0; levelID < nlevels; levelID++)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
245
    {
Uwe Schulzweida's avatar
Uwe Schulzweida committed
246
247
      this->histograms[varID][levelID].resize(nhists);
      auto &hists = this->histograms[varID][levelID];
248

Uwe Schulzweida's avatar
Uwe Schulzweida committed
249
      for (size_t histID = 0; histID < nhists; histID++)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
250
251
252
253
254
255
        {
          hists[histID].min = 0.0;
          hists[histID].max = 0.0;
          hists[histID].step = 0.0;
          hists[histID].nbins = nbins;
          hists[histID].nsamp = 0;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
256
          hists[histID].ptr = (int *) malloc(nbins * sizeof(int));
Uwe Schulzweida's avatar
Uwe Schulzweida committed
257
          if (hists[histID].ptr == nullptr) cdoAbort("Not enough memory (%s)", __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
258
259
260
261
        }
    }
}

262
void
Uwe Schulzweida's avatar
Uwe Schulzweida committed
263
HistogramSet::defVarLevelBounds(int varID, int levelID, const Field &field1, const Field &field2)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
264
{
265
266
  const auto &array1 = field1.vec_d;
  const auto &array2 = field2.vec_d;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
267
268
  assert(!array1.empty());
  assert(!array2.empty());
269

Uwe Schulzweida's avatar
Uwe Schulzweida committed
270
  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
271

Uwe Schulzweida's avatar
Uwe Schulzweida committed
272
  const auto nlevels = this->var_nlevels[varID];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
273
  if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
274

Uwe Schulzweida's avatar
Uwe Schulzweida committed
275
276
  const auto nhists = this->var_nhists[varID];
  if (nhists != gridInqSize(field1.grid) || nhists != gridInqSize(field2.grid)) cdoAbort("Grids are different (%s)", __func__);
277

Uwe Schulzweida's avatar
Uwe Schulzweida committed
278
  auto &hists = this->histograms[varID][levelID];
279

Uwe Schulzweida's avatar
Uwe Schulzweida committed
280
  for (size_t i = 0; i < nhists; i++)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
281
    {
282
283
      const auto a = array1[i];
      const auto b = array2[i];
284

Uwe Schulzweida's avatar
Uwe Schulzweida committed
285
286
      if (DBL_IS_EQUAL(a, field1.missval) || DBL_IS_EQUAL(b, field2.missval) || DBL_IS_EQUAL(a, b))
        histDefBounds(hists[i], 0.0, 0.0);
287
      else
Uwe Schulzweida's avatar
Uwe Schulzweida committed
288
        histDefBounds(hists[i], a, b);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
289
290
291
    }
}

292
void
293
294
HistogramSet::defVarLevelBoundsF(int varID, int levelID, const Field &field1, const Field &field2)
{
295
296
  const auto &array1 = field1.vec_f;
  const auto &array2 = field2.vec_f;
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
  assert(!array1.empty());
  assert(!array2.empty());

  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);

  const auto nlevels = this->var_nlevels[varID];
  if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__);

  const auto nhists = this->var_nhists[varID];
  if (nhists != gridInqSize(field1.grid) || nhists != gridInqSize(field2.grid)) cdoAbort("Grids are different (%s)", __func__);

  auto &hists = this->histograms[varID][levelID];

  for (size_t i = 0; i < nhists; i++)
    {
312
313
      const auto a = array1[i];
      const auto b = array2[i];
314
315
316
317

      if (DBL_IS_EQUAL(a, field1.missval) || DBL_IS_EQUAL(b, field2.missval) || DBL_IS_EQUAL(a, b))
        histDefBounds(hists[i], 0.0, 0.0);
      else
318
        histDefBounds(hists[i], a, b);
319
320
321
    }
}

322
int
323
HistogramSet::addSubVarLevelValues(int varID, int levelID, const Field &field, int operation, int ptype)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
324
{
325
  const auto &array = field.vec_d;
326
  const auto &arrayf = field.vec_f;
327

Uwe Schulzweida's avatar
Uwe Schulzweida committed
328
  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
329

Uwe Schulzweida's avatar
Uwe Schulzweida committed
330
  const auto nlevels = this->var_nlevels[varID];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
331
  if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__);
332

333
  const auto nhists = this->var_nhists[varID];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
334
  if (nhists != gridInqSize(field.grid)) cdoAbort("Grids are different (%s)", __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
335

Uwe Schulzweida's avatar
Uwe Schulzweida committed
336
  auto &hists = this->histograms[varID][levelID];
337

338
339
  int nign = 0;

340
  const auto missval = field.missval;
341
  if ( (ptype & FIELD_FLT))
Uwe Schulzweida's avatar
Uwe Schulzweida committed
342
    {
343
344
      assert(!arrayf.empty());
      if ( operation == OP_ADD )
345
        {
346
347
348
          if (field.nmiss)
            {
              for (size_t i = 0; i < nhists; i++)
349
                if (!DBL_IS_EQUAL(array[i], missval)) nign += histAddValue<float>(hists[i], arrayf[i]);  
350
351
352
            }  
          else
            {
353
              for (size_t i = 0; i < nhists; i++) nign += histAddValue<float>(hists[i], arrayf[i]);
354
            }
355
356
357
        }
      else
        {
358
359
360
          if (field.nmiss)
            {
              for (size_t i = 0; i < nhists; i++)
361
                if (!DBL_IS_EQUAL(array[i], missval)) nign += histSubValue(hists[i], arrayf[i]);
362
363
364
            }
          else
            {
365
              for (size_t i = 0; i < nhists; i++) nign += histSubValue(hists[i], arrayf[i]);
366
            }
367
        }
Uwe Schulzweida's avatar
Uwe Schulzweida committed
368
369
    }
  else
370
    {
371
372
      assert(!array.empty());
      if ( operation == OP_ADD )
373
        {
374
375
376
          if (field.nmiss)
            {
              for (size_t i = 0; i < nhists; i++)
377
                if (!DBL_IS_EQUAL(array[i], missval)) nign += histAddValue<double>(hists[i], array[i]);  
378
379
380
            }  
          else
            {
381
              for (size_t i = 0; i < nhists; i++) nign += histAddValue<double>(hists[i], array[i]);
382
            }
383
384
385
        }
      else
        {
386
387
388
389
390
391
392
393
394
          if (field.nmiss)
            {
              for (size_t i = 0; i < nhists; i++)
                if (!DBL_IS_EQUAL(array[i], missval)) nign += histSubValue(hists[i], array[i]);
            }
          else
            {
              for (size_t i = 0; i < nhists; i++) nign += histSubValue(hists[i], array[i]);
            }
395
        }
Uwe Schulzweida's avatar
Uwe Schulzweida committed
396
397
    }

398
399
400
401
402
403
  if (nign)
    {
      cdoWarning("%d out of %d grid values are out of bounds and have been ignored (%s)", nign, nhists, __func__);
      return 1;
    }

404
  return 0;
405
}
406

407
void
408
HistogramSet::Reset(int varID, int levelID, int ptype)
409
410
411
412
413
{
  assert(nvars > 0);

  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);

414
  const auto nlevels = this->var_nlevels[varID];
415
416
417
418
  assert(nlevels > 0);

  if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__);

419
  const auto nhists = this->var_nhists[varID];
420
421
422
423

  auto &hists = this->histograms[varID][levelID];

  assert(nhists > 0);
424
425
  if ((ptype & FIELD_FLT))
    for (size_t i = 0; i < nhists; i++) histReset<float>(hists[i]);
426
  else
427
    for (size_t i = 0; i < nhists; i++) histReset<double>(hists[i]);
428
429
}

430
void
431
HistogramSet::getVarLevelPercentiles(Field &field, int varID, int levelID, double p, int ptype)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
432
{
433
  auto &array = field.vec_d;
434
435

  assert(!array.empty());
436

Uwe Schulzweida's avatar
Uwe Schulzweida committed
437
  if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
438

Uwe Schulzweida's avatar
Uwe Schulzweida committed
439
  const auto nlevels = this->var_nlevels[varID];
Uwe Schulzweida's avatar
Uwe Schulzweida committed
440
  if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
441

Uwe Schulzweida's avatar
Uwe Schulzweida committed
442
443
  const auto nhists = this->var_nhists[varID];
  if (nhists != gridInqSize(field.grid)) cdoAbort("Grids are different (%s)", __func__);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
444

Uwe Schulzweida's avatar
Uwe Schulzweida committed
445
  const auto &hists = this->histograms[varID][levelID];
446

Uwe Schulzweida's avatar
Uwe Schulzweida committed
447
  field.nmiss = 0;
448

Uwe Schulzweida's avatar
Uwe Schulzweida committed
449
  for (size_t i = 0; i < nhists; i++)
Uwe Schulzweida's avatar
Uwe Schulzweida committed
450
    {
451
      if (hists[i].nsamp)
452
        array[i] = histGetPercentile(hists[i], p, ptype);
Uwe Schulzweida's avatar
Uwe Schulzweida committed
453
454
      else
        {
Uwe Schulzweida's avatar
Uwe Schulzweida committed
455
456
          array[i] = field.missval;
          field.nmiss++;
Uwe Schulzweida's avatar
Uwe Schulzweida committed
457
        }
458
    }
Uwe Schulzweida's avatar
Uwe Schulzweida committed
459
}