/* This file is part of CDO. CDO is a collection of Operators to manipulate and analyse Climate model Data. Copyright (C) 2006 Brockmann Consult See COPYING file for copying and redistribution conditions. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. */ #include #include #include #include "cdo_output.h" #include "percentiles.h" #include "percentiles_hist.h" #define NBINS_DEFAULT 101 #define NBINS_MINIMUM 11 #define DBL_CAPACITY(n) ((int) (((n) * sizeof(int)) / sizeof(double))) #define DBL_PTR(p) ((double *) (p)) #define FLT_PTR(p) ((float *) (p)) #define INT_PTR(p) ((int *) (p)) enum value_operations { OP_ADD = 1, OP_SUB = 2 }; static int histGetEnvNBins() { const char *str = getenv("CDO_PCTL_NBINS"); return str != nullptr ? std::max(atoi(str), NBINS_MINIMUM) : NBINS_DEFAULT; } static void histDefBounds(Histogram &hist, double a, double b) { assert(hist.nbins > 0); hist.min = std::min(a, b); hist.max = std::max(a, b); hist.step = (hist.max - hist.min) / hist.nbins; hist.nsamp = 0; for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0; } static void histBinValue(Histogram &hist, double value) { assert(hist.step > 0); const int bin = std::min((int) ((value - hist.min) / hist.step), hist.nbins - 1); if (bin >= 0 && bin < hist.nbins) INT_PTR(hist.ptr)[bin]++; } static void histBinSubValue(Histogram &hist, double value) { assert(hist.step > 0); const int bin = std::min((int) ((value - hist.min) / hist.step), hist.nbins - 1); if (bin >= 0 && bin < hist.nbins && INT_PTR(hist.ptr)[bin] > 0) INT_PTR(hist.ptr)[bin]--; } template static void histBin(Histogram &hist) { assert(hist.nsamp == DBL_CAPACITY(hist.nbins)); std::vector values(hist.nsamp); for (int i = 0; i < hist.nsamp; i++) values[i] = ((T *) (hist.ptr))[i]; for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0; for (int i = 0; i < hist.nsamp; i++) histBinValue(hist, values[i]); } template static int histReset(Histogram &hist) { assert(hist.nbins > 0); if (hist.nsamp < DBL_CAPACITY(hist.nbins)) { for (int i = 0; i < hist.nsamp; i++) ((T *) (hist.ptr))[i] = 0.; } else { for (int i = 0; i < hist.nbins; i++) INT_PTR(hist.ptr)[i] = 0; } hist.nsamp = 0; return 0; } template static int histAddValue(Histogram &hist, T value) { assert(hist.nbins > 0); // 2011-08-01 Uwe Schulzweida: added check for rounding errors if (value < hist.min && (hist.min - value) < 1e5) value = hist.min; if (value > hist.max && (value - hist.max) < 1e5) value = hist.max; if (IS_EQUAL(hist.min, hist.max)) return 0; if (value < hist.min || value > hist.max) return 1; if (hist.nsamp < DBL_CAPACITY(hist.nbins)) { ((T *) (hist.ptr))[hist.nsamp] = value; hist.nsamp++; } else if (hist.nsamp > DBL_CAPACITY(hist.nbins)) { histBinValue(hist, value); hist.nsamp++; } else { histBin(hist); histBinValue(hist, value); hist.nsamp++; } return 0; } template static void histRemoveValue(Histogram &hist, T value) { int i = 0; for ( i = 0; i < hist.nsamp; i++ ) { if ( IS_EQUAL(((T *) (hist.ptr))[i], value) ) { if ( i != hist.nsamp-1 ) ((T *) (hist.ptr))[i] = ((T *) (hist.ptr))[hist.nsamp-1]; break; } } if ( i == hist.nsamp ) cdoWarning("'%f' not found in histogram!", value); else hist.nsamp--; } static int histSubValue(Histogram &hist, double value) { assert(hist.nbins > 0); // 2011-08-01 Uwe Schulzweida: added check for rounding errors if (value < hist.min && (hist.min - value) < 1e5) value = hist.min; if (value > hist.max && (value - hist.max) < 1e5) value = hist.max; if (IS_EQUAL(hist.min, hist.max)) return 0; if (value < hist.min || value > hist.max) return 1; if (hist.nsamp < DBL_CAPACITY(hist.nbins)) { histRemoveValue(hist, value); } else if (hist.nsamp > DBL_CAPACITY(hist.nbins)) { histBinSubValue(hist, value); hist.nsamp--; } else return 1; return 0; } static double histGetPercentile(const Histogram &hist, double p, int ptype) { assert(hist.nsamp > 0); assert(hist.nbins > 0); assert(p >= 0); assert(p <= 100); if (hist.nsamp > DBL_CAPACITY(hist.nbins)) { double s = hist.nsamp * (p / 100.0); int i = 0, count = 0; do count += INT_PTR(hist.ptr)[i++]; while (count < s); assert(i > 0); assert(i - 1 < hist.nbins); assert(INT_PTR(hist.ptr)[i - 1] > 0); assert(hist.step > 0.0); double t = (count - s) / INT_PTR(hist.ptr)[i - 1]; return hist.min + (i - t) * hist.step; } else { if ((ptype & FIELD_FLT)) { std::vector values(hist.nsamp); for (int i = 0; i < hist.nsamp; i++) values[i] = (double)FLT_PTR(hist.ptr)[i]; return percentile(values.data(), hist.nsamp, p); } else return percentile(DBL_PTR(hist.ptr), hist.nsamp, p); } } void HistogramSet::createVarLevels(int varID, int nlevels, size_t nhists) { const int nbins = histGetEnvNBins(); assert(nbins > 0); assert(nlevels > 0); assert(nhists > 0); if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); this->var_nlevels[varID] = nlevels; this->var_nhists[varID] = nhists; this->histograms[varID].resize(nlevels); for (int levelID = 0; levelID < nlevels; levelID++) { this->histograms[varID][levelID].resize(nhists); auto &hists = this->histograms[varID][levelID]; for (size_t histID = 0; histID < nhists; histID++) { hists[histID].min = 0.0; hists[histID].max = 0.0; hists[histID].step = 0.0; hists[histID].nbins = nbins; hists[histID].nsamp = 0; hists[histID].ptr = (int *) malloc(nbins * sizeof(int)); if (hists[histID].ptr == nullptr) cdoAbort("Not enough memory (%s)", __func__); } } } void HistogramSet::defVarLevelBounds(int varID, int levelID, const Field &field1, const Field &field2) { const auto &array1 = field1.vec_d; const auto &array2 = field2.vec_d; assert(!array1.empty()); assert(!array2.empty()); if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); const auto nlevels = this->var_nlevels[varID]; if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__); const auto nhists = this->var_nhists[varID]; if (nhists != gridInqSize(field1.grid) || nhists != gridInqSize(field2.grid)) cdoAbort("Grids are different (%s)", __func__); auto &hists = this->histograms[varID][levelID]; for (size_t i = 0; i < nhists; i++) { const auto a = array1[i]; const auto b = array2[i]; if (DBL_IS_EQUAL(a, field1.missval) || DBL_IS_EQUAL(b, field2.missval) || DBL_IS_EQUAL(a, b)) histDefBounds(hists[i], 0.0, 0.0); else histDefBounds(hists[i], a, b); } } void HistogramSet::defVarLevelBoundsF(int varID, int levelID, const Field &field1, const Field &field2) { const auto &array1 = field1.vec_f; const auto &array2 = field2.vec_f; assert(!array1.empty()); assert(!array2.empty()); if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); const auto nlevels = this->var_nlevels[varID]; if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__); const auto nhists = this->var_nhists[varID]; if (nhists != gridInqSize(field1.grid) || nhists != gridInqSize(field2.grid)) cdoAbort("Grids are different (%s)", __func__); auto &hists = this->histograms[varID][levelID]; for (size_t i = 0; i < nhists; i++) { const auto a = array1[i]; const auto b = array2[i]; if (DBL_IS_EQUAL(a, field1.missval) || DBL_IS_EQUAL(b, field2.missval) || DBL_IS_EQUAL(a, b)) histDefBounds(hists[i], 0.0, 0.0); else histDefBounds(hists[i], a, b); } } int HistogramSet::addSubVarLevelValues(int varID, int levelID, const Field &field, int operation, int ptype) { const auto &array = field.vec_d; const auto &arrayf = field.vec_f; if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); const auto nlevels = this->var_nlevels[varID]; if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__); const auto nhists = this->var_nhists[varID]; if (nhists != gridInqSize(field.grid)) cdoAbort("Grids are different (%s)", __func__); auto &hists = this->histograms[varID][levelID]; int nign = 0; const auto missval = field.missval; if ( (ptype & FIELD_FLT)) { assert(!arrayf.empty()); if ( operation == OP_ADD ) { if (field.nmiss) { for (size_t i = 0; i < nhists; i++) if (!DBL_IS_EQUAL(array[i], missval)) nign += histAddValue(hists[i], arrayf[i]); } else { for (size_t i = 0; i < nhists; i++) nign += histAddValue(hists[i], arrayf[i]); } } else { if (field.nmiss) { for (size_t i = 0; i < nhists; i++) if (!DBL_IS_EQUAL(array[i], missval)) nign += histSubValue(hists[i], arrayf[i]); } else { for (size_t i = 0; i < nhists; i++) nign += histSubValue(hists[i], arrayf[i]); } } } else { assert(!array.empty()); if ( operation == OP_ADD ) { if (field.nmiss) { for (size_t i = 0; i < nhists; i++) if (!DBL_IS_EQUAL(array[i], missval)) nign += histAddValue(hists[i], array[i]); } else { for (size_t i = 0; i < nhists; i++) nign += histAddValue(hists[i], array[i]); } } else { if (field.nmiss) { for (size_t i = 0; i < nhists; i++) if (!DBL_IS_EQUAL(array[i], missval)) nign += histSubValue(hists[i], array[i]); } else { for (size_t i = 0; i < nhists; i++) nign += histSubValue(hists[i], array[i]); } } } if (nign) { cdoWarning("%d out of %d grid values are out of bounds and have been ignored (%s)", nign, nhists, __func__); return 1; } return 0; } void HistogramSet::Reset(int varID, int levelID, int ptype) { assert(nvars > 0); if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); const auto nlevels = this->var_nlevels[varID]; assert(nlevels > 0); if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__); const auto nhists = this->var_nhists[varID]; auto &hists = this->histograms[varID][levelID]; assert(nhists > 0); if ((ptype & FIELD_FLT)) for (size_t i = 0; i < nhists; i++) histReset(hists[i]); else for (size_t i = 0; i < nhists; i++) histReset(hists[i]); } void HistogramSet::getVarLevelPercentiles(Field &field, int varID, int levelID, double p, int ptype) { auto &array = field.vec_d; assert(!array.empty()); if (varID < 0 || varID >= nvars) cdoAbort("Illegal argument: varID %d is undefined (%s)", varID, __func__); const auto nlevels = this->var_nlevels[varID]; if (levelID < 0 || levelID >= nlevels) cdoAbort("Illegal argument: levelID %d is undefined (%s)", levelID, __func__); const auto nhists = this->var_nhists[varID]; if (nhists != gridInqSize(field.grid)) cdoAbort("Grids are different (%s)", __func__); const auto &hists = this->histograms[varID][levelID]; field.nmiss = 0; for (size_t i = 0; i < nhists; i++) { if (hists[i].nsamp) array[i] = histGetPercentile(hists[i], p, ptype); else { array[i] = field.missval; field.nmiss++; } } }