/* This file is part of CDO. CDO is a collection of Operators to manipulate and analyse Climate model Data. Copyright (C) 2003-2017 Uwe Schulzweida, See COPYING file for copying and redistribution conditions. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. */ /* This module contains the following operators: Ensstat3 ensrkhist_space Ensemble ranked histogram averaged over time Ensstat3 ensrkhist_time Ensemble ranked histogram averaged over space Ensstat3 ensroccurve Ensamble Receiver Operating Characteristics */ #include #include "cdo.h" #include "cdo_int.h" #include "pstream.h" #include "util.h" // Defines for rank histogram enum TDATA_TYPE {TIME, SPACE}; #define time_data TIME #define space_data SPACE // Defines for Receiver Operating Characteristics (ROC) #define DEBUG_ROC 0 enum CONTINGENCY_TYPE {TP, FP, FN, TN}; /* TP - True positive ( event forecast and occured) HIT */ /* FP - False positive ( event forecast and not occured) FALSE ALARM */ /* TN - True negative ( event not forecast and not ocurred) CORRECT REJECTION */ /* FN - False negative ( event not forecast and ocurred) MISSED */ enum ROC_ENUM_TYPE {TPR, FPR}; /* TPR = True Positive Rate = TP / ( TP + FN ) */ /* FNR = False Negtive Rate = FN / ( FP + TN ) */ double roc_curve_integrate(const double **roc, const int n); void *Ensstat3(void *argument) { int i,j; int nrecs = 0, nrecs0, nmiss; int cum; int chksum; // for check of histogram population int levelID, varID, binID = 0; int vlistID; int gridID, gridID2; int have_miss = 0; int streamID = 0, streamID2 = 0; int **array2 = NULL; int **ctg_tab = NULL, *hist = NULL; // contingency table and histogram double missval; double *dat; // pointer to ensemble data for ROC double *uThresh = NULL, *lThresh = NULL; // thresholds for histograms double **roc = NULL; // receiver operating characteristics table double val; int ival; typedef struct { int streamID; int vlistID; double *array; } ens_file_t; cdoInitialize(argument); // clang-format off cdoOperatorAdd("ensroc", func_roc, 0, NULL); cdoOperatorAdd("ensrkhist_space", func_rank, space_data, NULL); cdoOperatorAdd("ensrkhist_time", func_rank, time_data, NULL); // clang-format on int operatorID = cdoOperatorID(); int operfunc = cdoOperatorF1(operatorID); int datafunc = cdoOperatorF2(operatorID); int nbins = 0; if ( operfunc == func_roc ) { operatorInputArg("Number of eigen functions to write out"); nbins = parameter2int(operatorArgv()[0]); } int nfiles = cdoStreamCnt() - 1; int nens = nfiles-1; if ( cdoVerbose ) cdoPrint("Ensemble over %d files.", nfiles); const char *ofilename = cdoStreamName(nfiles)->args; if ( !cdoOverwriteMode && fileExists(ofilename) && !userFileOverwrite(ofilename) ) cdoAbort("Outputfile %s already exists!", ofilename); ens_file_t *ef = (ens_file_t*) Malloc(nfiles*sizeof(ens_file_t)); /* *************************************************** */ /* should each thread be allocating memory locally???? */ /* ("first touch strategy") */ /* --> #pragma omp parallel for ... */ /* *************************************************** */ field_type *field = (field_type*) Malloc(ompNumThreads*sizeof(field_type)); for ( i = 0; i < ompNumThreads; i++ ) { field_init(&field[i]); field[i].size = nfiles; field[i].ptr = (double*) Malloc(nfiles*sizeof(double)); field[i].weight = (double*) Malloc(nfiles*sizeof(double)); for ( int fileID = 0; fileID < nfiles; fileID++ ) field[i].weight[fileID] = 1; } for ( int fileID = 0; fileID < nfiles; fileID++ ) { streamID = pstreamOpenRead(cdoStreamName(fileID)); vlistID = pstreamInqVlist(streamID); ef[fileID].streamID = streamID; ef[fileID].vlistID = vlistID; } /* check for identical contents of all ensemble members */ for ( int fileID = 1; fileID < nfiles; fileID++ ) vlistCompare(ef[0].vlistID, ef[fileID].vlistID, CMP_ALL); int vlistID1 = ef[0].vlistID; int vlistID2 = vlistCreate(); int nvars = vlistNvars(vlistID1); int *varID2 = (int*) Malloc(nvars*sizeof(int)); double *levs = (double*) Calloc(nfiles, sizeof(double)); int zaxisID2 = zaxisCreate(ZAXIS_GENERIC, nfiles); for ( i=0; i 1 ) { cdoWarning("More than one level not supported when processing ranked histograms."); cdoWarning("Try to use `cdo splitlevel` to split the dataset into levels and apply"); cdoWarning("the operator seperately to each level."); cdoAbort("Exit due to unsupported file structure"); } } if ( operfunc != func_roc ) { streamID2 = pstreamOpenWrite(cdoStreamName(nfiles), cdoFiletype()); pstreamDefVlist(streamID2, vlistID2); } int gridsize = vlistGridsizeMax(vlistID1); for ( int fileID = 0; fileID < nfiles; fileID++ ) ef[fileID].array = (double*) Malloc(gridsize*sizeof(double)); if ( operfunc == func_rank && datafunc == SPACE ) { /*need to memorize data for entire grid before writing */ array2 = (int**) Malloc((nfiles+1)*sizeof(int*)); for ( binID=0; binIDargs); else cdoAbort("Inconsistent ensemble file, number of records at time step %d of %s and %s differ!", tsID+1, cdoStreamName(0)->args, cdoStreamName(fileID)->args); } } if ( operfunc == func_rank && ( datafunc == TIME || tsID == 0 ) ) { taxisCopyTimestep(taxisID2, taxisID1); if ( nrecs0 > 0 ) pstreamDefTimestep(streamID2, tsID); } // fprintf(stderr,"TIMESTEP %i varID %i rec %i\n",tsID,varID,recID); for ( int recID = 0; recID < nrecs0; recID++ ) { #if defined(_OPENMP) #pragma omp parallel for default(none) shared(ef, nfiles) \ private(streamID, nmiss) \ lastprivate(varID, levelID) #endif for ( int fileID = 0; fileID < nfiles; fileID++ ) { streamID = ef[fileID].streamID; pstreamInqRecord(streamID, &varID, &levelID); pstreamReadRecord(streamID, ef[fileID].array, &nmiss); } gridID = vlistInqVarGrid(vlistID1, varID); gridsize = gridInqSize(gridID); missval = vlistInqVarMissval(vlistID1, varID); nmiss = 0; if ( datafunc == TIME && operfunc == func_rank) for ( binID=0;binID%i\n",binID); if ( datafunc == SPACE && ! have_miss) array2[binID][i]++; else if ( ! have_miss ) array2[binID][0]++; break; case ( func_roc ): /* ********************************** */ /* RECEIVER OPERATING CHARACTERISTICS */ /* ********************************** */ dat = &field[ompthID].ptr[1]; ival = field[ompthID].ptr[0] > 0.5? 1 : 0; for ( binID=0; binID= lThresh[binID] && dat[j] < uThresh[binID] ) hist[binID]++; chksum = 0; for ( binID=0; binID 0 ); if ( operfunc == func_rank ) { double *tmpdoub; int osize = gridsize; if ( datafunc == TIME ) osize = 1; tmpdoub = (double*) Malloc(osize*sizeof(double)); for ( binID = 0; binID < nfiles; binID++ ) { for ( i = 0; i < osize; i++ ) tmpdoub[i] = (double) array2[binID][i]; pstreamDefRecord(streamID2, varID2[varID], binID); pstreamWriteRecord(streamID2, tmpdoub, nmiss); } Free(tmpdoub); } else if ( operfunc == func_roc ) { fprintf(stdout, "# : TP FP FN TN TPR FPR\n"); for ( i=0; i<= nbins; i++ ) { int p = ctg_tab[i][TP] + ctg_tab[i][FN]; int n = ctg_tab[i][FP] + ctg_tab[i][TN]; double tpr = ctg_tab[i][TP]/(double) p; double fpr = ctg_tab[i][FP]/(double) n; chksum += ctg_tab[i][0] + ctg_tab[i][1] + ctg_tab[i][2] + ctg_tab[i][3]; roc[i][TPR] = tpr; roc[i][FPR] = fpr; int sum = ctg_tab[i][TP] + ctg_tab[i][TN] + ctg_tab[i][FP] + ctg_tab[i][FN]; fprintf(stdout, "%3i %10.4g: %6i %6i %6i %6i (%6i): %10.4g %10.4g\n", i,i