Commit 191bfe69 authored by Uwe Schulzweida's avatar Uwe Schulzweida
Browse files

eigen_solution_of_symmetric_matrix(): changed type parameter covar to Varray2D<double> &.

parent e9e9c0b5
......@@ -36,7 +36,6 @@
#include "cdo_options.h"
#include "cdo_vlist.h"
#include "dmemory.h"
#include "process_int.h"
#include "param_conversion.h"
#include <mpim_grid.h>
......@@ -49,21 +48,21 @@
static void
scale_eigvec_grid(Varray<double> &out, int tsID, size_t npack, const std::vector<size_t> &pack, const Varray<double> &weight,
double **covar, double sum_w)
Varray2D<double> &covar, double sum_w)
{
for (size_t i = 0; i < npack; ++i) out[pack[i]] = covar[tsID][i] / std::sqrt(weight[pack[i]] / sum_w);
}
static void
scale_eigvec_time(Varray<double> &out, int tsID, int nts, size_t npack, const std::vector<size_t> &pack, const Varray<double> &weight,
double **covar, const Varray2D<double> &data, double missval, double sum_w)
const Varray2D<double> &covar, const Varray2D<double> &data, double missval, double sum_w)
{
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(npack, nts, tsID, pack, data, covar, out)
#endif
for (size_t i = 0; i < npack; ++i)
{
double sum = 0;
double sum = 0.0;
for (int j = 0; j < nts; ++j) sum += data[j][i] * covar[tsID][j];
out[pack[i]] = sum;
......@@ -77,7 +76,7 @@ scale_eigvec_time(Varray<double> &out, int tsID, int nts, size_t npack, const st
*/
// Normalizing
double sum = 0;
double sum = 0.0;
#ifdef _OPENMP
#pragma omp parallel for default(none) reduction(+ : sum) shared(out, weight, pack, npack)
......@@ -88,7 +87,7 @@ scale_eigvec_time(Varray<double> &out, int tsID, int nts, size_t npack, const st
sum += weight[pack[i]] * out[pack[i]] * out[pack[i]];
}
if (sum > 0)
if (sum > 0.0)
{
sum = std::sqrt(sum / sum_w);
#ifdef _OPENMP
......@@ -129,8 +128,7 @@ EOFs(void *process)
bool init;
bool first_call;
Varray<double> eig_val;
double *covar_array;
double **covar;
Varray2D<double> covar;
Varray2D<double> data;
};
......@@ -273,8 +271,6 @@ EOFs(void *process)
{
eofdata[varID][levelID].init = false;
eofdata[varID][levelID].first_call = true;
eofdata[varID][levelID].covar_array = nullptr;
eofdata[varID][levelID].covar = nullptr;
if (time_space) eofdata[varID][levelID].data.resize(nts);
}
......@@ -282,8 +278,7 @@ EOFs(void *process)
if (Options::cdoVerbose) cdoPrint("Allocated eigenvalue/eigenvector structures with nts=%d gridsize=%zu", nts, gridsizemax);
double **covar = nullptr;
double sum_w = 1.;
double sum_w = 1.0;
int tsID = 0;
......@@ -315,7 +310,7 @@ EOFs(void *process)
if (weight_mode == WEIGHT_ON)
{
sum_w = 0;
sum_w = 0.0;
for (size_t i = 0; i < npack; i++) sum_w += weight[pack[i]];
}
}
......@@ -337,20 +332,11 @@ EOFs(void *process)
{
if (!eofdata[varID][levelID].init)
{
n = npack;
double *covar_array = (double *) Malloc(npack * npack * sizeof(double));
covar = (double **) Malloc(npack * sizeof(double *));
for (size_t i = 0; i < npack; ++i) covar[i] = covar_array + npack * i;
for (size_t i = 0; i < npack; ++i)
for (size_t j = 0; j < npack; ++j) covar[i][j] = 0;
eofdata[varID][levelID].covar_array = covar_array;
eofdata[varID][levelID].covar = covar;
}
else
{
covar = eofdata[varID][levelID].covar;
eofdata[varID][levelID].covar.resize(npack);
for (size_t i = 0; i < npack; ++i) eofdata[varID][levelID].covar[i].resize(npack, 0.0);
}
auto &covar = eofdata[varID][levelID].covar;
#ifdef _OPENMP
#pragma omp parallel for default(shared)
#endif
......@@ -375,9 +361,9 @@ EOFs(void *process)
if (tsID == 1) cdoAbort("File consists of only one timestep!");
/* write files with eigenvalues (ID3) and eigenvectors (ID2) */
// write files with eigenvalues (ID3) and eigenvectors (ID2)
/* eigenvalues */
// eigenvalues
const auto streamID2 = cdoOpenWrite(1);
const auto vlistID2 = vlistDuplicate(vlistID1);
......@@ -389,12 +375,12 @@ EOFs(void *process)
const auto gridID2 = gridCreate(GRID_LONLAT, 1);
gridDefXsize(gridID2, 1);
gridDefYsize(gridID2, 1);
double xvals = 0, yvals = 0;
double xvals = 0.0, yvals = 0.0;
gridDefXvals(gridID2, &xvals);
gridDefYvals(gridID2, &yvals);
for (int i = 0; i < ngrids; i++) vlistChangeGridIndex(vlistID2, i, gridID2);
/* eigenvectors */
// eigenvectors
const auto streamID3 = cdoOpenWrite(2);
const auto vlistID3 = vlistDuplicate(vlistID1);
......@@ -441,6 +427,7 @@ EOFs(void *process)
for (levelID = 0; levelID < nlevs; levelID++)
{
const auto &data = eofdata[varID][levelID].data;
auto &covar = eofdata[varID][levelID].covar;
if (eofdata[varID][levelID].first_call)
{
......@@ -456,8 +443,6 @@ EOFs(void *process)
{
if (npack) eofdata[varID][levelID].eig_val.resize(npack);
covar = eofdata[varID][levelID].covar;
for (size_t ipack = 0; ipack < npack; ++ipack)
{
const size_t i = pack[ipack];
......@@ -481,13 +466,10 @@ EOFs(void *process)
{
if (Options::cdoVerbose) cdoPrint("allocating covar with %i x %i elements | npack=%zu", nts, nts, npack);
double *covar_array = (double *) Malloc(nts * nts * sizeof(double));
covar = (double **) Malloc(nts * sizeof(double *));
for (int i = 0; i < nts; ++i) covar[i] = covar_array + nts * i;
covar.resize(nts);
for (int i = 0; i < nts; ++i) covar[i].resize(nts);
eofdata[varID][levelID].eig_val.resize(nts);
eofdata[varID][levelID].covar_array = covar_array;
eofdata[varID][levelID].covar = covar;
#ifdef _OPENMP
#pragma omp parallel for default(shared) schedule(dynamic)
......@@ -499,7 +481,7 @@ EOFs(void *process)
for (int j2 = j1; j2 < nts; j2++)
{
const auto &df2p = data[j2];
double sum = 0;
double sum = 0.0;
for (size_t i = 0; i < npack; i++) sum += weight[pack[i]] * df1p[i] * df2p[i];
covar[j1][j2] = sum / sum_w / nts;
}
......@@ -510,7 +492,7 @@ EOFs(void *process)
if (Options::Timer) timer_stop(timer_cov);
/* SOLVE THE EIGEN PROBLEM */
// SOLVE THE EIGEN PROBLEM
if (Options::Timer) timer_start(timer_eig);
auto &eig_val = eofdata[varID][levelID].eig_val;
......@@ -521,16 +503,12 @@ EOFs(void *process)
cdo::eigen_solution_of_symmetric_matrix(covar, eig_val, n, __func__);
if (Options::Timer) timer_stop(timer_eig);
/* NOW: covar contains the eigenvectors, eig_val the eigenvalues */
// NOW: covar contains the eigenvectors, eig_val the eigenvalues
for (size_t i = 0; i < gridsize; ++i) out[i] = missval;
// for ( int i = 0; i < n; i++ ) eig_val[i] *= sum_w;
} // first_call
else
{
covar = eofdata[varID][levelID].covar;
}
if (tsID < n_eig)
{
......@@ -554,15 +532,6 @@ EOFs(void *process)
} // loop nvars
}
for (varID = 0; varID < nvars; varID++)
{
for (levelID = 0; levelID < varList1[varID].nlevels; levelID++)
{
if (eofdata[varID][levelID].covar_array) Free(eofdata[varID][levelID].covar_array);
if (eofdata[varID][levelID].covar) Free(eofdata[varID][levelID].covar);
}
}
cdoStreamClose(streamID3);
cdoStreamClose(streamID2);
cdoStreamClose(streamID1);
......
......@@ -36,7 +36,6 @@
#include "cdo_options.h"
#include "cdo_vlist.h"
#include "dmemory.h"
#include "process_int.h"
#include "param_conversion.h"
#include <mpim_grid.h>
......@@ -68,7 +67,6 @@ EOF3d(void *process)
int calendar = CALENDAR_STANDARD;
double sum_w;
double **cov = nullptr; // TODO: covariance matrix / eigenvectors after solving
if (Options::Timer)
{
......@@ -279,9 +277,9 @@ EOF3d(void *process)
continue;
}
cov = (double **) Malloc(nts * sizeof(double *));
for (int j1 = 0; j1 < nts; j1++) cov[j1] = (double *) Malloc(nts * sizeof(double));
Varray<double> eigv(n);
Varray2D<double> covar(nts);
for (int j1 = 0; j1 < nts; j1++) covar[j1].resize(nts);
if (Options::cdoVerbose)
{
......@@ -300,13 +298,13 @@ EOF3d(void *process)
double *df2p = datafields[varID][j2].data();
double sum = 0.0;
for (size_t i = 0; i < npack; i++) sum += weight[pack[i] % gridsizemax] * df1p[pack[i]] * df2p[pack[i]];
cov[j2][j1] = cov[j1][j2] = sum / sum_w / nts;
covar[j2][j1] = covar[j1][j2] = sum / sum_w / nts;
}
}
if (Options::cdoVerbose) cdoPrint("calculated cov-matrix");
/* SOLVE THE EIGEN PROBLEM */
// SOLVE THE EIGEN PROBLEM
if (Options::Timer) timer_stop(timer_cov);
if (Options::Timer) timer_start(timer_eig);
......@@ -314,10 +312,10 @@ EOF3d(void *process)
if (Options::cdoVerbose) cdoPrint("Processed correlation matrix for var %2i | npack: %zu", varID, n);
if (eigen_mode == JACOBI)
cdo::parallel_eigen_solution_of_symmetric_matrix(cov, eigv, n, __func__);
cdo::parallel_eigen_solution_of_symmetric_matrix(covar, eigv, n, __func__);
else
cdo::eigen_solution_of_symmetric_matrix(cov, eigv, n, __func__);
// NOW: cov contains the eigenvectors, eigv the eigenvalues
cdo::eigen_solution_of_symmetric_matrix(covar, eigv, n, __func__);
// NOW: covar contains the eigenvectors, eigv the eigenvalues
if (Options::cdoVerbose) cdoPrint("Processed SVD decomposition for var %d from %d x %d matrix", varID, n, n);
......@@ -330,12 +328,12 @@ EOF3d(void *process)
double *eigenvec = eigenvectors[varID][eofID].data();
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(varID, nts, eofID, npack, pack, cov, datafields, eigenvec)
#pragma omp parallel for default(none) shared(varID, nts, eofID, npack, pack, covar, datafields, eigenvec)
#endif
for (size_t i = 0; i < npack; i++)
{
double sum = 0.0;
for (int j = 0; j < nts; j++) sum += datafields[varID][j][pack[i]] * cov[eofID][j];
for (int j = 0; j < nts; j++) sum += datafields[varID][j][pack[i]] * covar[eofID][j];
eigenvec[pack[i]] = sum;
}
......@@ -364,9 +362,6 @@ EOF3d(void *process)
for (size_t i = 0; i < npack; i++) eigenvec[pack[i]] = missval;
}
} // for ( eofID = 0; eofID < n_eig; eofID++ )
for (int i = 0; i < n; i++)
if (cov[i]) Free(cov[i]);
} // for ( varID = 0; varID < nvars; varID++ )
// write files with eigenvalues (ID3) and eigenvectors (ID2)
......@@ -383,7 +378,7 @@ EOF3d(void *process)
const auto gridID2 = gridCreate(GRID_LONLAT, 1);
gridDefXsize(gridID2, 1);
gridDefYsize(gridID2, 1);
double xvals = 0, yvals = 0;
double xvals = 0.0, yvals = 0.0;
gridDefXvals(gridID2, &xvals);
gridDefYvals(gridID2, &yvals);
......@@ -399,7 +394,7 @@ EOF3d(void *process)
const auto nzaxis = vlistNzaxis(vlistID2);
for (int i = 0; i < nzaxis; i++) vlistChangeZaxisIndex(vlistID2, i, zaxisID2);
/* eigenvectors */
// eigenvectors
const auto streamID3 = cdoOpenWrite(2);
const auto vlistID3 = vlistDuplicate(vlistID1);
......
......@@ -55,18 +55,18 @@ static int n_finished;
namespace cdo
{
void
heap_sort(Varray<double> &eig_val, double **a, int n)
static void
heap_sort(Varray<double> &eig_val, Varray2D<double> &a, long n)
{
int j_next;
long j_next;
// First part of heap sort:
for (int i = n / 2 - 1; i >= 0; i--)
for (long i = n / 2 - 1; i >= 0; i--)
{
for (int j = i; 2 * j + 1 < n; j = j_next)
for (long j = i; 2 * j + 1 < n; j = j_next)
{
int k1 = 2 * j + 1;
int k2 = 2 * j + 2;
auto k1 = 2 * j + 1;
auto k2 = 2 * j + 2;
j_next = j;
if (eig_val[k1] < eig_val[j_next]) j_next = k1;
if (k2 < n && eig_val[k2] < eig_val[j_next]) j_next = k2;
......@@ -76,14 +76,14 @@ heap_sort(Varray<double> &eig_val, double **a, int n)
}
}
// Second part of head sort:
for (int i = n - 1; i > 0; i--)
for (long i = n - 1; i > 0; i--)
{
std::swap(eig_val[0], eig_val[i]);
std::swap(a[0], a[i]);
for (int j = 0; 2 * j + 1 < i; j = j_next)
for (long j = 0; 2 * j + 1 < i; j = j_next)
{
int k1 = 2 * j + 1;
int k2 = 2 * j + 2;
auto k1 = 2 * j + 1;
auto k2 = 2 * j + 2;
j_next = j;
if (eig_val[k1] < eig_val[j_next]) j_next = k1;
if (k2 < i && eig_val[k2] < eig_val[j_next]) j_next = k2;
......@@ -95,9 +95,9 @@ heap_sort(Varray<double> &eig_val, double **a, int n)
}
static void
make_symmetric_matrix_triangular(double **a, int n, Varray<double> &d, Varray<double> &e)
make_symmetric_matrix_triangular(Varray2D<double> &a, long n, Varray<double> &d, Varray<double> &e)
{
int i, j, k;
long i, j, k;
double f, g, h, hh, scale;
for (i = n - 1; i >= 1; i--)
......@@ -172,13 +172,13 @@ pythagoras(double a, double b)
{
auto sqr = abs_b / abs_a;
sqr *= sqr;
return abs_a * std::sqrt(1 + sqr);
return abs_a * std::sqrt(1.0 + sqr);
}
else if (abs_b > abs_a)
{
auto sqr = abs_a / abs_b;
sqr *= sqr;
return abs_b * std::sqrt(1 + sqr);
return abs_b * std::sqrt(1.0 + sqr);
}
else
return M_SQRT2 * abs_a;
......@@ -187,15 +187,15 @@ pythagoras(double a, double b)
#define MAX_ITER 1000
static void
eigen_solution_of_triangular_matrix(Varray<double> &d, Varray<double> &e, int n, double **a, const char *prompt)
eigen_solution_of_triangular_matrix(Varray<double> &d, Varray<double> &e, long n, Varray2D<double> &a, const char *prompt)
{
constexpr double eps = 1.e-6;
int i, k, l, m, iter;
long i, k, l, m, iter;
double b, c, f, g, p, r, s;
for (i = 1; i < n; i++) e[i - 1] = e[i];
e[n - 1] = 0;
e[n - 1] = 0.0;
for (l = 0; l < n; l++)
{
iter = 0;
......@@ -214,7 +214,7 @@ eigen_solution_of_triangular_matrix(Varray<double> &d, Varray<double> &e, int n,
fprintf(stderr, "%s: ERROR! Too many iterations while determining the eigensolution!\n", prompt);
exit(1);
}
g = (d[l + 1] - d[l]) / (2 * e[l]);
g = (d[l + 1] - d[l]) / (2.0 * e[l]);
r = pythagoras(g, 1);
g = d[m] - d[l] + e[l] / (g + (std::fabs(g) > 0 ? (g >= 0 ? std::fabs(r) : -std::fabs(r)) : r));
s = c = 1;
......@@ -225,17 +225,17 @@ eigen_solution_of_triangular_matrix(Varray<double> &d, Varray<double> &e, int n,
b = c * e[i];
e[i + 1] = r = pythagoras(f, g);
if (DBL_IS_EQUAL(r, 0.))
if (DBL_IS_EQUAL(r, 0.0))
{
d[i + 1] -= p;
e[m] = 0;
e[m] = 0.0;
break;
}
s = f / r;
c = g / r;
g = d[i + 1] - p;
r = (d[i] - g) * s + 2 * c * b;
r = (d[i] - g) * s + 2.0 * c * b;
p = s * r;
d[i + 1] = g + p;
g = c * r - b;
......@@ -247,17 +247,17 @@ eigen_solution_of_triangular_matrix(Varray<double> &d, Varray<double> &e, int n,
}
}
if (DBL_IS_EQUAL(r, 0.) && i >= l) continue;
if (DBL_IS_EQUAL(r, 0.0) && i >= l) continue;
d[l] -= p;
e[l] = g;
e[m] = 0;
e[m] = 0.0;
}
}
}
void
eigen_solution_of_symmetric_matrix(double **a, Varray<double> &eig_val, int n, const char *prompt)
eigen_solution_of_symmetric_matrix(Varray2D<double> &a, Varray<double> &eig_val, long n, const char *prompt)
// After return the rows (!!!) of a are the eigenvectors
{
{
......@@ -266,8 +266,8 @@ eigen_solution_of_symmetric_matrix(double **a, Varray<double> &eig_val, int n, c
eigen_solution_of_triangular_matrix(eig_val, e, n, a, prompt);
}
for (int i = 0; i < n; i++)
for (int j = i + 1; j < n; j++) std::swap(a[i][j], a[j][i]);
for (long i = 0; i < n; i++)
for (long j = i + 1; j < n; j++) std::swap(a[i][j], a[j][i]);
heap_sort(eig_val, a, n);
}
......@@ -402,15 +402,15 @@ inverse_of_matrix(double **a, double **b, int n)
}
*/
void
fft(double *restrict real, double *restrict imag, int n, int sign)
fft(double *restrict real, double *restrict imag, long n, int sign)
{
// n must be a power of 2
// sign should be 1 (FT) or -1 (reverse FT)
int j, j1, j2;
int bit;
long j, j1, j2;
long bit;
// Bit reversal part
for (int i = j = 0; i < n; i++) // The bit pattern of i and j are reverse
for (long i = j = 0; i < n; i++) // The bit pattern of i and j are reverse
{
if (i > j) std::swap(real[i], real[j]);
if (i > j) std::swap(imag[i], imag[j]);
......@@ -420,13 +420,13 @@ fft(double *restrict real, double *restrict imag, int n, int sign)
}
// Danielson-Lanczos Part
for (int step = 1; step < n; step <<= 1)
for (long step = 1; step < n; step <<= 1)
{
const auto w_r = std::cos(M_PI / step);
const auto w_i = std::sin(M_PI / step) * sign;
double ww_r = 1.0;
double ww_i = 0.0;
for (int i = 0; i < step; i++)
for (long i = 0; i < step; i++)
{
double temp_r, temp_i;
for (j1 = i, j2 = i + step; j2 < n; j1 += 2 * step, j2 += 2 * step)
......@@ -445,12 +445,12 @@ fft(double *restrict real, double *restrict imag, int n, int sign)
}
const auto norm = 1.0 / std::sqrt(n);
for (int i = 0; i < n; i++) real[i] *= norm;
for (int i = 0; i < n; i++) imag[i] *= norm;
for (long i = 0; i < n; i++) real[i] *= norm;
for (long i = 0; i < n; i++) imag[i] *= norm;
}
void
ft(double *real, double *imag, int n, int sign)
ft(double *real, double *imag, long n, int sign)
{
// sign should be 1 (FT) or -1 (reverse FT)
static double *work_r = 0, *work_i = 0;
......@@ -467,7 +467,7 @@ ft(double *real, double *imag, int n, int sign)
/* free_at_exit (work_i); */
}
for (int k = 0; k < n; k++)
for (long k = 0; k < n; k++)
{
const auto w_r = std::cos(2 * M_PI * k / n);
const auto w_i = std::sin(2 * M_PI * k / n) * sign;
......@@ -475,7 +475,7 @@ ft(double *real, double *imag, int n, int sign)
double ww_i = 0.0;
double sum_r = 0.0;
double sum_i = 0.0;
for (int j = 0; j < n; j++)
for (long j = 0; j < n; j++)
{
sum_r += real[j] * ww_r - imag[j] * ww_i;
sum_i += real[j] * ww_i + imag[j] * ww_r;
......@@ -488,18 +488,18 @@ ft(double *real, double *imag, int n, int sign)
}
const auto norm = 1. / std::sqrt(n);
for (int k = 0; k < n; k++) real[k] = work_r[k] * norm;
for (int k = 0; k < n; k++) imag[k] = work_i[k] * norm;
for (long k = 0; k < n; k++) real[k] = work_r[k] * norm;
for (long k = 0; k < n; k++) imag[k] = work_i[k] * norm;
}
// reentrant version of ft
void
ft_r(double *restrict real, double *restrict imag, int n, int sign, double *restrict work_r, double *restrict work_i)
ft_r(double *restrict real, double *restrict imag, long n, int sign, double *restrict work_r, double *restrict work_i)
{
// sign should be 1 (FT) or -1 (reverse FT)
double temp_r;
for (int k = 0; k < n; k++)
for (long k = 0; k < n; k++)
{
const auto w_r = std::cos(2 * M_PI * k / n);
const auto w_i = std::sin(2 * M_PI * k / n) * sign;
......@@ -507,7 +507,7 @@ ft_r(double *restrict real, double *restrict imag, int n, int sign, double *rest
double ww_i = 0.0;
double sum_r = 0.0;
double sum_i = 0.0;
for (int j = 0; j < n; j++)
for (long j = 0; j < n; j++)
{
sum_r += real[j] * ww_r - imag[j] * ww_i;
sum_i += real[j] * ww_i + imag[j] * ww_r;
......@@ -520,8 +520,8 @@ ft_r(double *restrict real, double *restrict imag, int n, int sign, double *rest
}
const auto norm = 1.0 / std::sqrt(n);
for (int k = 0; k < n; k++) real[k] = work_r[k] * norm;
for (int k = 0; k < n; k++) imag[k] = work_i[k] * norm;
for (long k = 0; k < n; k++) real[k] = work_r[k] * norm;
for (long k = 0; k < n; k++) imag[k] = work_i[k] * norm;
}
double
......@@ -1058,17 +1058,17 @@ fisher(double m, double n, double x, const char *prompt)
/* the same time. */
/* ******************************************************************************** */
static void
annihilate_1side(double **M, long i, long j, long n)
annihilate_1side(Varray2D<double> &M, long i, long j, long n)
{
i--;
j--;
if (j < i) std::swap(i, j);
double *restrict Mi = M[i];
double *restrict Mj = M[j];
auto &Mi = M[i];
auto &Mj = M[j];
double alpha = 0, beta = 0, gamma = 0;
double alpha = 0.0, beta = 0.0, gamma = 0.0;
#ifdef HAVE_OPENMP4
#pragma omp simd reduction(+ : alpha) reduction(+ : beta) reduction(+ : gamma)
#endif
......@@ -1110,7 +1110,7 @@ annihilate_1side(double **M, long i, long j, long n)
}
static int
jacobi_1side(double **M, Varray<double> &A, long n)
jacobi_1side(Varray2D<double> &M, Varray<double> &A, long n)
{
Varray2D<int> annihilations(2);
annihilations[0].resize(n * n);
......@@ -1221,7 +1221,7 @@ jacobi_1side(double **M, Varray<double> &A, long n)
cdoWarning("Setting Matrix and Eigenvalues to 0 before return");
for (long i = 0; i < n; i++)
{
memset(M[i], 0, n * sizeof(double));
memset(M[i].data(), 0, n * sizeof(double));<