Commit d7c510f1 authored by Cedrick Ansorge's avatar Cedrick Ansorge
Browse files

parallel version of src/EOFs.c with one sided jacobi scheme

parent 554cbb0d
......@@ -270,6 +270,262 @@ m4/check_gnu_make.m4 -text
m4/gcc_ac_c_char_bit.m4 -text
m4/starlink_fpp.m4 -text
m4/tj_find_type.m4 -text
my_src/Arith.c -text
my_src/Arithc.c -text
my_src/Arithdays.c -text
my_src/Arithlat.c -text
my_src/CDItest.c -text
my_src/Cat.c -text
my_src/Change.c -text
my_src/Change_e5slm.c -text
my_src/Cloudlayer.c -text
my_src/Command.c -text
my_src/Comp.c -text
my_src/Compc.c -text
my_src/Complextorect.c -text
my_src/Cond.c -text
my_src/Cond2.c -text
my_src/Condc.c -text
my_src/Consecstat.c -text
my_src/Copy.c -text
my_src/Deltime.c -text
my_src/Detrend.c -text
my_src/Diff.c -text
my_src/EOFs.c -text
my_src/EcaIndices.c -text
my_src/Echam5ini.c -text
my_src/Enlarge.c -text
my_src/Enlargegrid.c -text
my_src/Ensstat.c -text
my_src/Eofcoeff.c -text
my_src/Exprf.c -text
my_src/Filedes.c -text
my_src/Fillmiss.c -text
my_src/Filter.c -text
my_src/Fldrms.c -text
my_src/Fldstat.c -text
my_src/Fldstat2.c -text
my_src/Fourier.c -text
my_src/Gather.c -text
my_src/Gengrid.c -text
my_src/Gradsdes.c -text
my_src/Gridboxstat.c -text
my_src/Gridcell.c -text
my_src/Harmonic.c -text
my_src/Hi.c -text
my_src/Histogram.c -text
my_src/Importamsr.c -text
my_src/Importbinary.c -text
my_src/Importcmsaf.c -text
my_src/Importobs.c -text
my_src/Info.c -text
my_src/Input.c -text
my_src/Intgrid.c -text
my_src/Intgridtraj.c -text
my_src/Intlevel.c -text
my_src/Intntime.c -text
my_src/Inttime.c -text
my_src/Intyear.c -text
my_src/Invert.c -text
my_src/Invertlev.c -text
my_src/Log.c -text
my_src/Makefile.am -text
my_src/Makefile.in -text
my_src/Maskbox.c -text
my_src/Mastrfu.c -text
my_src/Math.c -text
my_src/Merge.c -text
my_src/Mergegrid.c -text
my_src/Mergetime.c -text
my_src/Merstat.c -text
my_src/Monarith.c -text
my_src/Mrotuv.c -text
my_src/Mrotuvb.c -text
my_src/Ninfo.c -text
my_src/Nmltest.c -text
my_src/Output.c -text
my_src/Outputgmt.c -text
my_src/Pinfo.c -text
my_src/Pressure.c -text
my_src/Regres.c -text
my_src/Remap.c -text
my_src/Remapeta.c -text
my_src/Replace.c -text
my_src/Replacevalues.c -text
my_src/Rotuv.c -text
my_src/Runpctl.c -text
my_src/Runstat.c -text
my_src/Scatter.c -text
my_src/Seascount.c -text
my_src/Seaspctl.c -text
my_src/Seasstat.c -text
my_src/Selbox.c -text
my_src/Select.c -text
my_src/Seloperator.c -text
my_src/Selrec.c -text
my_src/Seltime.c -text
my_src/Selvar.c -text
my_src/Set.c -text
my_src/Setbox.c -text
my_src/Setgatt.c -text
my_src/Setgrid.c -text
my_src/Sethalo.c -text
my_src/Setmiss.c -text
my_src/Setrcaname.c -text
my_src/Settime.c -text
my_src/Setzaxis.c -text
my_src/Showinfo.c -text
my_src/Sinfo.c -text
my_src/Smooth9.c -text
my_src/Sort.c -text
my_src/Sorttimestamp.c -text
my_src/Specinfo.c -text
my_src/Spectral.c -text
my_src/Spectrum.c -text
my_src/Split.c -text
my_src/Splitrec.c -text
my_src/Splitsel.c -text
my_src/Splittime.c -text
my_src/Splityear.c -text
my_src/Subtrend.c -text
my_src/Templates.c -text
my_src/Test.c -text
my_src/Tests.c -text
my_src/Timcount.c -text
my_src/Timpctl.c -text
my_src/Timselpctl.c -text
my_src/Timselstat.c -text
my_src/Timsort.c -text
my_src/Timstat.c -text
my_src/Timstat2.c -text
my_src/Timstat3.c -text
my_src/Tinfo.c -text
my_src/Tocomplex.c -text
my_src/Transpose.c -text
my_src/Trend.c -text
my_src/Trms.c -text
my_src/Tstepcount.c -text
my_src/Vardup.c -text
my_src/Vargen.c -text
my_src/Varrms.c -text
my_src/Vertint.c -text
my_src/Vertstat.c -text
my_src/Vertwind.c -text
my_src/Wct.c -text
my_src/Wind.c -text
my_src/Writegrid.c -text
my_src/Writerandom.c -text
my_src/Ydaypctl.c -text
my_src/Ydaystat.c -text
my_src/Ydrunpctl.c -text
my_src/Ydrunstat.c -text
my_src/Yhourstat.c -text
my_src/Ymonarith.c -text
my_src/Ymonpctl.c -text
my_src/Ymonstat.c -text
my_src/Yseaspctl.c -text
my_src/Yseasstat.c -text
my_src/Zonstat.c -text
my_src/cdi.h -text
my_src/cdilib.c -text
my_src/cdo.c -text
my_src/cdo.h -text
my_src/cdo_int.h -text
my_src/cdo_pthread.c -text
my_src/cdo_vlist.c -text
my_src/cdotest.c -text
my_src/color.c -text
my_src/color.h -text
my_src/commandline.c -text
my_src/config.h.in -text
my_src/counter.h -text
my_src/dmemory.h -text
my_src/dtypes.h -text
my_src/ecacore.c -text
my_src/ecacore.h -text
my_src/ecautil.c -text
my_src/ecautil.h -text
my_src/error.h -text
my_src/etopo.h -text
my_src/exception.c -text
my_src/expr.c -text
my_src/expr.h -text
my_src/expr_lex.c -text
my_src/expr_lex.l -text
my_src/expr_yacc.c -text
my_src/expr_yacc.h -text
my_src/expr_yacc.y -text
my_src/field.c -text
my_src/field.h -text
my_src/field2.c -text
my_src/fieldc.c -text
my_src/fieldmem.c -text
my_src/fieldmer.c -text
my_src/fieldzon.c -text
my_src/fouriertrans.c -text
my_src/functs.h -text
my_src/gradsdeslib.c -text
my_src/gradsdeslib.h -text
my_src/grid.c -text
my_src/grid.h -text
my_src/grid_gme.c -text
my_src/grid_lcc.c -text
my_src/grid_rot.c -text
my_src/griddes.c -text
my_src/griddes.h -text
my_src/griddes_h5.c -text
my_src/griddes_nc.c -text
my_src/hetaeta.c -text
my_src/hetaeta.h -text
my_src/history.c -text
my_src/institution.c -text
my_src/interpol.c -text
my_src/interpol.h -text
my_src/job.c -text
my_src/juldate.c -text
my_src/legendre.c -text
my_src/list.c -text
my_src/list.h -text
my_src/modules.c -text
my_src/modules.h -text
my_src/namelist.c -text
my_src/namelist.h -text
my_src/normal.c -text
my_src/nth_element.c -text
my_src/nth_element.h -text
my_src/operator_help.h -text
my_src/par_io.c -text
my_src/par_io.h -text
my_src/percentiles.c -text
my_src/percentiles.h -text
my_src/pipe.c -text
my_src/pipe.h -text
my_src/printinfo.h -text
my_src/process.c -text
my_src/process.h -text
my_src/pstream.c -text
my_src/pstream.h -text
my_src/pstream_int.h -text
my_src/pthread_debug.c -text
my_src/pthread_debug.h -text
my_src/readline.c -text
my_src/realtime.c -text
my_src/remap.h -text
my_src/remaplib.c -text
my_src/specspace.c -text
my_src/specspace.h -text
my_src/statistic.c -text
my_src/statistic.h -text
my_src/table.c -text
my_src/timebase.h -text
my_src/timer.c -text
my_src/userlog.c -text
my_src/util.c -text
my_src/util.h -text
my_src/vinterp.c -text
my_src/vinterp.h -text
my_src/yacc_lex -text
my_src/zaxis.c -text
src/Arith.c -text
src/Arithc.c -text
src/Arithdays.c -text
......
......@@ -4,7 +4,7 @@
Name: cdo
#BuildRequires:
Version: 1.4.5.1
Version: 1.4.6rc2
Release: 1
Summary: Climate Data Operators
License: GNU GENERAL PUBLIC LICENSE Version 2, June 1991
......
......@@ -36,6 +36,37 @@
#include "pstream.h"
#include "statistic.h"
/* ********************************** */
/* HEADER FOR PARALLEL EIGEN SOLUTION */
/* -->SEE END OF ROUTINE */
/* ********************************** */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#define PI_QU 1*atan(1)
#define PI_HA 2*atan(1)
#define PI 4*atan(1)
#define MAX_ITER 20
#define FNORM_PRECISION 1e-12
void parallel_eigen_solution_of_symmetric_matrix(double **M, double *A,
int n1, int n2, const char func[]);
int jacobi_1side(double **M, double *A, int n);
void annihilate_1side(double **M, int i, int j, int k, int n);
int n_finished;
/* **************************************** */
/* ENDOF HEADER FOR PARALLEL EIGEN SOLUTION */
/* -->SEE END OF ROUTINE */
/* **************************************** */
// NO MISSING VALUE SUPPORT ADDED SO FAR
void *EOFs(void * argument)
......@@ -86,6 +117,7 @@ void *EOFs(void * argument)
timer_start(timer_init);
}
cdoInitialize(argument);
cdoOperatorAdd("eof", EOF_, 0, NULL);
cdoOperatorAdd("eoftime", EOF_TIME, 0, NULL);
......@@ -312,7 +344,6 @@ void *EOFs(void * argument)
int i2;
double tmp;
streamInqRecord(streamID1, &varID, &levelID);
gridsize = gridInqSize(vlistInqVarGrid(vlistID1, varID));
......@@ -447,26 +478,34 @@ void *EOFs(void * argument)
/* SOLVE THE EIGEN PROBLEM */
if ( cdoTimer ) timer_start(timer_eig);
eigen_solution_of_symmetric_matrix(&cov[0], &eigv[0], n, n, func);
parallel_eigen_solution_of_symmetric_matrix(&cov[0],&eigv[0],n,n,func);
if ( cdoTimer ) timer_stop(timer_eig);
/* NOW: cov contains the eigenvectors, eigv the eigenvalues */
/* NOW: cov contains the eigenvectors, eigv the eigenvalues */
// need to multiply igen values by sum_w
// TODO --> find out why!
if ( cdoTimer ) timer_start(timer_post);
for (i = 0; i < n; i++)
eigenvalues[varID][levelID][i].ptr[0] = eigv[i]*sum_w;
for (i = 0; i < n_eig; i++)
{
if ( grid_space )
// Do not need to normalize by
// w[pack[j]]/sum_w (checked) --> find out why!!!
#if defined (_OPENMP)
#pragma omp parallel for private(j)
#endif
for(j = 0; j < npack; j++)
eigenvectors[varID][levelID][i].ptr[pack[j]] =
cov[i][j] / sqrt(weight[pack[j]]);
else if ( time_space )
{
#if defined (_OPENMP)
#pragma omp parallel for private(i2,sum) shared(datafields,eigenvectors)
#endif
for ( i2 = 0; i2 < npack; i2++ )
{
sum = 0;
......@@ -477,6 +516,11 @@ void *EOFs(void * argument)
}
// NORMALIZING
sum = 0;
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) reduction(+:sum) \
shared(eigenvectors,weight,pack,varID,levelID,i,npack)
#endif
for ( i2 = 0; i2 < npack; i2++ )
sum += weight[pack[i2]] *
eigenvectors[varID][levelID][i].ptr[pack[i2]] *
......@@ -485,11 +529,19 @@ void *EOFs(void * argument)
{
sum = sqrt(sum);
// sum = sqrt(sum/sum_w);
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) \
shared(npack,varID,levelID,i,pack,sum,eigenvectors)
#endif
for( i2 = 0; i2 < npack; i2++ )
eigenvectors[varID][levelID][i].ptr[pack[i2]] /= sum;
}
else
{
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) \
shared(npack,varID,levelID,i,pack,sum,eigenvectors,missval)
#endif
for( i2 = 0; i2 < npack; i2++ )
eigenvectors[varID][levelID][i].ptr[pack[i2]] = missval;
}
......@@ -581,3 +633,217 @@ void *EOFs(void * argument)
return (0);
}
/* ******************************************************************************** */
/* */
/* P A R A L L E L S O L U T I O N O F T H E E I G E N P R O B L E M */
/* WITH ONE SIDED JACOBI ALGORITHM */
/* */
/* ******************************************************************************** */
void parallel_eigen_solution_of_symmetric_matrix(double **M, double *A, int n1, int n2, const char func[])
{
if ( n1 != n2 )
{
fprintf(stderr,
"WARNING: Parallel eigenvalue computation of non-squared matrices\n"
" Not implemented yet.\n"
" Using sequential algorithm");
eigen_solution_of_symmetric_matrix(M,A,n1,n2,func);
}
else
jacobi_1side(M,A,n1);
return;
}
/* ******************************************************************************** */
/* This routine rotates columns/rows i and j of a symmetric Matrix M in a fashion, */
/* thus that the dot product of columns i and j 0 afterwards */
/* */
/* As this is done by a right-multiplication with a rotation matrix, which only */
/* changes columns i and j, this can be carried out for n/2 pairs of columns at */
/* the same time. */
/* ******************************************************************************** */
void annihilate_1side(double **M, int i, int j, int k, int n)
{
double tk, ck, sk, alpha=0, beta=0, gamma=0, zeta=0;
double tmp, *mi=NULL, *mj=NULL;
const char func[] = "annihilate_1side";
int first_annihilation = 0;
int r;
i--; j--;
mi = malloc(n*sizeof(double));
mj = malloc(n*sizeof(double));
if ( ! mj || ! mi)
fprintf(stderr,
"ERROR: allocation error - cannot allocate memory\n"
"ERROR: check stacksize and physically available memory\n");
if ( j < i ) { int tmp = i; i = j; j = tmp; }
for ( r=0; r<n; r++ ) {
alpha += M[j][r]*M[j][r];
beta += M[i][r]*M[i][r];
gamma += M[i][r]*M[j][r];
}
tmp = fabs(gamma/sqrt(alpha/beta));
if ( tmp < FNORM_PRECISION ) {
#if defined (_OPENMP)
#pragma omp critical
#endif
{
n_finished++;
}
free(mi);
free(mj);
return;
}
zeta = (beta-alpha)/(2.*gamma); // tan(2*theta)
tk = 1./(fabs(zeta)+sqrt(1.+zeta*zeta));
tk = zeta>0? tk : -tk; // = cot(2*theta)
ck = 1./sqrt(1.+tk*tk); // = cos(theta)
sk = ck*tk; // = sin(theta)
// calculate a_i,j - tilde
for ( r=0; r<n; r++ ) {
mi[r] = ck*M[i][r] + sk*M[j][r];
mj[r] =-sk*M[i][r] + ck*M[j][r];
}
for ( r=0; r<n; r++ ) {
M[i][r] = mi[r];
M[j][r] = mj[r];
}
free(mi);
free(mj);
return;
}
int jacobi_1side(double **M, double *A, int n)
{
const char func[] = "jacobi_1side";
int i,j,k,m,r,i_ann,j_ann;
int n_iter = 0;
int idx;
int count=0;
int **annihilations, *annihilations_buff;
annihilations_buff = malloc (n*n*2*sizeof(int));
annihilations = malloc((n*n)*sizeof(int*));
for(i=0;i<n*n;i++)
annihilations[i] = & annihilations_buff[2*i];
for( k=1; k<n+1; k++ ) {
if ( k < n ) {
for ( i=1;i<=(int)ceil(1./2.*(n-k));i++ ) {
j = n-k+2-i;
annihilations[count][0] = i;
annihilations[count][1] = j;
count++;
}
if ( k > 2 ) {
for ( i=n-k+2;i<=n-(int)floor(1./2.*k);i++ ) {
j = 2*n-k+2-i;
annihilations[count][0] = i;
annihilations[count][1] = j;
count++;
}
}
}
else if ( k == n ) {
for(i=2;i<=(int)ceil(1./2.*n);i++) {
j = n+2-i;
annihilations[count][0] = i;
annihilations[count][1] = j;
count++;
}
}
}
// fprintf(stderr, "%i annihilations per sweep\n",count);
n_finished = 0;
// override global openmp settings works
// omp_set_num_threads(2);
while ( n_iter < MAX_ITER && n_finished < count ) {
n_finished = 0;
if ( n%2 == 1 ) {
for(m=0;m<n;m++) {
#if defined (_OPENMP)
#pragma omp parallel for private(i,idx,i_ann,j_ann) shared(M,annihilations,n) reduction(+:n_finished)
#endif
for(i=0;i<n/2;i++) {
idx = m*(n/2)+i;
i_ann = annihilations[idx][0];
j_ann = annihilations[idx][1];
if ( i_ann != j_ann && i_ann && j_ann )
annihilate_1side(M,i_ann,j_ann,0,n);
}
}
}
else { // n%2 == 0
for(m=0;m<n;m++) {
#if defined (_OPENMP)
#pragma omp parallel for private(i,idx,i_ann,j_ann) shared(M,annihilations,n) reduction(+:n_finished)
#endif
for(i=0;i<n/2-(m%2);i++) {
idx = m/2 * ( n/2 + n/2-1);
if ( m % 2 ) idx += n/2;
i_ann = annihilations[idx+i][0];
j_ann = annihilations[idx+i][1];
if ( i_ann && j_ann && i_ann != j_ann )
annihilate_1side(M,i_ann,j_ann,0,n);
}
}
}
n_iter++;
}
// fprintf(stderr,"finished after %i sweeps (n_finished %i)\n",n_iter,n_finished);
if ( n_iter == MAX_ITER )
{
fprintf(stderr,
"WARNING: Eigenvalue computation with one-sided jacobi scheme\n"
" Did not converge properly. %i pairs of columns did\n"
" not achieve requested orthogonality of %10.6g\n",
count-n_finished, FNORM_PRECISION);
for ( i=0; i<n; i++ )
memset(M[i],0,n*sizeof(double));
memset(A,0,n*sizeof(double));
}
// calculate eigen values as sqrt(||m_i||)
for ( i=0; i<n; i++ )
{
A[i] = 0;
for ( r=0; r<n; r++ )
A[i] += M[i][r] * M[i][r];
A[i] = sqrt(A[i]);
for ( r=0; r<n; r++ )
M[i][r] /= A[i];
}
heap_sort(A,M,n);
free(annihilations);
free(annihilations_buff);
return n_iter;
}
......@@ -90,9 +90,12 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if `struct stat' is a member of `st_blksize'. */
/* Define to 1 if `st_blksize' is a member of `struct stat'. */
#undef HAVE_STRUCT_STAT_ST_BLKSIZE
/* Define to 1 if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
......
......@@ -1275,15 +1275,18 @@ int gridGenArea(int gridID, double *area)
if ( lgriddestroy ) gridDestroy(gridID);
total_area = 0;
fprintf(stderr, "entering parallel region in gridInqArea\n");
#if defined (_OPENMP)
#pragma omp parallel for default(none) \
#pragma omp parallel for default(none) \
shared(gridsize, area, nv, grid_center_lon, grid_center_lat, grid_corner_lon, grid_corner_lat) \
private(i)
#endif
for ( i = 0; i < gridsize; ++i )
{
area[i] = cell_area(i, nv, grid_center_lon, grid_center_lat, grid_corner_lon, grid_corner_lat);
// total_area += area[i];