Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
mpim-sw
cdo
Commits
d7c510f1
Commit
d7c510f1
authored
Aug 05, 2010
by
Cedrick Ansorge
Browse files
parallel version of src/EOFs.c with one sided jacobi scheme
parent
554cbb0d
Changes
7
Hide whitespace changes
Inline
Side-by-side
.gitattributes
View file @
d7c510f1
...
...
@@ -270,6 +270,262 @@ m4/check_gnu_make.m4 -text
m4/gcc_ac_c_char_bit.m4 -text
m4/starlink_fpp.m4 -text
m4/tj_find_type.m4 -text
my_src/Arith.c -text
my_src/Arithc.c -text
my_src/Arithdays.c -text
my_src/Arithlat.c -text
my_src/CDItest.c -text
my_src/Cat.c -text
my_src/Change.c -text
my_src/Change_e5slm.c -text
my_src/Cloudlayer.c -text
my_src/Command.c -text
my_src/Comp.c -text
my_src/Compc.c -text
my_src/Complextorect.c -text
my_src/Cond.c -text
my_src/Cond2.c -text
my_src/Condc.c -text
my_src/Consecstat.c -text
my_src/Copy.c -text
my_src/Deltime.c -text
my_src/Detrend.c -text
my_src/Diff.c -text
my_src/EOFs.c -text
my_src/EcaIndices.c -text
my_src/Echam5ini.c -text
my_src/Enlarge.c -text
my_src/Enlargegrid.c -text
my_src/Ensstat.c -text
my_src/Eofcoeff.c -text
my_src/Exprf.c -text
my_src/Filedes.c -text
my_src/Fillmiss.c -text
my_src/Filter.c -text
my_src/Fldrms.c -text
my_src/Fldstat.c -text
my_src/Fldstat2.c -text
my_src/Fourier.c -text
my_src/Gather.c -text
my_src/Gengrid.c -text
my_src/Gradsdes.c -text
my_src/Gridboxstat.c -text
my_src/Gridcell.c -text
my_src/Harmonic.c -text
my_src/Hi.c -text
my_src/Histogram.c -text
my_src/Importamsr.c -text
my_src/Importbinary.c -text
my_src/Importcmsaf.c -text
my_src/Importobs.c -text
my_src/Info.c -text
my_src/Input.c -text
my_src/Intgrid.c -text
my_src/Intgridtraj.c -text
my_src/Intlevel.c -text
my_src/Intntime.c -text
my_src/Inttime.c -text
my_src/Intyear.c -text
my_src/Invert.c -text
my_src/Invertlev.c -text
my_src/Log.c -text
my_src/Makefile.am -text
my_src/Makefile.in -text
my_src/Maskbox.c -text
my_src/Mastrfu.c -text
my_src/Math.c -text
my_src/Merge.c -text
my_src/Mergegrid.c -text
my_src/Mergetime.c -text
my_src/Merstat.c -text
my_src/Monarith.c -text
my_src/Mrotuv.c -text
my_src/Mrotuvb.c -text
my_src/Ninfo.c -text
my_src/Nmltest.c -text
my_src/Output.c -text
my_src/Outputgmt.c -text
my_src/Pinfo.c -text
my_src/Pressure.c -text
my_src/Regres.c -text
my_src/Remap.c -text
my_src/Remapeta.c -text
my_src/Replace.c -text
my_src/Replacevalues.c -text
my_src/Rotuv.c -text
my_src/Runpctl.c -text
my_src/Runstat.c -text
my_src/Scatter.c -text
my_src/Seascount.c -text
my_src/Seaspctl.c -text
my_src/Seasstat.c -text
my_src/Selbox.c -text
my_src/Select.c -text
my_src/Seloperator.c -text
my_src/Selrec.c -text
my_src/Seltime.c -text
my_src/Selvar.c -text
my_src/Set.c -text
my_src/Setbox.c -text
my_src/Setgatt.c -text
my_src/Setgrid.c -text
my_src/Sethalo.c -text
my_src/Setmiss.c -text
my_src/Setrcaname.c -text
my_src/Settime.c -text
my_src/Setzaxis.c -text
my_src/Showinfo.c -text
my_src/Sinfo.c -text
my_src/Smooth9.c -text
my_src/Sort.c -text
my_src/Sorttimestamp.c -text
my_src/Specinfo.c -text
my_src/Spectral.c -text
my_src/Spectrum.c -text
my_src/Split.c -text
my_src/Splitrec.c -text
my_src/Splitsel.c -text
my_src/Splittime.c -text
my_src/Splityear.c -text
my_src/Subtrend.c -text
my_src/Templates.c -text
my_src/Test.c -text
my_src/Tests.c -text
my_src/Timcount.c -text
my_src/Timpctl.c -text
my_src/Timselpctl.c -text
my_src/Timselstat.c -text
my_src/Timsort.c -text
my_src/Timstat.c -text
my_src/Timstat2.c -text
my_src/Timstat3.c -text
my_src/Tinfo.c -text
my_src/Tocomplex.c -text
my_src/Transpose.c -text
my_src/Trend.c -text
my_src/Trms.c -text
my_src/Tstepcount.c -text
my_src/Vardup.c -text
my_src/Vargen.c -text
my_src/Varrms.c -text
my_src/Vertint.c -text
my_src/Vertstat.c -text
my_src/Vertwind.c -text
my_src/Wct.c -text
my_src/Wind.c -text
my_src/Writegrid.c -text
my_src/Writerandom.c -text
my_src/Ydaypctl.c -text
my_src/Ydaystat.c -text
my_src/Ydrunpctl.c -text
my_src/Ydrunstat.c -text
my_src/Yhourstat.c -text
my_src/Ymonarith.c -text
my_src/Ymonpctl.c -text
my_src/Ymonstat.c -text
my_src/Yseaspctl.c -text
my_src/Yseasstat.c -text
my_src/Zonstat.c -text
my_src/cdi.h -text
my_src/cdilib.c -text
my_src/cdo.c -text
my_src/cdo.h -text
my_src/cdo_int.h -text
my_src/cdo_pthread.c -text
my_src/cdo_vlist.c -text
my_src/cdotest.c -text
my_src/color.c -text
my_src/color.h -text
my_src/commandline.c -text
my_src/config.h.in -text
my_src/counter.h -text
my_src/dmemory.h -text
my_src/dtypes.h -text
my_src/ecacore.c -text
my_src/ecacore.h -text
my_src/ecautil.c -text
my_src/ecautil.h -text
my_src/error.h -text
my_src/etopo.h -text
my_src/exception.c -text
my_src/expr.c -text
my_src/expr.h -text
my_src/expr_lex.c -text
my_src/expr_lex.l -text
my_src/expr_yacc.c -text
my_src/expr_yacc.h -text
my_src/expr_yacc.y -text
my_src/field.c -text
my_src/field.h -text
my_src/field2.c -text
my_src/fieldc.c -text
my_src/fieldmem.c -text
my_src/fieldmer.c -text
my_src/fieldzon.c -text
my_src/fouriertrans.c -text
my_src/functs.h -text
my_src/gradsdeslib.c -text
my_src/gradsdeslib.h -text
my_src/grid.c -text
my_src/grid.h -text
my_src/grid_gme.c -text
my_src/grid_lcc.c -text
my_src/grid_rot.c -text
my_src/griddes.c -text
my_src/griddes.h -text
my_src/griddes_h5.c -text
my_src/griddes_nc.c -text
my_src/hetaeta.c -text
my_src/hetaeta.h -text
my_src/history.c -text
my_src/institution.c -text
my_src/interpol.c -text
my_src/interpol.h -text
my_src/job.c -text
my_src/juldate.c -text
my_src/legendre.c -text
my_src/list.c -text
my_src/list.h -text
my_src/modules.c -text
my_src/modules.h -text
my_src/namelist.c -text
my_src/namelist.h -text
my_src/normal.c -text
my_src/nth_element.c -text
my_src/nth_element.h -text
my_src/operator_help.h -text
my_src/par_io.c -text
my_src/par_io.h -text
my_src/percentiles.c -text
my_src/percentiles.h -text
my_src/pipe.c -text
my_src/pipe.h -text
my_src/printinfo.h -text
my_src/process.c -text
my_src/process.h -text
my_src/pstream.c -text
my_src/pstream.h -text
my_src/pstream_int.h -text
my_src/pthread_debug.c -text
my_src/pthread_debug.h -text
my_src/readline.c -text
my_src/realtime.c -text
my_src/remap.h -text
my_src/remaplib.c -text
my_src/specspace.c -text
my_src/specspace.h -text
my_src/statistic.c -text
my_src/statistic.h -text
my_src/table.c -text
my_src/timebase.h -text
my_src/timer.c -text
my_src/userlog.c -text
my_src/util.c -text
my_src/util.h -text
my_src/vinterp.c -text
my_src/vinterp.h -text
my_src/yacc_lex -text
my_src/zaxis.c -text
src/Arith.c -text
src/Arithc.c -text
src/Arithdays.c -text
...
...
cdo.spec
View file @
d7c510f1
...
...
@@ -4,7 +4,7 @@
Name: cdo
#BuildRequires:
Version: 1.4.
5.1
Version: 1.4.
6rc2
Release: 1
Summary: Climate Data Operators
License: GNU GENERAL PUBLIC LICENSE Version 2, June 1991
...
...
src/EOFs.c
View file @
d7c510f1
...
...
@@ -36,6 +36,37 @@
#include
"pstream.h"
#include
"statistic.h"
/* ********************************** */
/* HEADER FOR PARALLEL EIGEN SOLUTION */
/* -->SEE END OF ROUTINE */
/* ********************************** */
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
#include
<math.h>
#include
<time.h>
#define PI_QU 1*atan(1)
#define PI_HA 2*atan(1)
#define PI 4*atan(1)
#define MAX_ITER 20
#define FNORM_PRECISION 1e-12
void
parallel_eigen_solution_of_symmetric_matrix
(
double
**
M
,
double
*
A
,
int
n1
,
int
n2
,
const
char
func
[]);
int
jacobi_1side
(
double
**
M
,
double
*
A
,
int
n
);
void
annihilate_1side
(
double
**
M
,
int
i
,
int
j
,
int
k
,
int
n
);
int
n_finished
;
/* **************************************** */
/* ENDOF HEADER FOR PARALLEL EIGEN SOLUTION */
/* -->SEE END OF ROUTINE */
/* **************************************** */
// NO MISSING VALUE SUPPORT ADDED SO FAR
void
*
EOFs
(
void
*
argument
)
...
...
@@ -86,6 +117,7 @@ void *EOFs(void * argument)
timer_start
(
timer_init
);
}
cdoInitialize
(
argument
);
cdoOperatorAdd
(
"eof"
,
EOF_
,
0
,
NULL
);
cdoOperatorAdd
(
"eoftime"
,
EOF_TIME
,
0
,
NULL
);
...
...
@@ -312,7 +344,6 @@ void *EOFs(void * argument)
int
i2
;
double
tmp
;
streamInqRecord
(
streamID1
,
&
varID
,
&
levelID
);
gridsize
=
gridInqSize
(
vlistInqVarGrid
(
vlistID1
,
varID
));
...
...
@@ -447,26 +478,34 @@ void *EOFs(void * argument)
/* SOLVE THE EIGEN PROBLEM */
if
(
cdoTimer
)
timer_start
(
timer_eig
);
eigen_solution_of_symmetric_matrix
(
&
cov
[
0
],
&
eigv
[
0
],
n
,
n
,
func
);
parallel_eigen_solution_of_symmetric_matrix
(
&
cov
[
0
],
&
eigv
[
0
],
n
,
n
,
func
);
if
(
cdoTimer
)
timer_stop
(
timer_eig
);
/* NOW: cov contains the eigenvectors, eigv the eigenvalues */
/* NOW: cov contains the eigenvectors, eigv the eigenvalues */
// need to multiply igen values by sum_w
// TODO --> find out why!
if
(
cdoTimer
)
timer_start
(
timer_post
);
for
(
i
=
0
;
i
<
n
;
i
++
)
eigenvalues
[
varID
][
levelID
][
i
].
ptr
[
0
]
=
eigv
[
i
]
*
sum_w
;
for
(
i
=
0
;
i
<
n_eig
;
i
++
)
{
if
(
grid_space
)
// Do not need to normalize by
// w[pack[j]]/sum_w (checked) --> find out why!!!
#if defined (_OPENMP)
#pragma omp parallel for private(j)
#endif
for
(
j
=
0
;
j
<
npack
;
j
++
)
eigenvectors
[
varID
][
levelID
][
i
].
ptr
[
pack
[
j
]]
=
cov
[
i
][
j
]
/
sqrt
(
weight
[
pack
[
j
]]);
else
if
(
time_space
)
{
#if defined (_OPENMP)
#pragma omp parallel for private(i2,sum) shared(datafields,eigenvectors)
#endif
for
(
i2
=
0
;
i2
<
npack
;
i2
++
)
{
sum
=
0
;
...
...
@@ -477,6 +516,11 @@ void *EOFs(void * argument)
}
// NORMALIZING
sum
=
0
;
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) reduction(+:sum) \
shared(eigenvectors,weight,pack,varID,levelID,i,npack)
#endif
for
(
i2
=
0
;
i2
<
npack
;
i2
++
)
sum
+=
weight
[
pack
[
i2
]]
*
eigenvectors
[
varID
][
levelID
][
i
].
ptr
[
pack
[
i2
]]
*
...
...
@@ -485,11 +529,19 @@ void *EOFs(void * argument)
{
sum
=
sqrt
(
sum
);
// sum = sqrt(sum/sum_w);
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) \
shared(npack,varID,levelID,i,pack,sum,eigenvectors)
#endif
for
(
i2
=
0
;
i2
<
npack
;
i2
++
)
eigenvectors
[
varID
][
levelID
][
i
].
ptr
[
pack
[
i2
]]
/=
sum
;
}
else
{
#if defined (_OPENMP)
#pragma omp parallel for private(i2) default(none) \
shared(npack,varID,levelID,i,pack,sum,eigenvectors,missval)
#endif
for
(
i2
=
0
;
i2
<
npack
;
i2
++
)
eigenvectors
[
varID
][
levelID
][
i
].
ptr
[
pack
[
i2
]]
=
missval
;
}
...
...
@@ -581,3 +633,217 @@ void *EOFs(void * argument)
return
(
0
);
}
/* ******************************************************************************** */
/* */
/* P A R A L L E L S O L U T I O N O F T H E E I G E N P R O B L E M */
/* WITH ONE SIDED JACOBI ALGORITHM */
/* */
/* ******************************************************************************** */
void
parallel_eigen_solution_of_symmetric_matrix
(
double
**
M
,
double
*
A
,
int
n1
,
int
n2
,
const
char
func
[])
{
if
(
n1
!=
n2
)
{
fprintf
(
stderr
,
"WARNING: Parallel eigenvalue computation of non-squared matrices
\n
"
" Not implemented yet.
\n
"
" Using sequential algorithm"
);
eigen_solution_of_symmetric_matrix
(
M
,
A
,
n1
,
n2
,
func
);
}
else
jacobi_1side
(
M
,
A
,
n1
);
return
;
}
/* ******************************************************************************** */
/* This routine rotates columns/rows i and j of a symmetric Matrix M in a fashion, */
/* thus that the dot product of columns i and j 0 afterwards */
/* */
/* As this is done by a right-multiplication with a rotation matrix, which only */
/* changes columns i and j, this can be carried out for n/2 pairs of columns at */
/* the same time. */
/* ******************************************************************************** */
void
annihilate_1side
(
double
**
M
,
int
i
,
int
j
,
int
k
,
int
n
)
{
double
tk
,
ck
,
sk
,
alpha
=
0
,
beta
=
0
,
gamma
=
0
,
zeta
=
0
;
double
tmp
,
*
mi
=
NULL
,
*
mj
=
NULL
;
const
char
func
[]
=
"annihilate_1side"
;
int
first_annihilation
=
0
;
int
r
;
i
--
;
j
--
;
mi
=
malloc
(
n
*
sizeof
(
double
));
mj
=
malloc
(
n
*
sizeof
(
double
));
if
(
!
mj
||
!
mi
)
fprintf
(
stderr
,
"ERROR: allocation error - cannot allocate memory
\n
"
"ERROR: check stacksize and physically available memory
\n
"
);
if
(
j
<
i
)
{
int
tmp
=
i
;
i
=
j
;
j
=
tmp
;
}
for
(
r
=
0
;
r
<
n
;
r
++
)
{
alpha
+=
M
[
j
][
r
]
*
M
[
j
][
r
];
beta
+=
M
[
i
][
r
]
*
M
[
i
][
r
];
gamma
+=
M
[
i
][
r
]
*
M
[
j
][
r
];
}
tmp
=
fabs
(
gamma
/
sqrt
(
alpha
/
beta
));
if
(
tmp
<
FNORM_PRECISION
)
{
#if defined (_OPENMP)
#pragma omp critical
#endif
{
n_finished
++
;
}
free
(
mi
);
free
(
mj
);
return
;
}
zeta
=
(
beta
-
alpha
)
/
(
2
.
*
gamma
);
// tan(2*theta)
tk
=
1
.
/
(
fabs
(
zeta
)
+
sqrt
(
1
.
+
zeta
*
zeta
));
tk
=
zeta
>
0
?
tk
:
-
tk
;
// = cot(2*theta)
ck
=
1
.
/
sqrt
(
1
.
+
tk
*
tk
);
// = cos(theta)
sk
=
ck
*
tk
;
// = sin(theta)
// calculate a_i,j - tilde
for
(
r
=
0
;
r
<
n
;
r
++
)
{
mi
[
r
]
=
ck
*
M
[
i
][
r
]
+
sk
*
M
[
j
][
r
];
mj
[
r
]
=-
sk
*
M
[
i
][
r
]
+
ck
*
M
[
j
][
r
];
}
for
(
r
=
0
;
r
<
n
;
r
++
)
{
M
[
i
][
r
]
=
mi
[
r
];
M
[
j
][
r
]
=
mj
[
r
];
}
free
(
mi
);
free
(
mj
);
return
;
}
int
jacobi_1side
(
double
**
M
,
double
*
A
,
int
n
)
{
const
char
func
[]
=
"jacobi_1side"
;
int
i
,
j
,
k
,
m
,
r
,
i_ann
,
j_ann
;
int
n_iter
=
0
;
int
idx
;
int
count
=
0
;
int
**
annihilations
,
*
annihilations_buff
;
annihilations_buff
=
malloc
(
n
*
n
*
2
*
sizeof
(
int
));
annihilations
=
malloc
((
n
*
n
)
*
sizeof
(
int
*
));
for
(
i
=
0
;
i
<
n
*
n
;
i
++
)
annihilations
[
i
]
=
&
annihilations_buff
[
2
*
i
];
for
(
k
=
1
;
k
<
n
+
1
;
k
++
)
{
if
(
k
<
n
)
{
for
(
i
=
1
;
i
<=
(
int
)
ceil
(
1
.
/
2
.
*
(
n
-
k
));
i
++
)
{
j
=
n
-
k
+
2
-
i
;
annihilations
[
count
][
0
]
=
i
;
annihilations
[
count
][
1
]
=
j
;
count
++
;
}
if
(
k
>
2
)
{
for
(
i
=
n
-
k
+
2
;
i
<=
n
-
(
int
)
floor
(
1
.
/
2
.
*
k
);
i
++
)
{
j
=
2
*
n
-
k
+
2
-
i
;
annihilations
[
count
][
0
]
=
i
;
annihilations
[
count
][
1
]
=
j
;
count
++
;
}
}
}
else
if
(
k
==
n
)
{
for
(
i
=
2
;
i
<=
(
int
)
ceil
(
1
.
/
2
.
*
n
);
i
++
)
{
j
=
n
+
2
-
i
;
annihilations
[
count
][
0
]
=
i
;
annihilations
[
count
][
1
]
=
j
;
count
++
;
}
}
}
// fprintf(stderr, "%i annihilations per sweep\n",count);
n_finished
=
0
;
// override global openmp settings works
// omp_set_num_threads(2);
while
(
n_iter
<
MAX_ITER
&&
n_finished
<
count
)
{
n_finished
=
0
;
if
(
n
%
2
==
1
)
{
for
(
m
=
0
;
m
<
n
;
m
++
)
{
#if defined (_OPENMP)
#pragma omp parallel for private(i,idx,i_ann,j_ann) shared(M,annihilations,n) reduction(+:n_finished)
#endif
for
(
i
=
0
;
i
<
n
/
2
;
i
++
)
{
idx
=
m
*
(
n
/
2
)
+
i
;
i_ann
=
annihilations
[
idx
][
0
];
j_ann
=
annihilations
[
idx
][
1
];
if
(
i_ann
!=
j_ann
&&
i_ann
&&
j_ann
)
annihilate_1side
(
M
,
i_ann
,
j_ann
,
0
,
n
);
}
}
}
else
{
// n%2 == 0
for
(
m
=
0
;
m
<
n
;
m
++
)
{
#if defined (_OPENMP)
#pragma omp parallel for private(i,idx,i_ann,j_ann) shared(M,annihilations,n) reduction(+:n_finished)
#endif
for
(
i
=
0
;
i
<
n
/
2
-
(
m
%
2
);
i
++
)
{
idx
=
m
/
2
*
(
n
/
2
+
n
/
2
-
1
);
if
(
m
%
2
)
idx
+=
n
/
2
;
i_ann
=
annihilations
[
idx
+
i
][
0
];
j_ann
=
annihilations
[
idx
+
i
][
1
];
if
(
i_ann
&&
j_ann
&&
i_ann
!=
j_ann
)
annihilate_1side
(
M
,
i_ann
,
j_ann
,
0
,
n
);
}
}
}
n_iter
++
;
}
// fprintf(stderr,"finished after %i sweeps (n_finished %i)\n",n_iter,n_finished);
if
(
n_iter
==
MAX_ITER
)
{
fprintf
(
stderr
,
"WARNING: Eigenvalue computation with one-sided jacobi scheme
\n
"
" Did not converge properly. %i pairs of columns did
\n
"
" not achieve requested orthogonality of %10.6g
\n
"
,
count
-
n_finished
,
FNORM_PRECISION
);
for
(
i
=
0
;
i
<
n
;
i
++
)
memset
(
M
[
i
],
0
,
n
*
sizeof
(
double
));
memset
(
A
,
0
,
n
*
sizeof
(
double
));
}
// calculate eigen values as sqrt(||m_i||)
for
(
i
=
0
;
i
<
n
;
i
++
)
{
A
[
i
]
=
0
;
for
(
r
=
0
;
r
<
n
;
r
++
)
A
[
i
]
+=
M
[
i
][
r
]
*
M
[
i
][
r
];
A
[
i
]
=
sqrt
(
A
[
i
]);
for
(
r
=
0
;
r
<
n
;
r
++
)
M
[
i
][
r
]
/=
A
[
i
];
}
heap_sort
(
A
,
M
,
n
);
free
(
annihilations
);
free
(
annihilations_buff
);
return
n_iter
;
}
src/config.h.in
View file @
d7c510f1
...
...
@@ -90,9 +90,12 @@
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if `st
ruct stat
' is a member of `st
_blksize
'. */
/* Define to 1 if `st
_blksize
' is a member of `st
ruct stat
'. */
#undef HAVE_STRUCT_STAT_ST_BLKSIZE
/* Define to 1 if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
...
...
src/grid.c
View file @
d7c510f1
...
...
@@ -1275,15 +1275,18 @@ int gridGenArea(int gridID, double *area)
if
(
lgriddestroy
)
gridDestroy
(
gridID
);
total_area
=
0
;
fprintf
(
stderr
,
"entering parallel region in gridInqArea
\n
"
);
#if defined (_OPENMP)
#pragma omp parallel for default(none)
\
#pragma omp parallel for default(none)
\
shared(gridsize, area, nv, grid_center_lon, grid_center_lat, grid_corner_lon, grid_corner_lat) \
private(i)
#endif
for
(
i
=
0
;
i
<
gridsize
;
++
i
)
{
area
[
i
]
=
cell_area
(
i
,
nv
,
grid_center_lon
,
grid_center_lat
,
grid_corner_lon
,
grid_corner_lat
);
// total_area += area[i];