Commit 9ad0e9a0 authored by Uwe Schulzweida's avatar Uwe Schulzweida
Browse files

Added fftw version of gp2fc()-

parent bc87d6a7
......@@ -218,8 +218,6 @@ Fourier(void *process)
int nts = tsID;
const bool isPower2 = ((nts & (nts - 1)) == 0);
std::vector<FourierMemory> ompmem(Threading::ompNumThreads);
if (use_fftw)
......@@ -236,6 +234,7 @@ Fourier(void *process)
}
else
{
const bool isPower2 = ((nts & (nts - 1)) == 0);
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].real.resize(nts);
......
......@@ -17,8 +17,8 @@ libcdo_la_SOURCES = \
cdi_uuid.h \
cdoStream.cc \
cdoStream.h \
cdo_apply.cc \
cdo_apply.h \
cdo_apply.cc \
cdo_apply.h \
cdo_cdiWrapper.cc \
cdo_cdiWrapper.h \
cdo_cmor.h \
......@@ -26,6 +26,8 @@ libcdo_la_SOURCES = \
cdo_defaultValues.h \
cdo_features.cc \
cdo_features.h \
cdo_fctrans.cc \
cdo_fctrans.h \
cdo_fill.cc \
cdo_fill.h \
cdo_getopt.cc \
......@@ -38,7 +40,7 @@ libcdo_la_SOURCES = \
cdo_output.cc \
cdo_output.h \
cdo_pthread.cc \
cdo_pthread.h \
cdo_pthread.h \
cdo_read.cc \
cdo_season.cc \
cdo_season.h \
......
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_LIBFFTW3
#include <fftw3.h>
#endif
#include <vector>
#include "cdo_output.h"
#include "cdo_options.h"
#include "cimdOmp.h"
void
gp2fc(const double *gp, double *fc, long nlat, long nlon, long nfc)
{
#ifdef HAVE_LIBFFTW3
double norm = 1. / nlon;
struct FourierMemory
{
double *in_fft;
fftw_complex *out_fft;
fftw_plan plan;
};
std::vector<FourierMemory> ompmem(Threading::ompNumThreads);
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].in_fft = (double*) fftw_malloc(nlon*sizeof(double));
ompmem[i].out_fft = fftw_alloc_complex(nlon / 2 + 1);
ompmem[i].plan = fftw_plan_dft_r2c_1d(nlon, ompmem[i].in_fft, ompmem[i].out_fft, FFTW_ESTIMATE);
}
if (Options::cdoVerbose) fftw_print_plan(ompmem[0].plan);
#ifdef _OPENMP
#pragma omp parallel for default(shared)
#endif
for (long ilat = 0; ilat < nlat; ++ilat)
{
const auto ompthID = cdo_omp_get_thread_num();
auto in_fft = ompmem[ompthID].in_fft;
auto out_fft = ompmem[ompthID].out_fft;
for (long ilon = 0; ilon < nlon; ++ilon) in_fft[ilon] = gp[ilat*nlon+ilon];
fftw_execute(ompmem[ompthID].plan);
for (long ifc = 0; ifc < nfc/2; ++ifc)
{
fc[2*ifc*nlat+ilat] = norm * out_fft[ifc][0];
fc[(2*ifc+1)*nlat+ilat] = norm * out_fft[ifc][1];
}
}
for (int i = 0; i < Threading::ompNumThreads; i++)
{
fftw_free(ompmem[i].in_fft);
fftw_free(ompmem[i].out_fft);
fftw_destroy_plan(ompmem[i].plan);
}
#else
cdoAbort("FFTW support not compiled in!");
#endif
}
void gp2fc(const double *gp, double *fc, long nlat, long nlon, long nfc);
......@@ -17,6 +17,7 @@
#include <cdi.h>
#include "cdo_output.h"
#include "cdo_fctrans.h"
#include "specspace.h"
#include <mpim_grid.h>
......@@ -32,7 +33,11 @@ grid2spec(SPTRANS &sptrans, int gridIDin, double *arrayIn, int gridIDout, double
std::vector<double> fpwork(nlat * nfc * nlev);
gp2fc(sptrans.vtrig.data(), sptrans.ifax, arrayIn, fpwork.data(), nlat, nlon, nlev, nfc);
if (sptrans.use_fftw)
gp2fc(arrayIn, fpwork.data(), nlat, nlon, nfc);
else
gp2fc(sptrans.vtrig.data(), sptrans.ifax, arrayIn, fpwork.data(), nlat, nlon, nlev, nfc);
fc2sp(fpwork.data(), arrayOut, sptrans.vpold.data(), nlev, nlat, nfc, ntr);
}
......@@ -101,7 +106,10 @@ grid2four(SPTRANS &sptrans, int gridIDin, double *arrayIn, int gridIDout, double
const long waves = ntr + 1;
const long nfc = waves * 2;
gp2fc(sptrans.vtrig.data(), sptrans.ifax, arrayIn, arrayOut, nlat, nlon, nlev, nfc);
if (sptrans.use_fftw)
gp2fc(arrayIn, arrayOut, nlat, nlon, nfc);
else
gp2fc(sptrans.vtrig.data(), sptrans.ifax, arrayIn, arrayOut, nlat, nlon, nlev, nfc);
}
void
......@@ -139,8 +147,16 @@ trans_uv2dv(SPTRANS &sptrans, long nlev, int gridID1, double *gu, double *gv, in
std::vector<double> fpwork1(nlat * nfc * nlev);
std::vector<double> fpwork2(nlat * nfc * nlev);
gp2fc(sptrans.vtrig.data(), sptrans.ifax, gu, fpwork1.data(), nlat, nlon, nlev, nfc);
gp2fc(sptrans.vtrig.data(), sptrans.ifax, gv, fpwork2.data(), nlat, nlon, nlev, nfc);
if (sptrans.use_fftw)
{
gp2fc(gu, fpwork1.data(), nlat, nlon, nfc);
gp2fc(gv, fpwork2.data(), nlat, nlon, nfc);
}
else
{
gp2fc(sptrans.vtrig.data(), sptrans.ifax, gu, fpwork1.data(), nlat, nlon, nlev, nfc);
gp2fc(sptrans.vtrig.data(), sptrans.ifax, gv, fpwork2.data(), nlat, nlon, nlev, nfc);
}
scaluv(fpwork1.data(), sptrans.vcoslat.data(), nlat, nfc * nlev);
scaluv(fpwork2.data(), sptrans.vcoslat.data(), nlat, nfc * nlev);
......
......@@ -23,6 +23,7 @@
class SPTRANS
{
public:
bool use_fftw = false;
long nlon = 0;
long nlat = 0;
long ntr = 0;
......@@ -56,8 +57,9 @@ public:
poldim = nsp / 2 * nlat;
vtrig.resize(nlon);
int status = fft_set(vtrig.data(), ifax, nlon);
const auto status = fft_set(vtrig.data(), ifax, nlon);
if (status < 0) cdoAbort("FFT error!");
//if (status < 0) use_fftw = true;
vpoli.resize(poldim);
vpold.resize(poldim);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment