Commit 4e94492f authored by Uwe Schulzweida's avatar Uwe Schulzweida
Browse files

Fourier: added support for libfftw.

parent 2da563b7
......@@ -3,6 +3,10 @@
* Using CDI library version 1.9.8
* Version 1.9.8 release
2019-10-24 Uwe Schulzweida
* Fourier: added support for libfftw
2019-10-21 Uwe Schulzweida
* Expr: added function isMissing(x)
......
......@@ -40,7 +40,7 @@
#include "cdo_options.h"
#include "datetime.h"
#include "cimdOmp.h"
#include "dmemory.h"
#define NALLOC_INC 1024
......@@ -80,28 +80,28 @@ filter_fftw(int nts, const std::vector<int> &fmasc, fftw_complex *fft_out, fftw_
#endif
static void
filter_intrinsic(int nts, const std::vector<int> &fmasc, double *array1, double *array2)
filter_intrinsic(int nts, const std::vector<int> &fmasc, double *real, double *imag)
{
const bool lpower2 = ((nts & (nts - 1)) == 0);
const bool isPower2 = ((nts & (nts - 1)) == 0);
Varray<double> work_r;
Varray<double> work_i;
if (!lpower2) work_r.resize(nts);
if (!lpower2) work_i.resize(nts);
if (!isPower2) work_r.resize(nts);
if (!isPower2) work_i.resize(nts);
if (lpower2)
fft(array1, array2, nts, 1);
if (isPower2)
fft(real, imag, nts, 1);
else
ft_r(array1, array2, nts, 1, work_r.data(), work_i.data());
ft_r(real, imag, nts, 1, work_r.data(), work_i.data());
for (int i = 0; i < nts; i++)
if (!fmasc[i]) array1[i] = array2[i] = 0;
if (!fmasc[i]) real[i] = imag[i] = 0;
if (lpower2)
fft(array1, array2, nts, -1);
if (isPower2)
fft(real, imag, nts, -1);
else
ft_r(array1, array2, nts, -1, work_r.data(), work_i.data());
ft_r(real, imag, nts, -1, work_r.data(), work_i.data());
return;
}
......@@ -121,14 +121,13 @@ Filter(void *process)
int varID, levelID;
size_t nmiss;
int year0, month0, day0;
bool use_fftw = false;
double fdata = 0;
TimeIncrement timeIncr0 = { 0, TimeUnit::SECONDS };
DateTimeList dtlist;
struct FftMemory
struct FourierMemory
{
Varray<double> array1;
Varray<double> array2;
Varray<double> real;
Varray<double> imag;
#ifdef HAVE_LIBFFTW3
fftw_complex *in_fft;
fftw_complex *out_fft;
......@@ -146,6 +145,7 @@ Filter(void *process)
const auto operatorID = cdoOperatorID();
const auto operfunc = cdoOperatorF1(operatorID);
bool use_fftw = false;
if (Options::Use_FFTW)
{
#ifdef HAVE_LIBFFTW3
......@@ -238,15 +238,15 @@ Filter(void *process)
const auto nts = tsID;
if (nts <= 1) cdoAbort("Number of time steps <= 1!");
std::vector<FftMemory> ompmem(Threading::ompNumThreads);
std::vector<FourierMemory> ompmem(Threading::ompNumThreads);
if (use_fftw)
{
#ifdef HAVE_LIBFFTW3
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].in_fft = (fftw_complex *) Malloc(nts * sizeof(fftw_complex));
ompmem[i].out_fft = (fftw_complex *) Malloc(nts * sizeof(fftw_complex));
ompmem[i].in_fft = fftw_alloc_complex(nts);
ompmem[i].out_fft = fftw_alloc_complex(nts);
ompmem[i].p_T2S = fftw_plan_dft_1d(nts, ompmem[i].in_fft, ompmem[i].out_fft, 1, FFTW_ESTIMATE);
ompmem[i].p_S2T = fftw_plan_dft_1d(nts, ompmem[i].out_fft, ompmem[i].in_fft, -1, FFTW_ESTIMATE);
}
......@@ -256,8 +256,8 @@ Filter(void *process)
{
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].array1.resize(nts);
ompmem[i].array2.resize(nts);
ompmem[i].real.resize(nts);
ompmem[i].imag.resize(nts);
}
}
......@@ -333,13 +333,13 @@ Filter(void *process)
{
const auto ompthID = cdo_omp_get_thread_num();
for (int tsID = 0; tsID < nts; tsID++) ompmem[ompthID].array1[tsID] = vars[tsID][varID][levelID].vec[i];
for (int tsID = 0; tsID < nts; tsID++) ompmem[ompthID].real[tsID] = vars[tsID][varID][levelID].vec[i];
varrayFill(ompmem[ompthID].array2, 0.0);
varrayFill(ompmem[ompthID].imag, 0.0);
filter_intrinsic(nts, fmasc, ompmem[ompthID].array1.data(), ompmem[ompthID].array2.data());
filter_intrinsic(nts, fmasc, ompmem[ompthID].real.data(), ompmem[ompthID].imag.data());
for (int tsID = 0; tsID < nts; tsID++) vars[tsID][varID][levelID].vec[i] = ompmem[ompthID].array1[tsID];
for (int tsID = 0; tsID < nts; tsID++) vars[tsID][varID][levelID].vec[i] = ompmem[ompthID].real[tsID];
}
}
}
......@@ -350,8 +350,10 @@ Filter(void *process)
{
for (int i = 0; i < Threading::ompNumThreads; i++)
{
Free(ompmem[i].in_fft);
Free(ompmem[i].out_fft);
fftw_free(ompmem[i].in_fft);
fftw_free(ompmem[i].out_fft);
fftw_destroy_plan(ompmem[i].p_T2S);
fftw_destroy_plan(ompmem[i].p_S2T);
}
}
#endif
......
......@@ -15,6 +15,14 @@
GNU General Public License for more details.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_LIBFFTW3
#include <fftw3.h>
#endif
#include <cdi.h>
#include "process_int.h"
......@@ -24,8 +32,116 @@
#include "cdo_options.h"
#include "cimdOmp.h"
#define NALLOC_INC 1024
struct FourierMemory
{
Varray<double> real;
Varray<double> imag;
Varray<double> work_r;
Varray<double> work_i;
#ifdef HAVE_LIBFFTW3
fftw_complex *in_fft;
fftw_complex *out_fft;
fftw_plan plan;
#endif
};
static void
fourier_fftw(int sign, int varID, int levelID, int nts, size_t gridsize, double missval, FieldVector3D &vars, std::vector<FourierMemory> &ompmem)
{
#ifdef HAVE_LIBFFTW3
const double norm = 1. / std::sqrt(nts);
#ifdef _OPENMP
#pragma omp parallel for default(shared)
#endif
for (size_t i = 0; i < gridsize; i++)
{
const auto ompthID = cdo_omp_get_thread_num();
bool hasMissvals = false;
for (int tsID = 0; tsID < nts; tsID++)
{
const auto real = vars[tsID][varID][levelID].vec[2 * i];
const auto imag = vars[tsID][varID][levelID].vec[2 * i + 1];
ompmem[ompthID].in_fft[tsID][0] = real;
ompmem[ompthID].in_fft[tsID][1] = imag;
if (DBL_IS_EQUAL(real, missval) || DBL_IS_EQUAL(imag, missval))
hasMissvals = true;
}
if (hasMissvals)
{
for (int tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = missval;
vars[tsID][varID][levelID].vec[2 * i + 1] = missval;
}
}
else
{
fftw_execute(ompmem[ompthID].plan);
for (int tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = ompmem[ompthID].out_fft[tsID][0] * norm;
vars[tsID][varID][levelID].vec[2 * i + 1] = ompmem[ompthID].out_fft[tsID][1] *norm;
}
}
}
#endif
}
static void
fourier_intrinsic(int sign, int varID, int levelID, int nts, size_t gridsize, double missval, FieldVector3D &vars, std::vector<FourierMemory> &ompmem)
{
const bool isPower2 = ((nts & (nts - 1)) == 0);
#ifdef _OPENMP
#pragma omp parallel for default(shared)
#endif
for (size_t i = 0; i < gridsize; i++)
{
const auto ompthID = cdo_omp_get_thread_num();
bool hasMissvals = false;
for (int tsID = 0; tsID < nts; tsID++)
{
const auto real = vars[tsID][varID][levelID].vec[2 * i];
const auto imag = vars[tsID][varID][levelID].vec[2 * i + 1];
ompmem[ompthID].real[tsID] = real;
ompmem[ompthID].imag[tsID] = imag;
if (DBL_IS_EQUAL(real, missval) || DBL_IS_EQUAL(imag, missval))
hasMissvals = true;
}
if (hasMissvals)
{
for (int tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = missval;
vars[tsID][varID][levelID].vec[2 * i + 1] = missval;
}
}
else
{
if (isPower2) // nts is a power of 2
fft(ompmem[ompthID].real.data(), ompmem[ompthID].imag.data(), nts, sign);
else
ft_r(ompmem[ompthID].real.data(), ompmem[ompthID].imag.data(), nts, sign, ompmem[ompthID].work_r.data(),
ompmem[ompthID].work_i.data());
for (int tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = ompmem[ompthID].real[tsID];
vars[tsID][varID][levelID].vec[2 * i + 1] = ompmem[ompthID].imag[tsID];
}
}
}
}
void *
Fourier(void *process)
{
......@@ -33,16 +149,22 @@ Fourier(void *process)
int varID, levelID;
int nalloc = 0;
size_t nmiss;
struct FourierMemory
{
Varray<double> real;
Varray<double> imag;
Varray<double> work_r;
Varray<double> work_i;
};
cdoInitialize(process);
bool use_fftw = false;
if (Options::Use_FFTW)
{
#ifdef HAVE_LIBFFTW3
if (Options::cdoVerbose) cdoPrint("Using fftw3 lib");
use_fftw = true;
#else
if (Options::cdoVerbose) cdoPrint("LIBFFTW3 support not compiled in!");
#endif
}
if (Options::cdoVerbose && !use_fftw) cdoPrint("Using intrinsic FFT function!");
operatorInputArg("the sign of the exponent (-1 for normal or 1 for reverse transformation)!");
const auto sign = parameter2int(cdoOperatorArgv(0));
......@@ -85,7 +207,7 @@ Fourier(void *process)
for (int recID = 0; recID < nrecs; recID++)
{
cdoInqRecord(streamID1, &varID, &levelID);
const size_t gridsize = varList[varID].gridsize;
const auto gridsize = varList[varID].gridsize;
vars[tsID][varID][levelID].resize(2 * gridsize);
cdoReadRecord(streamID1, vars[tsID][varID][levelID].vec.data(), &nmiss);
vars[tsID][varID][levelID].nmiss = nmiss;
......@@ -99,12 +221,28 @@ Fourier(void *process)
const bool isPower2 = ((nts & (nts - 1)) == 0);
std::vector<FourierMemory> ompmem(Threading::ompNumThreads);
for (int i = 0; i < Threading::ompNumThreads; i++)
if (use_fftw)
{
#ifdef HAVE_LIBFFTW3
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].in_fft = fftw_alloc_complex(nts);
ompmem[i].out_fft = fftw_alloc_complex(nts);
ompmem[i].plan = fftw_plan_dft_1d(nts, ompmem[i].in_fft, ompmem[i].out_fft, sign, FFTW_ESTIMATE);
}
if (Options::cdoVerbose) fftw_print_plan(ompmem[0].plan);
#endif
}
else
{
ompmem[i].real.resize(nts);
ompmem[i].imag.resize(nts);
if (!isPower2) ompmem[i].work_r.resize(nts);
if (!isPower2) ompmem[i].work_i.resize(nts);
for (int i = 0; i < Threading::ompNumThreads; i++)
{
ompmem[i].real.resize(nts);
ompmem[i].imag.resize(nts);
if (!isPower2) ompmem[i].work_r.resize(nts);
if (!isPower2) ompmem[i].work_i.resize(nts);
}
}
for (varID = 0; varID < nvars; varID++)
......@@ -114,47 +252,25 @@ Fourier(void *process)
const auto nlevels = varList[varID].nlevels;
for (levelID = 0; levelID < nlevels; levelID++)
{
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(tsID)
#endif
for (size_t i = 0; i < gridsize; i++)
{
bool hasMissvals = false;
const auto ompthID = cdo_omp_get_thread_num();
for (tsID = 0; tsID < nts; tsID++)
{
ompmem[ompthID].real[tsID] = vars[tsID][varID][levelID].vec[2 * i];
ompmem[ompthID].imag[tsID] = vars[tsID][varID][levelID].vec[2 * i + 1];
if (DBL_IS_EQUAL(ompmem[ompthID].real[tsID], missval) || DBL_IS_EQUAL(ompmem[ompthID].imag[tsID], missval))
hasMissvals = true;
}
if (use_fftw)
fourier_fftw(sign, varID, levelID, nts, gridsize, missval, vars, ompmem);
else
fourier_intrinsic(sign, varID, levelID, nts, gridsize, missval, vars, ompmem);
}
}
if (hasMissvals)
{
for (tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = missval;
vars[tsID][varID][levelID].vec[2 * i + 1] = missval;
}
}
else
{
if (isPower2) // nts is a power of 2
fft(ompmem[ompthID].real.data(), ompmem[ompthID].imag.data(), nts, sign);
else
ft_r(ompmem[ompthID].real.data(), ompmem[ompthID].imag.data(), nts, sign, ompmem[ompthID].work_r.data(),
ompmem[ompthID].work_i.data());
for (tsID = 0; tsID < nts; tsID++)
{
vars[tsID][varID][levelID].vec[2 * i] = ompmem[ompthID].real[tsID];
vars[tsID][varID][levelID].vec[2 * i + 1] = ompmem[ompthID].imag[tsID];
}
}
}
#ifdef HAVE_LIBFFTW3
if (use_fftw)
{
for (int i = 0; i < Threading::ompNumThreads; i++)
{
fftw_free(ompmem[i].in_fft);
fftw_free(ompmem[i].out_fft);
fftw_destroy_plan(ompmem[i].plan);
}
fftw_cleanup();
}
#endif
for (tsID = 0; tsID < nts; tsID++)
{
......
......@@ -441,7 +441,6 @@ fft(double *restrict real, double *restrict imag, int n, int sign)
// sign should be 1 (FT) or -1 (reverse FT)
int j, j1, j2;
int bit;
double temp_r, temp_i;
// Bit reversal part
for (int i = j = 0; i < n; i++) // The bit pattern of i and j are reverse
......@@ -462,6 +461,7 @@ fft(double *restrict real, double *restrict imag, int n, int sign)
double ww_i = 0;
for (int i = 0; i < step; i++)
{
double temp_r, temp_i;
for (j1 = i, j2 = i + step; j2 < n; j1 += 2 * step, j2 += 2 * step)
{
temp_r = ww_r * real[j2] - ww_i * imag[j2];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment