Commit 123d5ce6 authored by Uwe Schulzweida's avatar Uwe Schulzweida
Browse files

Intlevel: optimization; removed lev_idx2 and lev_wgt2.

parent ee8ee427
Pipeline #4371 passed with stages
in 15 minutes and 2 seconds
......@@ -48,23 +48,56 @@ vert_interp_lev_kernel(float w1, float w2, const T var1L1, const T var1L2, const
// clang-format on
}
constexpr int BottomLevel = 32000;
constexpr int TopLevel = 32001;
static void
restore_index_and_weights(int nlev1, int idx, float wgt, int &idx1, int &idx2, float &wgt1, float &wgt2)
{
if (idx == BottomLevel)
{
idx1 = 0;
idx2 = 0;
wgt1 = 0.0f;
wgt2 = wgt;
}
else if (idx == TopLevel)
{
idx1 = nlev1 - 1;
idx2 = nlev1 - 1;
wgt1 = wgt;
wgt2 = 0.0f;
}
else
{
idx1 = (idx < 0) ? -idx : idx;
idx2 = (idx < 0) ? idx1 - 1 : idx1 + 1;
wgt1 = wgt;
wgt2 = 1.0f - wgt;
}
// printf("%d %d %g %g\n", idx1, idx2, wgt1, wgt2);
}
// 1D vertical interpolation
template <typename T>
static void
vert_interp_lev(size_t gridsize, T missval, const Varray<T> &vardata1, Varray<T> &vardata2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2)
vert_interp_lev(size_t gridsize, int nlev1, T missval, const Varray<T> &vardata1, Varray<T> &vardata2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt)
{
for (int ilev = 0; ilev < nlev2; ++ilev)
{
const auto idx1 = lev_idx1[ilev];
const auto idx2 = lev_idx2[ilev];
auto wgt1 = lev_wgt1[ilev];
auto wgt2 = lev_wgt2[ilev];
auto var2 = &vardata2[gridsize * ilev];
const auto idx = lev_idx[ilev];
const auto wgt = lev_wgt[ilev];
int idx1, idx2;
float wgt1, wgt2;
restore_index_and_weights(nlev1, idx, wgt, idx1, idx2, wgt1, wgt2);
// upper/lower values from input field
auto var1L1 = &vardata1[gridsize * idx1];
auto var1L2 = &vardata1[gridsize * idx2];
auto var2 = &vardata2[gridsize * ilev];
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(gridsize, var2, var1L1, var1L2, missval, wgt1, wgt2)
#endif
......@@ -76,24 +109,24 @@ vert_interp_lev(size_t gridsize, T missval, const Varray<T> &vardata1, Varray<T>
}
static void
vert_interp_lev(size_t gridsize, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2)
vert_interp_lev(size_t gridsize, int nlev1, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt)
{
if (field1.memType == MemType::Float)
{
vert_interp_lev(gridsize, (float)missval, field1.vec_f, field2.vec_f, nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev(gridsize, nlev1, (float)missval, field1.vec_f, field2.vec_f, nlev2, lev_idx, lev_wgt);
}
else
{
vert_interp_lev(gridsize, missval, field1.vec_d, field2.vec_d, nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev(gridsize, nlev1, missval, field1.vec_d, field2.vec_d, nlev2, lev_idx, lev_wgt);
}
}
// 3D vertical interpolation
template <typename T>
void
vert_interp_lev3d(size_t gridsize, T missval, const Varray<T> &vardata1, Varray<T> &vardata2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2)
vert_interp_lev3d(size_t gridsize, int nlev1, T missval, const Varray<T> &vardata1, Varray<T> &vardata2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt)
{
for (int ilev = 0; ilev < nlev2; ilev++)
{
......@@ -101,18 +134,19 @@ vert_interp_lev3d(size_t gridsize, T missval, const Varray<T> &vardata1, Varray<
auto var2 = &vardata2[offset];
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(gridsize, offset, vardata1, var2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2, missval)
#pragma omp parallel for default(none) shared(gridsize, nlev1, offset, vardata1, var2, lev_idx, lev_wgt, missval)
#endif
for (size_t i = 0; i < gridsize; i++)
{
const auto idx1 = lev_idx1[offset + i] * gridsize + i;
const auto idx2 = lev_idx2[offset + i] * gridsize + i;
const auto wgt1 = lev_wgt1[offset + i];
const auto wgt2 = lev_wgt2[offset + i];
const auto idx = lev_idx[offset + i];
const auto wgt = lev_wgt[offset + i];
int idx1, idx2;
float wgt1, wgt2;
restore_index_and_weights(nlev1, idx, wgt, idx1, idx2, wgt1, wgt2);
// upper/lower values from input field
const auto var1L1 = vardata1[idx1];
const auto var1L2 = vardata1[idx2];
const auto var1L1 = vardata1[idx1 * gridsize + i];
const auto var1L2 = vardata1[idx2 * gridsize + i];
var2[i] = vert_interp_lev_kernel(wgt1, wgt2, var1L1, var1L2, missval);
}
......@@ -121,29 +155,28 @@ vert_interp_lev3d(size_t gridsize, T missval, const Varray<T> &vardata1, Varray<
// Explicit instantiation
template void
vert_interp_lev3d(size_t gridsize, float missval, const Varray<float> &vardata1, Varray<float> &vardata2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2);
vert_interp_lev3d(size_t gridsize, int nlev1, float missval, const Varray<float> &vardata1, Varray<float> &vardata2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt);
template void
vert_interp_lev3d(size_t gridsize, double missval, const Varray<double> &vardata1, Varray<double> &vardata2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2);
vert_interp_lev3d(size_t gridsize, int nlev1, double missval, const Varray<double> &vardata1, Varray<double> &vardata2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt);
void
vert_interp_lev3d(size_t gridsize, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2)
vert_interp_lev3d(size_t gridsize, int nlev1, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt)
{
if (field1.memType == MemType::Float)
{
vert_interp_lev3d(gridsize, (float)missval, field1.vec_f, field2.vec_f, nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev3d(gridsize, nlev1, (float)missval, field1.vec_f, field2.vec_f, nlev2, lev_idx, lev_wgt);
}
else
{
vert_interp_lev3d(gridsize, missval, field1.vec_d, field2.vec_d, nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev3d(gridsize, nlev1, missval, field1.vec_d, field2.vec_d, nlev2, lev_idx, lev_wgt);
}
}
void
vert_gen_weights(int expol, int nlev1, const Varray<double> &lev1, int nlev2, const Varray<double> &lev2, Varray<int> &lev_idx1, Varray<int> &lev_idx2,
Varray<float> &lev_wgt1, Varray<float> &lev_wgt2)
vert_gen_weights(int expol, int nlev1, const Varray<double> &lev1, int nlev2, const Varray<double> &lev2, Varray<int> &lev_idx, Varray<float> &lev_wgt)
{
for (int i2 = 0; i2 < nlev2; ++i2)
{
......@@ -166,29 +199,21 @@ vert_gen_weights(int expol, int nlev1, const Varray<double> &lev1, int nlev2, co
if (i1 - 1 == 0) // destination levels is not covert by the first two input z levels
{
lev_idx1[i2] = 0;
lev_idx2[i2] = 0;
lev_wgt1[i2] = 0.0f;
lev_wgt2[i2] = (expol || IS_EQUAL(lev2[i2], val2));
lev_idx[i2] = BottomLevel;
lev_wgt[i2] = (expol || IS_EQUAL(lev2[i2], val2));
}
else if (i1 == nlev1 - 1) // destination level is beyond the last value of the input z field
{
lev_idx1[i2] = nlev1 - 3;
lev_idx2[i2] = nlev1 - 3;
lev_wgt1[i2] = (expol || IS_EQUAL(lev2[i2], val2));
lev_wgt2[i2] = 0.0f;
lev_idx[i2] = TopLevel;
lev_wgt[i2] = (expol || IS_EQUAL(lev2[i2], val2));
}
else // target z values has two bounday values in input z field
{
lev_idx1[i2] = idx1 - 1;
lev_idx2[i2] = idx2 - 1;
lev_wgt1[i2] = (lev1[idx2] - lev2[i2]) / (lev1[idx2] - lev1[idx1]);
lev_wgt2[i2] = (lev2[i2] - lev1[idx1]) / (lev1[idx2] - lev1[idx1]);
lev_idx[i2] = idx1 - 1;
if (idx1 > idx2) lev_idx[i2] = -lev_idx[i2];
lev_wgt[i2] = (lev1[idx2] - lev2[i2]) / (lev1[idx2] - lev1[idx1]);
}
/*
printf("%d %g %d %d %g %g %d %d %g %g\n", i2, lev2[i2], idx1, idx2, lev1[idx1], lev1[idx2],
lev_idx1[i2], lev_idx2[i2], lev_wgt1[i2], lev_wgt2[i2]);
*/
// printf("%d %g %d %d %g %g %d %g\n", i2, lev2[i2], idx1, idx2, lev1[idx1], lev1[idx2], lev_idx[i2], lev_wgt[i2]);
}
}
......@@ -215,12 +240,12 @@ levelDirDown(const int nlev, const double *const lev)
template <typename T>
static void
vert_gen_weights3d1d(bool expol, size_t gridsize, int nlev1, const Varray<T> &xlev1, int nlev2, const Varray<double> &lev2,
Varray<int> &xlev_idx1, Varray<int> &xlev_idx2, Varray<float> &xlev_wgt1, Varray<float> &xlev_wgt2)
Varray<int> &xlev_idx, Varray<float> &xlev_wgt)
{
const auto nthreads = Threading::ompNumThreads;
Varray2D<double> lev1p2(nthreads, Varray<double>(nlev1 + 2));
Varray2D<float> lev_wgt1(nthreads, Varray<float>(nlev2)), lev_wgt2(nthreads, Varray<float>(nlev2));
Varray2D<int> lev_idx1(nthreads, Varray<int>(nlev2)), lev_idx2(nthreads, Varray<int>(nlev2));
Varray2D<float> lev_wgt(nthreads, Varray<float>(nlev2));
Varray2D<int> lev_idx(nthreads, Varray<int>(nlev2));
// Check monotony of vertical levels
for (int k = 0; k < nlev1; ++k) lev1p2[0][k] = xlev1[k * gridsize];
......@@ -231,7 +256,7 @@ vert_gen_weights3d1d(bool expol, size_t gridsize, int nlev1, const Varray<T> &xl
double level_N = lup ? 1.e33 : -1.e33;
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(expol, gridsize, nlev1, nlev2, lev2, level_0, level_N, lev1p2, xlev1, xlev_idx1, xlev_idx2, xlev_wgt1, xlev_wgt2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2)
#pragma omp parallel for default(none) shared(expol, gridsize, nlev1, nlev2, lev2, level_0, level_N, lev1p2, xlev1, xlev_idx, xlev_wgt, lev_idx, lev_wgt)
#endif
for (size_t i = 0; i < gridsize; i++)
{
......@@ -241,26 +266,24 @@ vert_gen_weights3d1d(bool expol, size_t gridsize, int nlev1, const Varray<T> &xl
lev1p2[ompthID][nlev1 + 1] = level_N;
for (int k = 0; k < nlev1; ++k) lev1p2[ompthID][k + 1] = xlev1[k * gridsize + i];
vert_gen_weights(expol, nlev1 + 2, lev1p2[ompthID], nlev2, lev2, lev_idx1[ompthID], lev_idx2[ompthID], lev_wgt1[ompthID], lev_wgt2[ompthID]);
vert_gen_weights(expol, nlev1 + 2, lev1p2[ompthID], nlev2, lev2, lev_idx[ompthID], lev_wgt[ompthID]);
for (int k = 0; k < nlev2; ++k) xlev_idx1[k * gridsize + i] = lev_idx1[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_idx2[k * gridsize + i] = lev_idx2[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt1[k * gridsize + i] = lev_wgt1[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt2[k * gridsize + i] = lev_wgt2[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_idx[k * gridsize + i] = lev_idx[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt[k * gridsize + i] = lev_wgt[ompthID][k];
}
}
static void
vert_gen_weights3d1d(bool expol, size_t gridsize, int nlev1, Field3D &field1, int nlev2, const Varray<double> &lev2,
Varray<int> &lev_idx1, Varray<int> &lev_idx2, Varray<float> &lev_wgt1, Varray<float> &lev_wgt2)
Varray<int> &lev_idx, Varray<float> &lev_wgt)
{
if (field1.memType == MemType::Float)
{
vert_gen_weights3d1d(expol, gridsize, nlev1, field1.vec_f, nlev2, lev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_gen_weights3d1d(expol, gridsize, nlev1, field1.vec_f, nlev2, lev2, lev_idx, lev_wgt);
}
else
{
vert_gen_weights3d1d(expol, gridsize, nlev1, field1.vec_d, nlev2, lev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_gen_weights3d1d(expol, gridsize, nlev1, field1.vec_d, nlev2, lev2, lev_idx, lev_wgt);
}
}
......@@ -520,8 +543,8 @@ Intlevel(void *process)
varListInit(varList2, vlistID2);
varListSetMemtype(varList2, memtype);
Varray<int> lev_idx1(wisize), lev_idx2(wisize);
Varray<float> lev_wgt1(wisize), lev_wgt2(wisize);
Varray<int> lev_idx(wisize);
Varray<float> lev_wgt(wisize);
const auto streamID2 = cdoOpenWrite(1);
......@@ -571,9 +594,9 @@ Intlevel(void *process)
if (tsID == 0 || zvarIsVarying)
{
if (!zvarname.empty())
vert_gen_weights3d1d(expol, zvarGridsize, nlev1, vardata1[zvarID], nlev2, lev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_gen_weights3d1d(expol, zvarGridsize, nlev1, vardata1[zvarID], nlev2, lev2, lev_idx, lev_wgt);
else
vert_gen_weights(expol, nlev1 + 2, lev1, nlev2, lev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_gen_weights(expol, nlev1 + 2, lev1, nlev2, lev2, lev_idx, lev_wgt);
}
for (varID = 0; varID < nvars; varID++)
......@@ -584,9 +607,9 @@ Intlevel(void *process)
const auto gridsize = varList1[varID].gridsize;
if (!zvarname.empty())
vert_interp_lev3d(gridsize, missval, vardata1[varID], vardata2[varID], nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev3d(gridsize, nlev1, missval, vardata1[varID], vardata2[varID], nlev2, lev_idx, lev_wgt);
else
vert_interp_lev(gridsize, missval, vardata1[varID], vardata2[varID], nlev2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev(gridsize, nlev1, missval, vardata1[varID], vardata2[varID], nlev2, lev_idx, lev_wgt);
for (levelID = 0; levelID < nlev2; levelID++)
{
......
......@@ -30,10 +30,10 @@
#include "cdo_vlist.h"
#include "cdi_lockedIO.h"
void vert_interp_lev3d(size_t gridsize, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx1, const Varray<int> &lev_idx2, const Varray<float> &lev_wgt1, const Varray<float> &lev_wgt2);
void vert_interp_lev3d(size_t gridsize, int nlev1, double missval, const Field3D &field1, Field3D &field2, int nlev2,
const Varray<int> &lev_idx, const Varray<float> &lev_wgt);
void vert_gen_weights(int expol, int nlev1, const Varray<double> &lev1, int nlev2, const Varray<double> &lev2,
Varray<int> &lev_idx1, Varray<int> &lev_idx2, Varray<float> &lev_wgt1, Varray<float> &lev_wgt2);
Varray<int> &lev_idx, Varray<float> &lev_wgt);
bool levelDirUp(const int nlev, const double *const lev);
bool levelDirDown(const int nlev, const double *const lev);
......@@ -47,13 +47,13 @@ bool levelDirDown(const int nlev, const double *const lev);
*/
static void
vert_gen_weights3d(bool expol, size_t gridsize, int nlev1, Varray<float> &xlev1, int nlev2, Varray<double> &xlev2,
Varray<int> &xlev_idx1, Varray<int> &xlev_idx2, Varray<float> &xlev_wgt1, Varray<float> &xlev_wgt2)
Varray<int> &xlev_idx, Varray<float> &xlev_wgt)
{
const auto nthreads = Threading::ompNumThreads;
Varray2D<double> lev1p2(nthreads, Varray<double>(nlev1 + 2));
Varray2D<double> lev2(nthreads, Varray<double>(nlev2));
Varray2D<float> lev_wgt1(nthreads, Varray<float>(nlev2)), lev_wgt2(nthreads, Varray<float>(nlev2));
Varray2D<int> lev_idx1(nthreads, Varray<int>(nlev2)), lev_idx2(nthreads, Varray<int>(nlev2));
Varray2D<float> lev_wgt(nthreads, Varray<float>(nlev2));
Varray2D<int> lev_idx(nthreads, Varray<int>(nlev2));
// Check monotony of vertical levels
for (int k = 0; k < nlev1; ++k) lev1p2[0][k] = xlev1[k * gridsize];
......@@ -64,7 +64,7 @@ vert_gen_weights3d(bool expol, size_t gridsize, int nlev1, Varray<float> &xlev1,
double level_N = lup ? 1.e33 : -1.e33;
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(expol, gridsize, nlev1, nlev2, lev2, xlev2, level_0, level_N, lev1p2, xlev1, xlev_idx1, xlev_idx2, xlev_wgt1, xlev_wgt2, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2)
#pragma omp parallel for default(none) shared(expol, gridsize, nlev1, nlev2, lev2, xlev2, level_0, level_N, lev1p2, xlev1, xlev_idx, xlev_wgt, lev_idx, lev_wgt)
#endif
for (size_t i = 0; i < gridsize; i++)
{
......@@ -75,12 +75,10 @@ vert_gen_weights3d(bool expol, size_t gridsize, int nlev1, Varray<float> &xlev1,
for (int k = 0; k < nlev1; ++k) lev1p2[ompthID][k + 1] = xlev1[k * gridsize + i];
for (int k = 0; k < nlev2; ++k) lev2[ompthID][k] = xlev2[k * gridsize + i];
vert_gen_weights(expol, nlev1 + 2, lev1p2[ompthID], nlev2, lev2[ompthID], lev_idx1[ompthID], lev_idx2[ompthID], lev_wgt1[ompthID], lev_wgt2[ompthID]);
vert_gen_weights(expol, nlev1 + 2, lev1p2[ompthID], nlev2, lev2[ompthID], lev_idx[ompthID], lev_wgt[ompthID]);
for (int k = 0; k < nlev2; ++k) xlev_idx1[k * gridsize + i] = lev_idx1[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_idx2[k * gridsize + i] = lev_idx2[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt1[k * gridsize + i] = lev_wgt1[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt2[k * gridsize + i] = lev_wgt2[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_idx[k * gridsize + i] = lev_idx[ompthID][k];
for (int k = 0; k < nlev2; ++k) xlev_wgt[k * gridsize + i] = lev_wgt[ompthID][k];
}
}
......@@ -230,10 +228,10 @@ Intlevel3d(void *process)
if (i == nzaxis) cdoAbort("No processable variable found (grid coordinate differ)!");
// Create weights for later interpolation - assumption: input vertical correct is constant in time
Varray<int> lev_idx1(nlevo * gridSize), lev_idx2(nlevo * gridSize);
Varray<float> lev_wgt1(nlevo * gridSize), lev_wgt2(nlevo * gridSize);
Varray<int> lev_idx(nlevo * gridSize);
Varray<float> lev_wgt(nlevo * gridSize);
vert_gen_weights3d(expol, gridSize, nlevi, zlevels_in, nlevo, zlevels_out, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_gen_weights3d(expol, gridSize, nlevi, zlevels_in, nlevo, zlevels_out, lev_idx, lev_wgt);
varrayFree(zlevels_in);
// Copy z-axis information to output z-axis
......@@ -338,7 +336,7 @@ Intlevel3d(void *process)
const auto gridsize = varList1[varID].gridsize;
const auto missval = varList1[varID].missval;
vert_interp_lev3d(gridsize, missval, vardata1[varID], vardata2[varID], nlevo, lev_idx1, lev_idx2, lev_wgt1, lev_wgt2);
vert_interp_lev3d(gridsize, nlevi, missval, vardata1[varID], vardata2[varID], nlevo, lev_idx, lev_wgt);
for (levelID = 0; levelID < nlevo; levelID++)
{
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment