From a5c1b54e781ff85dacad451a492cc8f44e626e44 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 21:46:48 +0000 Subject: [PATCH] cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib (icon-libraries/libiconmath!34) ## What is the new feature cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib ## How is it implemented Kept separate file from other `mo_lib_intp_rbf` routines to avoid merge conflicts. Will be resolved by Ali and Dylan later. Co-authored-by: Pradipta Samanta <samanta@dkrz.de> Merged-by: Pradipta Samanta <samanta@dkrz.de> Changelog: feature --- src/interpolation/CMakeLists.txt | 5 +- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ++++++++++++++++++ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 +++ ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++ ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 +++++ test/c/CMakeLists.txt | 1 + test/c/test_intp_rbf.cpp | 126 +++++++++++ 7 files changed, 548 insertions(+), 1 deletion(-) create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h create mode 100644 test/c/test_intp_rbf.cpp diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index f982f3b..37c3ad0 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -14,7 +14,10 @@ add_library( mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp - mo_lib_intp_rbf.F90) + mo_lib_intp_rbf.F90 + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp new file mode 100644 index 0000000..c9b776e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -0,0 +1,197 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() +/// +/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf + +#include <type_traits> +#include <Kokkos_Core.hpp> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. +/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v + ) +{ + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + + + + // input components of velocity or horizontal vorticity vectors at edge midpoints + // dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex + // (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), + // dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + //int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb=i_startblk; jb <= i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + } + ); + } +} + +// Explicit instantiation - double precision +template +void rbf_vec_interpol_vertex_lib<double, double>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - single precision +template +void rbf_vec_interpol_vertex_lib<float, float>( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - mixed precision +template +void rbf_vec_interpol_vertex_lib<double, float>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp new file mode 100644 index 0000000..c0b6f05 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_c + ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp new file mode 100644 index 0000000..06dc467 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp @@ -0,0 +1,134 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h new file mode 100644 index 0000000..4356f88 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h @@ -0,0 +1,54 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +extern "C" { + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +} \ No newline at end of file diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 95ca08b..13c5dfe 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -25,6 +25,7 @@ set(SOURCES main.cpp test_tdma_solver.cpp test_interpolation_vector.cpp + test_intp_rbf.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp new file mode 100644 index 0000000..0aa4f9b --- /dev/null +++ b/test/c/test_intp_rbf.cpp @@ -0,0 +1,126 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +// Free-function helpers for 3D and 4D array sizes (assumed column-major) +template<typename T> +size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template<typename T> +size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template<typename InT, typename OutT> +struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + +// Define a typed test fixture. +template <typename TypePair> +class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + + // Parameter values. + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + + // Arrays stored in std::vector. + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolVertexMixedTestFixture() { + // Allocate and initialize inputs. + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); + rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); + rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); + +TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function with mixed precision. + rbf_vec_interpol_vertex_lib<InType, OutType>( + this->p_e_in.data(), + this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), + this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), + this->p_v_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->lacc, + this->acc_async, + this->nlev, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + + // Check the outputs only for blocks in the range [i_startblk, i_endblk]. + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = 0; level < this->nlev; ++level) { + for (int i = 0; i < this->nproma; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab