diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 10515165cbe3a16354226aa9f11756200bfa33fd..96f281c37d02ffb82d28f0ef6c638950f4e01b29 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -16,10 +16,8 @@ add_library( mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp mo_lib_intp_rbf.F90 - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp - mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp - interpolation_bindings.cpp -) + mo_lib_intp_rbf.cpp + interpolation_bindings.cpp) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp index 628f4115978bc8981084b3418478d8e1844b2232..4524ad760c29d92a3716b58fea8dba8592214b05 100644 --- a/src/interpolation/interpolation_bindings.cpp +++ b/src/interpolation/interpolation_bindings.cpp @@ -12,6 +12,7 @@ #include "interpolation_bindings.h" #include "mo_lib_interpolation_scalar.hpp" #include "mo_lib_interpolation_vector.hpp" +#include "mo_lib_intp_rbf.hpp" // This is the binding for mo_interpolation_vector::edges2cells_vector_lib // (wp=dp) @@ -326,3 +327,132 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, avg_psi_c, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc); } + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dp_lib +void rbf_vec_interpol_vertex_lib_dp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_sp_lib +void rbf_vec_interpol_vertex_lib_sp( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dpsp_lib +void rbf_vec_interpol_vertex_lib_dpsp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v) { + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + lacc, acc_async, nlev, nblks_e, nblks_v); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp) +void rbf_interpol_c2grad_lib_sp(const float *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const float *rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<float>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp) +void rbf_interpol_c2grad_lib_dp(const double *p_cell_in, + const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc) { + + rbf_interpol_c2grad_lib<double>( + p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x, + grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, + nproma, rbf_c2grad_dim, nlev, nblk_c, lacc); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp) +void rbf_vec_interpol_cell_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { + + rbf_vec_interpol_cell_lib<float>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp) +void rbf_vec_interpol_cell_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async) { + + rbf_vec_interpol_cell_lib<double>( + p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma, + nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=dp) +void rbf_vec_interpol_edge_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async) { + + rbf_vec_interpol_edge_lib<double>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} + +// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=sp) +void rbf_vec_interpol_edge_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async) { + + rbf_vec_interpol_edge_lib<float>( + p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out, + i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev, + nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async); +} diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h index 7cb873d8ac2f8ba1bffa0553d28847b2b9b11c58..64c6a8c0a88f1dab691e6b69d13ade4b64954604 100644 --- a/src/interpolation/interpolation_bindings.h +++ b/src/interpolation/interpolation_bindings.h @@ -185,4 +185,69 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx, const int i_startidx_in, const int i_endidx_in, const int slev, const int elev, const int nproma, const int nlev, const int nblks_c, const bool lacc); + +void rbf_vec_interpol_vertex_lib_dp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_vec_interpol_vertex_lib_sp( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_vec_interpol_vertex_lib_dpsp( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +void rbf_interpol_c2grad_lib_sp( + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + const float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); + +void rbf_interpol_c2grad_lib_dp( + const double *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + const double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblk_c, bool lacc); + +void rbf_vec_interpol_cell_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +void rbf_vec_interpol_cell_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +void rbf_vec_interpol_edge_lib_dp( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +void rbf_vec_interpol_edge_lib_sp( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); } diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp deleted file mode 100644 index c9b776e5f409eb7878cd8b2dab3203c655422b1f..0000000000000000000000000000000000000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() -/// -/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf - -#include <type_traits> -#include <Kokkos_Core.hpp> -#include "mo_lib_loopindices.hpp" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - - -constexpr int rbf_vec_dim_v = 6; - -//------------------------------------------------------------------------- -// -// -//> -/// Performs vector RBF reconstruction at triangle vertices. -/// -/// Theory described in Narcowich and Ward (Math Comp. 1994) and -/// Bonaventura and Baudisch (Mox Report n. 75). -/// It takes edge based variables as input and combines them -/// into three dimensional cartesian vectors at each vertex. -/// -/// Two templated variables in order to support mixed precision. -/// Intended that type_traits::is_floating_point(T,S)==TRUE -/// precision(T) >= precision(S) -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - // Dimensions for the arrays. - const int nlev, const int nblks_e, const int nblks_v - ) -{ - /* -#ifdef DIM_ENABLE_GPU - if (lacc){ using MemSpace = Kokkos::CudaSpace; - } else { using MemSpace = Kokkos::HostSpace; } -#else - using MemSpace = Kokkos::HostSpace; -#endif - - */ - - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; - - - - // input components of velocity or horizontal vorticity vectors at edge midpoints - // dim: (nproma,nlev,nblks_e) - UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); - - // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex - // (rbf_vec_dim_v,nproma,nblks_v) - UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); - UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); - - // coefficients are working precision array containing the coefficients used for vector rbf interpolation - // at each tringle vertex (input is normal component), - // dim: (rbf_vec_dim_v,2,nproma,nblks_v) - UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); - - // reconstructed x-component (u) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); - // reconstructed y-component (v) of velocity vector, - // dim: (nproma,nlev,nblks_v) - UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); - - // Local vars - //int jv, jk, jb; // integer over vertices, levels, and blocks, - int jb; // integer over vertices, levels, and blocks, - int i_startidx; // start index - int i_endidx; // end index - - for (jb=i_startblk; jb <= i_endblk; ++jb){ - - get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, - i_startidx, i_endidx); - - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); - - Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, - KOKKOS_LAMBDA(const int jk, const int jv) { - - // NOTE: Static indexes reduced by 1 from Fortran version - p_u_out_view(jv, jk, jb) = - ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - p_v_out_view(jv, jk, jb) = - ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + - ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + - ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + - ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + - ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + - ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); - } - ); - } -} - -// Explicit instantiation - double precision -template -void rbf_vec_interpol_vertex_lib<double, double>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - single precision -template -void rbf_vec_interpol_vertex_lib<float, float>( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -// Explicit instantiation - mixed precision -template -void rbf_vec_interpol_vertex_lib<double, float>( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp deleted file mode 100644 index c0b6f05bed1d70d2ee625f45b4ad5b457bc84451..0000000000000000000000000000000000000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -template <typename T, typename S> -void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const T* rbf_vec_coeff_v, - S* p_u_out, - S* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_c - ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp deleted file mode 100644 index 06dc4678e1e3234c7e3a5a337ca36339100f6d20..0000000000000000000000000000000000000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); -} - - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - -void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ) -{ - rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, - rbf_vec_blk_v, - rbf_vec_coeff_v, - p_u_out, - p_v_out, - i_startblk, // start_block needed for get_indices_c_lib - i_endblk, // end_block needed for get_indices_c_lib - i_startidx_in, // start_index needed for get_indices_c_lib - i_endidx_in, // end_index needed for get_indices_c_lib - slev, // vertical start level - elev, // vertical end level - nproma, // inner loop length/vector length - lacc, // if true, use Cuda mem-/exec-spaces - acc_async, // [deprecated] use async acc - nlev, nblks_e, nblks_v - ); - -} - diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h deleted file mode 100644 index 4356f886bee8b5e30f0fe17686367873b34f05db..0000000000000000000000000000000000000000 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h +++ /dev/null @@ -1,54 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#pragma once - -extern "C" { - -void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const double* rbf_vec_coeff_v, - double* p_u_out, - double* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, - const int* rbf_vec_blk_v, - const float* rbf_vec_coeff_v, - float* p_u_out, - float* p_v_out, - const int i_startblk, // start_block needed for get_indices_c_lib - const int i_endblk, // end_block needed for get_indices_c_lib - const int i_startidx_in, // start_index needed for get_indices_c_lib - const int i_endidx_in, // end_index needed for get_indices_c_lib - const int slev, // vertical start level - const int elev, // vertical end level - const int nproma, // inner loop length/vector length - const bool lacc, // if true, use Cuda mem-/exec-spaces - const bool acc_async, // [deprecated] use async acc - const int nlev, const int nblks_e, const int nblks_v - ); - -} \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d1178a65397571818db4104ca546ff67eb35d01e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.cpp @@ -0,0 +1,475 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf.hpp" +#include <Kokkos_Core.hpp> + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. +/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v) { + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + */ + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedS3D; + + // input components of velocity or horizontal vorticity vectors at edge + // midpoints dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf + // interpolation at each triangle vertex (rbf_vec_dim_v,nproma,nblks_v) + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used + // for vector rbf interpolation at each tringle vertex (input is normal + // component), dim: (rbf_vec_dim_v,2,nproma,nblks_v) + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, + nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + // int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb = i_startblk; jb <= i_endblk; ++jb) { + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb) * + p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb) * + p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb) * + p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb) * + p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb) * + p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb) * + p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + }); + } +} + +template <typename T> +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + bool lacc) { + + // aliases for unmanaged Kokkos views + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + // to avoid memory ownership issues + UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); + UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); + UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c); + UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, + nproma, nblks_c); + UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, + nproma, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_interpol_c2grad", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + grad_x_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 1, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + + grad_y_view(jc, jk, jb) = + rbf_c2grad_coeff_view(0, 2, jc, jb) * p_cell_in_view(jc, jk, jb) + + rbf_c2grad_coeff_view(1, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, + rbf_c2grad_blk_view(1, jc, jb)) + + rbf_c2grad_coeff_view(2, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, + rbf_c2grad_blk_view(2, jc, jb)) + + rbf_c2grad_coeff_view(3, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, + rbf_c2grad_blk_view(3, jc, jb)) + + rbf_c2grad_coeff_view(4, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, + rbf_c2grad_blk_view(4, jc, jb)) + + rbf_c2grad_coeff_view(5, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, + rbf_c2grad_blk_view(5, jc, jb)) + + rbf_c2grad_coeff_view(6, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, + rbf_c2grad_blk_view(6, jc, jb)) + + rbf_c2grad_coeff_view(7, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, + rbf_c2grad_blk_view(7, jc, jb)) + + rbf_c2grad_coeff_view(8, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, + rbf_c2grad_blk_view(8, jc, jb)) + + rbf_c2grad_coeff_view(9, 2, jc, jb) * + p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, + rbf_c2grad_blk_view(9, jc, jb)); + }); + + } // for +} // void + +//------------------------------------------rbf_vec_interpol_cell_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, + nblks_c); + UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, + nblks_c); // TODO + UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c); + UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_cell_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_u_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 1, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + + p_v_out_view(jc, jk, jb) = + rbf_vec_coeff_c_view(0, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, + rbf_vec_blk_c_view(0, jc, jb)) + + rbf_vec_coeff_c_view(1, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, + rbf_vec_blk_c_view(1, jc, jb)) + + rbf_vec_coeff_c_view(2, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, + rbf_vec_blk_c_view(2, jc, jb)) + + rbf_vec_coeff_c_view(3, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, + rbf_vec_blk_c_view(3, jc, jb)) + + rbf_vec_coeff_c_view(4, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, + rbf_vec_blk_c_view(4, jc, jb)) + + rbf_vec_coeff_c_view(5, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, + rbf_vec_blk_c_view(5, jc, jb)) + + rbf_vec_coeff_c_view(6, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, + rbf_vec_blk_c_view(6, jc, jb)) + + rbf_vec_coeff_c_view(7, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, + rbf_vec_blk_c_view(7, jc, jb)) + + rbf_vec_coeff_c_view(8, 2, jc, jb) * + p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, + rbf_vec_blk_c_view(8, jc, jb)); + }); + Kokkos::fence(); + } // for +} // void + +//------------------------------------------rbf_vec_interpol_edge_lib--------------------------------------------- + +template <typename T> +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async) { + + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e); + UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e, nproma, + nblks_e); + UnmanagedT3D p_vt_out_view(p_vt_out, nproma, nlev, nblks_e); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + + Kokkos::parallel_for( + "rbf_vec_interpol_edge_lib", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int je) { + p_vt_out_view(je, jk, jb) = + rbf_vec_coeff_e_view(0, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, + rbf_vec_blk_e_view(0, je, jb)) + + rbf_vec_coeff_e_view(1, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, + rbf_vec_blk_e_view(1, je, jb)) + + rbf_vec_coeff_e_view(2, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, + rbf_vec_blk_e_view(2, je, jb)) + + rbf_vec_coeff_e_view(3, je, jb) * + p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, + rbf_vec_blk_e_view(3, je, jb)); + }); + } +} + +// Explicit instantiation - double precision +template void rbf_vec_interpol_vertex_lib<double, double>( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +// Explicit instantiation - single precision +template void rbf_vec_interpol_vertex_lib<float, float>( + const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +// Explicit instantiation - mixed precision +template void rbf_vec_interpol_vertex_lib<double, float>( + const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out, + const int i_startblk, const int i_endblk, const int i_startidx_in, + const int i_endidx_in, const int slev, const int elev, const int nproma, + const bool lacc, const bool acc_async, const int nlev, const int nblks_e, + const int nblks_v); + +template void rbf_vec_interpol_cell_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_vec_interpol_cell_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c, + const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c, + bool lacc, bool acc_async); + +template void rbf_interpol_c2grad_lib<double>( + const double *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x, + double *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_interpol_c2grad_lib<float>( + const float *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x, + float *grad_y, int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim, + int nlev, int nblks_c, bool lacc); + +template void rbf_vec_interpol_edge_lib<double>( + const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, + int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, + bool acc_async); + +template void rbf_vec_interpol_edge_lib<float>( + const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e, + const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async); diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a85502a08d3d77cae147ff6e9ea298abe891f6c --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf.hpp @@ -0,0 +1,50 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- +#pragma once + +#include "mo_lib_loopindices.hpp" +#include <Kokkos_Core.hpp> +#include <vector> + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v, + const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk, + const int i_endblk, const int i_startidx_in, const int i_endidx_in, + const int slev, const int elev, const int nproma, const bool lacc, + const bool acc_async, const int nlev, const int nblks_e, const int nblks_c); + +template <typename T> +void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx, + const int *rbf_c2grad_blk, + const T *rbf_c2grad_coeff, T *grad_x, T *grad_y, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int rbf_c2grad_dim, int nlev, int nblks_c, + bool lacc); + +template <typename T> +void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c, + const int *rbf_vec_blk_c, + const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int nlev, int nblks_c, int nblks_e, + int rbf_vec_dim_c, bool lacc, bool acc_async); + +template <typename T> +void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e, + const int *rbf_vec_blk_e, + const T *rbf_vec_coeff_e, T *p_vt_out, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nlev, + int nproma, int rbf_vec_dim_e, int nblks_e, + bool lacc, bool acc_async); diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp index 0ee7fa37cad59ded759911ddf10b4e986adbd35a..507ec3f01ae5e79b6053cd52fc0d1af41cc67f41 100644 --- a/test/c/test_interpolation_scalar.cpp +++ b/test/c/test_interpolation_scalar.cpp @@ -48,8 +48,7 @@ public: static constexpr int nlev = 7; // number of vertical levels static constexpr int nblks_c = 2; // number of cell blocks static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = - 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int nblks_v = 2; // number of vertex blocks // Parameter values. const int i_startblk = 0; @@ -385,7 +384,7 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP); // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -424,7 +423,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) { // //////////////////////////////////////////////////////////////////////////////// -TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) { +TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; @@ -496,7 +495,7 @@ public: TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes); // Add test -TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) { +TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) { using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 0aa4f9b6861f4e68207ffaa76eb5feb39795e84a..040d440223c683407a585de764511e5e2b384aea 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -9,117 +9,295 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- -#include <gtest/gtest.h> +#include "mo_lib_intp_rbf.hpp" #include <Kokkos_Core.hpp> +#include <algorithm> +#include <gtest/gtest.h> +#include <numeric> #include <vector> -#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" // Free-function helpers for 3D and 4D array sizes (assumed column-major) -template<typename T> -size_t num_elements_3d(int d1, int d2, int d3) { +template <typename T> size_t num_elements_3d(int d1, int d2, int d3) { return static_cast<size_t>(d1) * d2 * d3; } -template<typename T> -size_t num_elements_4d(int d1, int d2, int d3, int d4) { +template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) { return static_cast<size_t>(d1) * d2 * d3 * d4; } // Define a helper struct that holds the two types. -template<typename InT, typename OutT> -struct MixedPrecision { - using in_type = InT; +template <typename InT, typename OutT> struct MixedPrecision { + using in_type = InT; using out_type = OutT; }; // Define the list of type pairs we want to test. -typedef ::testing::Types< MixedPrecision<double, double>, - MixedPrecision<double, float>, - MixedPrecision<float, float> > MixedTypes; +typedef ::testing::Types<MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float>> + MixedTypes; + +class interp_dimensions { +public: + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_c = 2; // number of cell blocks + static constexpr int nblks_e = 2; // number of edge blocks + static constexpr int nblks_v = 2; // number of vertex blocks + static constexpr int rbf_c2grad_dim = 10; // fixed dimension + static constexpr int rbf_vec_dim_c = 9; + static constexpr int rbf_vec_dim_e = 4; + + // Parameter values. + const int i_startblk = 0; + const int i_endblk = 1; // Test blocks [0, 1] + const int i_startidx_in = 0; + const int i_endidx_in = nproma - 1; + const int slev = 0; + const int elev = nlev - 1; + const bool lacc = false; // Not using ACC-specific behavior. + const bool acc_async = false; // No asynchronous execution. +}; + +// Define a typed test fixture for the functions which have the same input and +// output types +template <typename T> +class RbfInterpolTypedTestFixture : public ::testing::Test, + public interp_dimensions { +public: + // Data arrays. + std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c + std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c + std::vector<int> rbf_vec_idx_c; // size: rbf_vec_dim_c * nproma * nblks_c + std::vector<int> rbf_vec_blk_c; // size: rbf_vec_dim_c * nproma * nblks_c + std::vector<T> + rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c + std::vector<T> grad_x; // size: nproma * nlev * nblks_c + std::vector<T> grad_y; // size: nproma * nlev * nblks_c + std::vector<T> p_vn_in; + std::vector<T> rbf_vec_coeff_c; + std::vector<T> p_u_out; + std::vector<T> p_v_out; + + std::vector<int> rbf_vec_idx_e; + std::vector<int> rbf_vec_blk_e; + std::vector<T> rbf_vec_coeff_e; + std::vector<T> p_vt_out; + + RbfInterpolTypedTestFixture() { + size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c; + size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c; + size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c; + + size_t size3d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c; + size_t size_4d_vec_dim = + static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c; + + size_t size3d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c; + size_t size_4d_edge_lib = + static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c; + + p_cell_in.resize(size3d, static_cast<T>(1)); + p_vn_in.resize(size3d, static_cast<T>(1)); + + rbf_vec_idx_c.resize(size3d_vec_dim, 1); + rbf_vec_blk_c.resize(size3d_vec_dim, 0); + rbf_c2grad_idx.resize(size3d_idx, 1); + rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing. + rbf_vec_idx_e.resize(size3d_vec_dim, 1); + rbf_vec_blk_e.resize(size3d_vec_dim, 0); + + rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1)); + rbf_c2grad_coeff.resize(size4d, static_cast<T>(1)); + rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1)); + + p_u_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_v_out.resize(size3d_vec_dim, static_cast<T>(0)); + p_vt_out.resize(size3d_edge_lib, static_cast<T>(0)); + + grad_x.resize(size3d, static_cast<T>(0)); + grad_y.resize(size3d, static_cast<T>(0)); + } +}; + +typedef ::testing::Types<float, double> MyTypes; + +TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes); + +TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) { + using T = TypeParam; + rbf_interpol_c2grad_lib<TypeParam>( + this->p_cell_in.data(), this->rbf_c2grad_idx.data(), + this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(), + this->grad_x.data(), this->grad_y.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->lacc); -// Define a typed test fixture. + // For each block from i_startblk to i_endblk-1, and for each (i, level) + // the kernel sums rbf_c2grad_dim contributions, each equal to 1. + // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10). + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->grad_x[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_x failure at block " << jb << ", level " << jk + << ", index " << i; + EXPECT_NEAR(this->grad_y[idx], + static_cast<TypeParam>(this->rbf_c2grad_dim), + static_cast<TypeParam>(1e-5)) + << "grad_y failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +TYPED_TEST(RbfInterpolTypedTestFixture, Cell) { + using T = TypeParam; + + rbf_vec_interpol_cell_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_c.data(), + this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c, + this->nblks_e, this->lacc, this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), + static_cast<T>(1e-5)) + << "p_u_out failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +TYPED_TEST(RbfInterpolTypedTestFixture, Edge) { + using T = TypeParam; + + rbf_vec_interpol_edge_lib<T>( + this->p_vn_in.data(), this->rbf_vec_idx_e.data(), + this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(), + this->p_vt_out.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev, this->elev, + this->nlev, this->nproma, this->rbf_vec_dim_e, this->nblks_e, this->lacc, + this->acc_async); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + for (int jk = 0; jk < this->nlev; ++jk) { + for (int i = 0; i < this->nproma; ++i) { + size_t idx = i + static_cast<size_t>(jk) * this->nproma + + static_cast<size_t>(jb) * this->nproma * this->nlev; + EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), + static_cast<T>(1e-5)) + << "p_vt_out failure at block " << jb << ", level " << jk + << ", index " << i; + } + } + } +} + +// Define a typed test fixture for the functions which have different input and +// output types template <typename TypePair> -class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +class RbfVecInterpolMixedTestFixture : public ::testing::Test, + public interp_dimensions { public: - using InType = typename TypePair::in_type; + using InType = typename TypePair::in_type; using OutType = typename TypePair::out_type; // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 4; // number of vertical levels - static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) - static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) - static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = + 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = + 6; // fixed dimension for rbf vector (stencil points) // Parameter values. - int i_startblk = 0; - int i_endblk = 1; // Test blocks [0, 1] + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] int i_startidx_in = 0; - int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 - int slev = 0; - int elev = nlev - 1; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. // Arrays stored in std::vector. - std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) - std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) - std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) - std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) - std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) - - RbfVecInterpolVertexMixedTestFixture() { + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> + rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolMixedTestFixture() { // Allocate and initialize inputs. - p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), + static_cast<InType>(1)); rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); - rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + rbf_vec_coeff_v.resize( + num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), + static_cast<InType>(1)); // Allocate output arrays and initialize to zero. - p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); - p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), + static_cast<OutType>(0)); } }; -TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); +TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes); -TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { - using InType = typename TestFixture::InType; +TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) { + using InType = typename TestFixture::InType; using OutType = typename TestFixture::OutType; // Call the function with mixed precision. rbf_vec_interpol_vertex_lib<InType, OutType>( - this->p_e_in.data(), - this->rbf_vec_idx_v.data(), - this->rbf_vec_blk_v.data(), - this->rbf_vec_coeff_v.data(), - this->p_u_out.data(), - this->p_v_out.data(), - this->i_startblk, - this->i_endblk, - this->i_startidx_in, - this->i_endidx_in, - this->slev, - this->elev, - this->nproma, - this->lacc, - this->acc_async, - this->nlev, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, - RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + this->p_e_in.data(), this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), this->p_v_out.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->lacc, this->acc_async, this->nlev, + this->nblks_e, this->nblks_v); // Check the outputs only for blocks in the range [i_startblk, i_endblk]. for (int block = this->i_startblk; block <= this->i_endblk; ++block) { for (int level = 0; level < this->nlev; ++level) { for (int i = 0; i < this->nproma; ++i) { // Compute the linear index for a 3D array in column-major order: - size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; - // Since every contribution is 1 and there are 6 stencil points, expect 6. - EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; - EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) - << "Failure at block " << block << ", level " << level << ", index " << i; + size_t idx = + i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, + // expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), + static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " + << i; } } }