From 8dfa058d11c7062a41863a861bad68f131cd3a06 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 09:12:14 +0100 Subject: [PATCH 1/6] Mixed precision templating of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib --- src/interpolation/CMakeLists.txt | 5 +- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 205 ++++++++++++++++++ ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 32 +++ ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++ ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 54 +++++ 5 files changed, 429 insertions(+), 1 deletion(-) create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index f982f3b..13abb41 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -14,7 +14,10 @@ add_library( mo_lib_interpolation_scalar.F90 mo_lib_interpolation_vector.F90 mo_lib_interpolation_vector.cpp - mo_lib_intp_rbf.F90) + mo_lib_intp_rbf.F90 + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp + mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp + ) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp new file mode 100644 index 0000000..c84495a --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -0,0 +1,205 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib() +/// +/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf + + +#include <type_traits> +#include <Kokkos_Core.hpp> +#include "mo_lib_loopindices.hpp" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + + +constexpr int rbf_vec_dim_v = 6; + +//------------------------------------------------------------------------- +// +// +//> +/// Performs vector RBF reconstruction at triangle vertices. +/// +/// Theory described in Narcowich and Ward (Math Comp. 1994) and +/// Bonaventura and Baudisch (Mox Report n. 75). +/// It takes edge based variables as input and combines them +/// into three dimensional cartesian vectors at each vertex. +/// +/// Two templated variables in order to support mixed precision. +/// Intended that type_traits::is_floating_point(T,S)==TRUE +/// precision(T) >= precision(S) +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + // Dimensions for the arrays. + const int nlev, const int nblks_e, const int nblks_v + ) +{ + /* +#ifdef DIM_ENABLE_GPU + if (lacc){ using MemSpace = Kokkos::CudaSpace; + } else { using MemSpace = Kokkos::HostSpace; } +#else + using MemSpace = Kokkos::HostSpace; +#endif + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<T***, Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedT3D; + ... + */ + + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; + typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; + typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D; + + + + // input components of velocity or horizontal vorticity vectors at edge midpoints + // dim: (nproma,nlev,nblks_e) + UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e); + + // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex + // (rbf_vec_dim_v,nproma,nblks_v) + //UnmanagedConstInt3D rbf_vec_idx_v_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + //UnmanagedConstInt3D rbf_vec_blk_v_view(rbf_vec_blk_v, rbf_vec_blk_v, nproma, nblks_v); + UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); + UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); + + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), + // dim: (rbf_vec_dim_v,2,nproma,nblks_v) + //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); + + // reconstructed x-component (u) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); + // reconstructed y-component (v) of velocity vector, + // dim: (nproma,nlev,nblks_v) + UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); + + // Local vars + //int jv, jk, jb; // integer over vertices, levels, and blocks, + int jb; // integer over vertices, levels, and blocks, + int i_startidx; // start index + int i_endidx; // end index + + for (jb=i_startblk; jb<i_endblk; ++jb){ + + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, + i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev, i_endidx}); + + Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, + KOKKOS_LAMBDA(const int jv, const int jk) { + + // NOTE: Static indexes reduced by 1 from Fortran version + p_u_out_view(jv, jk, jb) = + ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + p_v_out_view(jv, jk, jb) = + ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) + + ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) + + ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) + + ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) + + ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) + + ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb)); + } + ); + } +} + +// Explicit instantiation - double precision +template +void rbf_vec_interpol_vertex_lib<double, double>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - single precision +template +void rbf_vec_interpol_vertex_lib<float, float>( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +// Explicit instantiation - mixed precision +template +void rbf_vec_interpol_vertex_lib<double, float>( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp new file mode 100644 index 0000000..cd6553e --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp @@ -0,0 +1,32 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +template <typename T, typename S> +void rbf_vec_interpol_vertex_lib( + const T* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const T* rbf_vec_coeff_v, + S* p_u_out, + S* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_c + ); \ No newline at end of file diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp new file mode 100644 index 0000000..1972341 --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp @@ -0,0 +1,134 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h" +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, double>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); +} + + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<float, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + +void rbf_vec_interpol_vertex_lib_mixprec( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ) +{ + rbf_vec_interpol_vertex_lib<double, float>( + p_e_in, + rbf_vec_idx_v, + rbf_vec_blk_v, + rbf_vec_coeff_v, + p_u_out, + p_v_out, + i_startblk, // start_block needed for get_indices_c_lib + i_endblk, // end_block needed for get_indices_c_lib + i_startidx_in, // start_index needed for get_indices_c_lib + i_endidx_in, // end_index needed for get_indices_c_lib + slev, // vertical start level + elev, // vertical end level + nproma, // inner loop length/vector length + lacc, // if true, use Cuda mem-/exec-spaces + acc_async, // [deprecated] use async acc + nlev, nblks_e, nblks_v + ); + +} + diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h new file mode 100644 index 0000000..68d497a --- /dev/null +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h @@ -0,0 +1,54 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +extern "C" { + +void rbf_vec_interpol_vertex_lib_dp( + const double* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const double* rbf_vec_coeff_v, + double* p_u_out, + double* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +void rbf_vec_interpol_vertex_lib_sp( + const float* p_e_in, + const int* rbf_vec_idx_v, + const int* rbf_vec_blk_v, + const float* rbf_vec_coeff_v, + float* p_u_out, + float* p_v_out, + const int i_startblk, // start_block needed for get_indices_c_lib + const int i_endblk, // end_block needed for get_indices_c_lib + const int i_startidx_in, // start_index needed for get_indices_c_lib + const int i_endidx_in, // end_index needed for get_indices_c_lib + const int slev, // vertical start level + const int elev, // vertical end level + const int nproma, // inner loop length/vector length + const bool lacc, // if true, use Cuda mem-/exec-spaces + const bool acc_async, // [deprecated] use async acc + const int nlev, const int nblks_e, const int nblks_v + ); + +} \ No newline at end of file -- GitLab From 00d15198409a5478959f1af519dc8b0b1e95e1e5 Mon Sep 17 00:00:00 2001 From: Dylan Kierans <kierans@dkrz.de> Date: Tue, 25 Feb 2025 09:23:11 +0100 Subject: [PATCH 2/6] cmake format --- src/interpolation/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt index 13abb41..37c3ad0 100644 --- a/src/interpolation/CMakeLists.txt +++ b/src/interpolation/CMakeLists.txt @@ -17,7 +17,7 @@ add_library( mo_lib_intp_rbf.F90 mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp - ) +) add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation) -- GitLab From 6025a23b0e10bb22a2b5c405cd73bd6d36d5de55 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Feb 2025 10:44:23 +0100 Subject: [PATCH 3/6] removed trailing whitespaces --- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 30 +++++++++---------- ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp | 4 +-- ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 24 +++++++-------- ...rbf-rbf_vec_interpol_vertex_lib_bindings.h | 8 ++--- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp index c84495a..14ad14b 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -38,8 +38,8 @@ constexpr int rbf_vec_dim_v = 6; /// precision(T) >= precision(S) template <typename T, typename S> void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, + const T* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const T* rbf_vec_coeff_v, S* p_u_out, @@ -90,16 +90,16 @@ void rbf_vec_interpol_vertex_lib( UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); - // coefficients are working precision array containing the coefficients used for vector rbf interpolation - // at each tringle vertex (input is normal component), + // coefficients are working precision array containing the coefficients used for vector rbf interpolation + // at each tringle vertex (input is normal component), // dim: (rbf_vec_dim_v,2,nproma,nblks_v) //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); - // reconstructed x-component (u) of velocity vector, + // reconstructed x-component (u) of velocity vector, // dim: (nproma,nlev,nblks_v) UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v); - // reconstructed y-component (v) of velocity vector, + // reconstructed y-component (v) of velocity vector, // dim: (nproma,nlev,nblks_v) UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v); @@ -141,10 +141,10 @@ void rbf_vec_interpol_vertex_lib( } // Explicit instantiation - double precision -template +template void rbf_vec_interpol_vertex_lib<double, double>( - const double* p_e_in, - const int* rbf_vec_idx_v, + const double* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const double* rbf_vec_coeff_v, double* p_u_out, @@ -162,10 +162,10 @@ void rbf_vec_interpol_vertex_lib<double, double>( ); // Explicit instantiation - single precision -template +template void rbf_vec_interpol_vertex_lib<float, float>( - const float* p_e_in, - const int* rbf_vec_idx_v, + const float* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const float* rbf_vec_coeff_v, float* p_u_out, @@ -183,10 +183,10 @@ void rbf_vec_interpol_vertex_lib<float, float>( ); // Explicit instantiation - mixed precision -template +template void rbf_vec_interpol_vertex_lib<double, float>( - const double* p_e_in, - const int* rbf_vec_idx_v, + const double* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const double* rbf_vec_coeff_v, float* p_u_out, diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp index cd6553e..c0b6f05 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp @@ -13,8 +13,8 @@ template <typename T, typename S> void rbf_vec_interpol_vertex_lib( - const T* p_e_in, - const int* rbf_vec_idx_v, + const T* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const T* rbf_vec_coeff_v, S* p_u_out, diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp index 1972341..06dc467 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp @@ -13,8 +13,8 @@ #include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, + const double* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const double* rbf_vec_coeff_v, double* p_u_out, @@ -32,8 +32,8 @@ void rbf_vec_interpol_vertex_lib_dp( ) { rbf_vec_interpol_vertex_lib<double, double>( - p_e_in, - rbf_vec_idx_v, + p_e_in, + rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, @@ -53,8 +53,8 @@ void rbf_vec_interpol_vertex_lib_dp( void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, + const float* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const float* rbf_vec_coeff_v, float* p_u_out, @@ -72,8 +72,8 @@ void rbf_vec_interpol_vertex_lib_sp( ) { rbf_vec_interpol_vertex_lib<float, float>( - p_e_in, - rbf_vec_idx_v, + p_e_in, + rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, @@ -93,8 +93,8 @@ void rbf_vec_interpol_vertex_lib_sp( } void rbf_vec_interpol_vertex_lib_mixprec( - const double* p_e_in, - const int* rbf_vec_idx_v, + const double* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const double* rbf_vec_coeff_v, float* p_u_out, @@ -112,8 +112,8 @@ void rbf_vec_interpol_vertex_lib_mixprec( ) { rbf_vec_interpol_vertex_lib<double, float>( - p_e_in, - rbf_vec_idx_v, + p_e_in, + rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h index 68d497a..4356f88 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h @@ -14,8 +14,8 @@ extern "C" { void rbf_vec_interpol_vertex_lib_dp( - const double* p_e_in, - const int* rbf_vec_idx_v, + const double* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const double* rbf_vec_coeff_v, double* p_u_out, @@ -33,8 +33,8 @@ void rbf_vec_interpol_vertex_lib_dp( ); void rbf_vec_interpol_vertex_lib_sp( - const float* p_e_in, - const int* rbf_vec_idx_v, + const float* p_e_in, + const int* rbf_vec_idx_v, const int* rbf_vec_blk_v, const float* rbf_vec_coeff_v, float* p_u_out, -- GitLab From a35191f48dcdbc3fdbc24b299ca02eac9628b0d1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Feb 2025 22:28:05 +0100 Subject: [PATCH 4/6] fixed few bugs in the routine --- ...mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp index 14ad14b..c9b776e 100644 --- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp +++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp @@ -13,7 +13,6 @@ /// /// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf - #include <type_traits> #include <Kokkos_Core.hpp> #include "mo_lib_loopindices.hpp" @@ -65,10 +64,6 @@ void rbf_vec_interpol_vertex_lib( using MemSpace = Kokkos::HostSpace; #endif - // Wrap raw pointers in unmanaged Kokkos Views. - typedef Kokkos::View<const T***, Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T***, Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedT3D; - ... */ // Wrap raw pointers in unmanaged Kokkos Views. @@ -85,15 +80,12 @@ void rbf_vec_interpol_vertex_lib( // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex // (rbf_vec_dim_v,nproma,nblks_v) - //UnmanagedConstInt3D rbf_vec_idx_v_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); - //UnmanagedConstInt3D rbf_vec_blk_v_view(rbf_vec_blk_v, rbf_vec_blk_v, nproma, nblks_v); UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v); UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v); // coefficients are working precision array containing the coefficients used for vector rbf interpolation // at each tringle vertex (input is normal component), // dim: (rbf_vec_dim_v,2,nproma,nblks_v) - //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v); // reconstructed x-component (u) of velocity vector, @@ -109,16 +101,16 @@ void rbf_vec_interpol_vertex_lib( int i_startidx; // start index int i_endidx; // end index - for (jb=i_startblk; jb<i_endblk; ++jb){ + for (jb=i_startblk; jb <= i_endblk; ++jb){ get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev, i_endidx}); + {slev, i_startidx}, {elev + 1, i_endidx + 1}); Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy, - KOKKOS_LAMBDA(const int jv, const int jk) { + KOKKOS_LAMBDA(const int jk, const int jv) { // NOTE: Static indexes reduced by 1 from Fortran version p_u_out_view(jv, jk, jb) = -- GitLab From 77f98d4a6863e76125e4d4700ee7cf8d77061070 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Feb 2025 22:31:58 +0100 Subject: [PATCH 5/6] added unit tests for the routine --- test/c/CMakeLists.txt | 1 + test/c/test_intp_rbf.cpp | 115 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 test/c/test_intp_rbf.cpp diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 95ca08b..13c5dfe 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -25,6 +25,7 @@ set(SOURCES main.cpp test_tdma_solver.cpp test_interpolation_vector.cpp + test_intp_rbf.cpp ) # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp new file mode 100644 index 0000000..4c9bc30 --- /dev/null +++ b/test/c/test_intp_rbf.cpp @@ -0,0 +1,115 @@ +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> +#include <vector> +#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp" + +// Free-function helpers for 3D and 4D array sizes (assumed column-major) +template<typename T> +size_t num_elements_3d(int d1, int d2, int d3) { + return static_cast<size_t>(d1) * d2 * d3; +} + +template<typename T> +size_t num_elements_4d(int d1, int d2, int d3, int d4) { + return static_cast<size_t>(d1) * d2 * d3 * d4; +} + +// Define a helper struct that holds the two types. +template<typename InT, typename OutT> +struct MixedPrecision { + using in_type = InT; + using out_type = OutT; +}; + +// Define the list of type pairs we want to test. +typedef ::testing::Types< MixedPrecision<double, double>, + MixedPrecision<double, float>, + MixedPrecision<float, float> > MixedTypes; + +// Define a typed test fixture. +template <typename TypePair> +class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test { +public: + using InType = typename TypePair::in_type; + using OutType = typename TypePair::out_type; + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 4; // number of vertical levels + static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in) + static constexpr int nblks_v = 2; // number of vertex blocks (for rbf arrays and outputs) + static constexpr int rbf_vec_dim = 6; // fixed dimension for rbf vector (stencil points) + + // Parameter values. + int i_startblk = 0; + int i_endblk = 1; // Test blocks [0, 1] + int i_startidx_in = 0; + int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev - 1; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + + // Arrays stored in std::vector. + std::vector<InType> p_e_in; // Dimensions: (nproma, nlev, nblks_e) + std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v) + std::vector<InType> rbf_vec_coeff_v; // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v) + std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v) + std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v) + + RbfVecInterpolVertexMixedTestFixture() { + // Allocate and initialize inputs. + p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1)); + rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1); + rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0); + rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1)); + + // Allocate output arrays and initialize to zero. + p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0)); + } +}; + +TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes); + +TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) { + using InType = typename TestFixture::InType; + using OutType = typename TestFixture::OutType; + + // Call the function with mixed precision. + rbf_vec_interpol_vertex_lib<InType, OutType>( + this->p_e_in.data(), + this->rbf_vec_idx_v.data(), + this->rbf_vec_blk_v.data(), + this->rbf_vec_coeff_v.data(), + this->p_u_out.data(), + this->p_v_out.data(), + this->i_startblk, + this->i_endblk, + this->i_startidx_in, + this->i_endidx_in, + this->slev, + this->elev, + this->nproma, + this->lacc, + this->acc_async, + this->nlev, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e, + RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v); + + // Check the outputs only for blocks in the range [i_startblk, i_endblk]. + for (int block = this->i_startblk; block <= this->i_endblk; ++block) { + for (int level = 0; level < this->nlev; ++level) { + for (int i = 0; i < this->nproma; ++i) { + // Compute the linear index for a 3D array in column-major order: + size_t idx = i + level * this->nproma + block * this->nproma * this->nlev; + // Since every contribution is 1 and there are 6 stencil points, expect 6. + EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5)) + << "Failure at block " << block << ", level " << level << ", index " << i; + } + } + } +} -- GitLab From fa76e94423d6fa2e45443e1ecfa49476af7756fb Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Tue, 25 Feb 2025 22:34:24 +0100 Subject: [PATCH 6/6] added license to the new test file --- test/c/test_intp_rbf.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp index 4c9bc30..0aa4f9b 100644 --- a/test/c/test_intp_rbf.cpp +++ b/test/c/test_intp_rbf.cpp @@ -1,3 +1,14 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> #include <vector> -- GitLab