From 8dfa058d11c7062a41863a861bad68f131cd3a06 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 25 Feb 2025 09:12:14 +0100
Subject: [PATCH 1/6] Mixed precision templating of
 mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib

---
 src/interpolation/CMakeLists.txt              |   5 +-
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 205 ++++++++++++++++++
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp |  32 +++
 ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++
 ...rbf-rbf_vec_interpol_vertex_lib_bindings.h |  54 +++++
 5 files changed, 429 insertions(+), 1 deletion(-)
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index f982f3b..13abb41 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -14,7 +14,10 @@ add_library(
   mo_lib_interpolation_scalar.F90
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
-  mo_lib_intp_rbf.F90)
+  mo_lib_intp_rbf.F90
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+  )
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
new file mode 100644
index 0000000..c84495a
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
@@ -0,0 +1,205 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib()
+///
+/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
+
+
+#include <type_traits>
+#include <Kokkos_Core.hpp>
+#include "mo_lib_loopindices.hpp"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+
+constexpr int rbf_vec_dim_v = 6;
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Performs vector RBF reconstruction at triangle vertices.
+///
+/// Theory described in Narcowich and Ward (Math Comp. 1994) and
+/// Bonaventura and Baudisch (Mox Report n. 75).
+/// It takes edge based variables as input and combines them
+/// into three dimensional cartesian vectors at each vertex.
+///
+/// Two templated variables in order to support mixed precision.
+/// Intended that type_traits::is_floating_point(T,S)==TRUE
+/// precision(T) >= precision(S)
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    // Dimensions for the arrays.
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    /*
+#ifdef DIM_ENABLE_GPU
+    if (lacc){ using MemSpace = Kokkos::CudaSpace;
+    } else { using MemSpace = Kokkos::HostSpace; }
+#else
+    using MemSpace = Kokkos::HostSpace;
+#endif
+
+    // Wrap raw pointers in unmanaged Kokkos Views.
+    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***,          Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    ...
+    */
+
+    // Wrap raw pointers in unmanaged Kokkos Views.
+    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
+    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+
+
+
+    // input components of velocity or horizontal vorticity vectors at edge midpoints
+    // dim: (nproma,nlev,nblks_e)
+    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
+    // (rbf_vec_dim_v,nproma,nblks_v)
+    //UnmanagedConstInt3D rbf_vec_idx_v_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+    //UnmanagedConstInt3D rbf_vec_blk_v_view(rbf_vec_blk_v, rbf_vec_blk_v, nproma, nblks_v);
+    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+    // coefficients are working precision array containing the coefficients used for vector rbf interpolation 
+    // at each tringle vertex (input is normal component), 
+    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+    //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
+    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
+
+    // reconstructed x-component (u) of velocity vector, 
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+    // reconstructed y-component (v) of velocity vector, 
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+    // Local vars
+    //int jv, jk, jb; // integer over vertices, levels, and blocks,
+    int jb; // integer over vertices, levels, and blocks,
+    int i_startidx; // start index
+    int i_endidx;   // end index
+
+    for (jb=i_startblk; jb<i_endblk; ++jb){
+
+        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                          i_startidx, i_endidx);
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+            {slev, i_startidx}, {elev, i_endidx});
+
+        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
+            KOKKOS_LAMBDA(const int jv, const int jk) {
+
+                // NOTE: Static indexes reduced by 1 from Fortran version
+                p_u_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+                p_v_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+            }
+        );
+    }
+}
+
+// Explicit instantiation - double precision
+template 
+void rbf_vec_interpol_vertex_lib<double, double>(
+    const double* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - single precision
+template 
+void rbf_vec_interpol_vertex_lib<float, float>(
+    const float* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - mixed precision
+template 
+void rbf_vec_interpol_vertex_lib<double, float>(
+    const double* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
new file mode 100644
index 0000000..cd6553e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_c
+    );
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
new file mode 100644
index 0000000..1972341
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
@@ -0,0 +1,134 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, double>(
+        p_e_in, 
+        rbf_vec_idx_v, 
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<float, float>(
+        p_e_in, 
+        rbf_vec_idx_v, 
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
+void rbf_vec_interpol_vertex_lib_mixprec(
+    const double* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, float>(
+        p_e_in, 
+        rbf_vec_idx_v, 
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
new file mode 100644
index 0000000..68d497a
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
@@ -0,0 +1,54 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+extern "C" {
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in, 
+    const int* rbf_vec_idx_v, 
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+}
\ No newline at end of file
-- 
GitLab


From 00d15198409a5478959f1af519dc8b0b1e95e1e5 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 25 Feb 2025 09:23:11 +0100
Subject: [PATCH 2/6] cmake format

---
 src/interpolation/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 13abb41..37c3ad0 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -17,7 +17,7 @@ add_library(
   mo_lib_intp_rbf.F90
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
-  )
+)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
-- 
GitLab


From 6025a23b0e10bb22a2b5c405cd73bd6d36d5de55 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 25 Feb 2025 10:44:23 +0100
Subject: [PATCH 3/6] removed trailing whitespaces

---
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 30 +++++++++----------
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp |  4 +--
 ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 24 +++++++--------
 ...rbf-rbf_vec_interpol_vertex_lib_bindings.h |  8 ++---
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
index c84495a..14ad14b 100644
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
@@ -38,8 +38,8 @@ constexpr int rbf_vec_dim_v = 6;
 /// precision(T) >= precision(S)
 template <typename T, typename S>
 void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const T* rbf_vec_coeff_v,
     S* p_u_out,
@@ -90,16 +90,16 @@ void rbf_vec_interpol_vertex_lib(
     UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
     UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
 
-    // coefficients are working precision array containing the coefficients used for vector rbf interpolation 
-    // at each tringle vertex (input is normal component), 
+    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
+    // at each tringle vertex (input is normal component),
     // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
     //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
     UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
 
-    // reconstructed x-component (u) of velocity vector, 
+    // reconstructed x-component (u) of velocity vector,
     // dim: (nproma,nlev,nblks_v)
     UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
-    // reconstructed y-component (v) of velocity vector, 
+    // reconstructed y-component (v) of velocity vector,
     // dim: (nproma,nlev,nblks_v)
     UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
 
@@ -141,10 +141,10 @@ void rbf_vec_interpol_vertex_lib(
 }
 
 // Explicit instantiation - double precision
-template 
+template
 void rbf_vec_interpol_vertex_lib<double, double>(
-    const double* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const double* rbf_vec_coeff_v,
     double* p_u_out,
@@ -162,10 +162,10 @@ void rbf_vec_interpol_vertex_lib<double, double>(
     );
 
 // Explicit instantiation - single precision
-template 
+template
 void rbf_vec_interpol_vertex_lib<float, float>(
-    const float* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const float* rbf_vec_coeff_v,
     float* p_u_out,
@@ -183,10 +183,10 @@ void rbf_vec_interpol_vertex_lib<float, float>(
     );
 
 // Explicit instantiation - mixed precision
-template 
+template
 void rbf_vec_interpol_vertex_lib<double, float>(
-    const double* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const double* rbf_vec_coeff_v,
     float* p_u_out,
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
index cd6553e..c0b6f05 100644
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
@@ -13,8 +13,8 @@
 
 template <typename T, typename S>
 void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const T* rbf_vec_coeff_v,
     S* p_u_out,
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
index 1972341..06dc467 100644
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
@@ -13,8 +13,8 @@
 #include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
 
 void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const double* rbf_vec_coeff_v,
     double* p_u_out,
@@ -32,8 +32,8 @@ void rbf_vec_interpol_vertex_lib_dp(
     )
 {
     rbf_vec_interpol_vertex_lib<double, double>(
-        p_e_in, 
-        rbf_vec_idx_v, 
+        p_e_in,
+        rbf_vec_idx_v,
         rbf_vec_blk_v,
         rbf_vec_coeff_v,
         p_u_out,
@@ -53,8 +53,8 @@ void rbf_vec_interpol_vertex_lib_dp(
 
 
 void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const float* rbf_vec_coeff_v,
     float* p_u_out,
@@ -72,8 +72,8 @@ void rbf_vec_interpol_vertex_lib_sp(
     )
 {
     rbf_vec_interpol_vertex_lib<float, float>(
-        p_e_in, 
-        rbf_vec_idx_v, 
+        p_e_in,
+        rbf_vec_idx_v,
         rbf_vec_blk_v,
         rbf_vec_coeff_v,
         p_u_out,
@@ -93,8 +93,8 @@ void rbf_vec_interpol_vertex_lib_sp(
 }
 
 void rbf_vec_interpol_vertex_lib_mixprec(
-    const double* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const double* rbf_vec_coeff_v,
     float* p_u_out,
@@ -112,8 +112,8 @@ void rbf_vec_interpol_vertex_lib_mixprec(
     )
 {
     rbf_vec_interpol_vertex_lib<double, float>(
-        p_e_in, 
-        rbf_vec_idx_v, 
+        p_e_in,
+        rbf_vec_idx_v,
         rbf_vec_blk_v,
         rbf_vec_coeff_v,
         p_u_out,
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
index 68d497a..4356f88 100644
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
@@ -14,8 +14,8 @@
 extern "C" {
 
 void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const double* rbf_vec_coeff_v,
     double* p_u_out,
@@ -33,8 +33,8 @@ void rbf_vec_interpol_vertex_lib_dp(
     );
 
 void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in, 
-    const int* rbf_vec_idx_v, 
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
     const int* rbf_vec_blk_v,
     const float* rbf_vec_coeff_v,
     float* p_u_out,
-- 
GitLab


From a35191f48dcdbc3fdbc24b299ca02eac9628b0d1 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 25 Feb 2025 22:28:05 +0100
Subject: [PATCH 4/6] fixed few bugs in the routine

---
 ...mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
index 14ad14b..c9b776e 100644
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
@@ -13,7 +13,6 @@
 ///
 /// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
 
-
 #include <type_traits>
 #include <Kokkos_Core.hpp>
 #include "mo_lib_loopindices.hpp"
@@ -65,10 +64,6 @@ void rbf_vec_interpol_vertex_lib(
     using MemSpace = Kokkos::HostSpace;
 #endif
 
-    // Wrap raw pointers in unmanaged Kokkos Views.
-    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***,          Kokkos::LayoutLeft, MemSpace, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    ...
     */
 
     // Wrap raw pointers in unmanaged Kokkos Views.
@@ -85,15 +80,12 @@ void rbf_vec_interpol_vertex_lib(
 
     // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
     // (rbf_vec_dim_v,nproma,nblks_v)
-    //UnmanagedConstInt3D rbf_vec_idx_v_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
-    //UnmanagedConstInt3D rbf_vec_blk_v_view(rbf_vec_blk_v, rbf_vec_blk_v, nproma, nblks_v);
     UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
     UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
 
     // coefficients are working precision array containing the coefficients used for vector rbf interpolation
     // at each tringle vertex (input is normal component),
     // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
-    //UnmanagedConstT4D rbf_vec_coeff_v_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
     UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
 
     // reconstructed x-component (u) of velocity vector,
@@ -109,16 +101,16 @@ void rbf_vec_interpol_vertex_lib(
     int i_startidx; // start index
     int i_endidx;   // end index
 
-    for (jb=i_startblk; jb<i_endblk; ++jb){
+    for (jb=i_startblk; jb <= i_endblk; ++jb){
 
         get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
                           i_startidx, i_endidx);
 
         Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
-            {slev, i_startidx}, {elev, i_endidx});
+            {slev, i_startidx}, {elev + 1, i_endidx + 1});
 
         Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
-            KOKKOS_LAMBDA(const int jv, const int jk) {
+            KOKKOS_LAMBDA(const int jk, const int jv) {
 
                 // NOTE: Static indexes reduced by 1 from Fortran version
                 p_u_out_view(jv, jk, jb) =
-- 
GitLab


From 77f98d4a6863e76125e4d4700ee7cf8d77061070 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 25 Feb 2025 22:31:58 +0100
Subject: [PATCH 5/6] added unit tests for the routine

---
 test/c/CMakeLists.txt    |   1 +
 test/c/test_intp_rbf.cpp | 115 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 test/c/test_intp_rbf.cpp

diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index 95ca08b..13c5dfe 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -25,6 +25,7 @@ set(SOURCES
   main.cpp
   test_tdma_solver.cpp
   test_interpolation_vector.cpp
+  test_intp_rbf.cpp
 )
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
new file mode 100644
index 0000000..4c9bc30
--- /dev/null
+++ b/test/c/test_intp_rbf.cpp
@@ -0,0 +1,115 @@
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <vector>
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+// Free-function helpers for 3D and 4D array sizes (assumed column-major)
+template<typename T>
+size_t num_elements_3d(int d1, int d2, int d3) {
+  return static_cast<size_t>(d1) * d2 * d3;
+}
+
+template<typename T>
+size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+  return static_cast<size_t>(d1) * d2 * d3 * d4;
+}
+
+// Define a helper struct that holds the two types.
+template<typename InT, typename OutT>
+struct MixedPrecision {
+  using in_type  = InT;
+  using out_type = OutT;
+};
+
+// Define the list of type pairs we want to test.
+typedef ::testing::Types< MixedPrecision<double, double>,
+                          MixedPrecision<double,  float>,
+                          MixedPrecision<float,   float>  > MixedTypes;
+
+// Define a typed test fixture.
+template <typename TypePair>
+class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test {
+public:
+  using InType  = typename TypePair::in_type;
+  using OutType = typename TypePair::out_type;
+
+  // Constant dimensions.
+  static constexpr int nproma     = 3;  // inner loop length
+  static constexpr int nlev        = 4;  // number of vertical levels
+  static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
+  static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+
+  // Parameter values.
+  int i_startblk    = 0;
+  int i_endblk      = 1;      // Test blocks [0, 1]
+  int i_startidx_in = 0;
+  int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
+  int slev          = 0;
+  int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
+  bool lacc         = false;  // Not using ACC-specific behavior.
+  bool acc_async    = false;  // No asynchronous execution.
+
+  // Arrays stored in std::vector.
+  std::vector<InType>  p_e_in;           // Dimensions: (nproma, nlev, nblks_e)
+  std::vector<int>     rbf_vec_idx_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<int>     rbf_vec_blk_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<InType>  rbf_vec_coeff_v;    // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
+  std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
+
+  RbfVecInterpolVertexMixedTestFixture() {
+    // Allocate and initialize inputs.
+    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+    rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
+    rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
+    rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+
+    // Allocate output arrays and initialize to zero.
+    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+  }
+};
+
+TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes);
+
+TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
+  using InType  = typename TestFixture::InType;
+  using OutType = typename TestFixture::OutType;
+
+  // Call the function with mixed precision.
+  rbf_vec_interpol_vertex_lib<InType, OutType>(
+    this->p_e_in.data(),
+    this->rbf_vec_idx_v.data(),
+    this->rbf_vec_blk_v.data(),
+    this->rbf_vec_coeff_v.data(),
+    this->p_u_out.data(),
+    this->p_v_out.data(),
+    this->i_startblk,
+    this->i_endblk,
+    this->i_startidx_in,
+    this->i_endidx_in,
+    this->slev,
+    this->elev,
+    this->nproma,
+    this->lacc,
+    this->acc_async,
+    this->nlev,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v);
+
+  // Check the outputs only for blocks in the range [i_startblk, i_endblk].
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = 0; level < this->nlev; ++level) {
+      for (int i = 0; i < this->nproma; ++i) {
+        // Compute the linear index for a 3D array in column-major order:
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        // Since every contribution is 1 and there are 6 stencil points, expect 6.
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
-- 
GitLab


From fa76e94423d6fa2e45443e1ecfa49476af7756fb Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 25 Feb 2025 22:34:24 +0100
Subject: [PATCH 6/6] added license to the new test file

---
 test/c/test_intp_rbf.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 4c9bc30..0aa4f9b 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>
 #include <vector>
-- 
GitLab