From a5c1b54e781ff85dacad451a492cc8f44e626e44 Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 25 Feb 2025 21:46:48 +0000
Subject: [PATCH] cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib
 (icon-libraries/libiconmath!34)

## What is the new feature

cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib

## How is it implemented

Kept separate file from other `mo_lib_intp_rbf` routines to avoid merge
conflicts. Will be resolved by Ali and Dylan later.

Co-authored-by: Pradipta Samanta <samanta@dkrz.de>
Merged-by: Pradipta Samanta <samanta@dkrz.de>
Changelog: feature
---
 src/interpolation/CMakeLists.txt              |   5 +-
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ++++++++++++++++++
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp |  32 +++
 ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++
 ...rbf-rbf_vec_interpol_vertex_lib_bindings.h |  54 +++++
 test/c/CMakeLists.txt                         |   1 +
 test/c/test_intp_rbf.cpp                      | 126 +++++++++++
 7 files changed, 548 insertions(+), 1 deletion(-)
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
 create mode 100644 test/c/test_intp_rbf.cpp

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index f982f3b..37c3ad0 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -14,7 +14,10 @@ add_library(
   mo_lib_interpolation_scalar.F90
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
-  mo_lib_intp_rbf.F90)
+  mo_lib_intp_rbf.F90
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
new file mode 100644
index 0000000..c9b776e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
@@ -0,0 +1,197 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib()
+///
+/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
+
+#include <type_traits>
+#include <Kokkos_Core.hpp>
+#include "mo_lib_loopindices.hpp"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+
+constexpr int rbf_vec_dim_v = 6;
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Performs vector RBF reconstruction at triangle vertices.
+///
+/// Theory described in Narcowich and Ward (Math Comp. 1994) and
+/// Bonaventura and Baudisch (Mox Report n. 75).
+/// It takes edge based variables as input and combines them
+/// into three dimensional cartesian vectors at each vertex.
+///
+/// Two templated variables in order to support mixed precision.
+/// Intended that type_traits::is_floating_point(T,S)==TRUE
+/// precision(T) >= precision(S)
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    // Dimensions for the arrays.
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    /*
+#ifdef DIM_ENABLE_GPU
+    if (lacc){ using MemSpace = Kokkos::CudaSpace;
+    } else { using MemSpace = Kokkos::HostSpace; }
+#else
+    using MemSpace = Kokkos::HostSpace;
+#endif
+
+    */
+
+    // Wrap raw pointers in unmanaged Kokkos Views.
+    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
+    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+
+
+
+    // input components of velocity or horizontal vorticity vectors at edge midpoints
+    // dim: (nproma,nlev,nblks_e)
+    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
+    // (rbf_vec_dim_v,nproma,nblks_v)
+    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
+    // at each tringle vertex (input is normal component),
+    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
+
+    // reconstructed x-component (u) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+    // reconstructed y-component (v) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+    // Local vars
+    //int jv, jk, jb; // integer over vertices, levels, and blocks,
+    int jb; // integer over vertices, levels, and blocks,
+    int i_startidx; // start index
+    int i_endidx;   // end index
+
+    for (jb=i_startblk; jb <= i_endblk; ++jb){
+
+        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                          i_startidx, i_endidx);
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+            {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
+            KOKKOS_LAMBDA(const int jk, const int jv) {
+
+                // NOTE: Static indexes reduced by 1 from Fortran version
+                p_u_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+                p_v_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+            }
+        );
+    }
+}
+
+// Explicit instantiation - double precision
+template
+void rbf_vec_interpol_vertex_lib<double, double>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - single precision
+template
+void rbf_vec_interpol_vertex_lib<float, float>(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - mixed precision
+template
+void rbf_vec_interpol_vertex_lib<double, float>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
new file mode 100644
index 0000000..c0b6f05
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_c
+    );
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
new file mode 100644
index 0000000..06dc467
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
@@ -0,0 +1,134 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, double>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<float, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
+void rbf_vec_interpol_vertex_lib_mixprec(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
new file mode 100644
index 0000000..4356f88
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
@@ -0,0 +1,54 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+extern "C" {
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+}
\ No newline at end of file
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index 95ca08b..13c5dfe 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -25,6 +25,7 @@ set(SOURCES
   main.cpp
   test_tdma_solver.cpp
   test_interpolation_vector.cpp
+  test_intp_rbf.cpp
 )
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
new file mode 100644
index 0000000..0aa4f9b
--- /dev/null
+++ b/test/c/test_intp_rbf.cpp
@@ -0,0 +1,126 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <vector>
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+// Free-function helpers for 3D and 4D array sizes (assumed column-major)
+template<typename T>
+size_t num_elements_3d(int d1, int d2, int d3) {
+  return static_cast<size_t>(d1) * d2 * d3;
+}
+
+template<typename T>
+size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+  return static_cast<size_t>(d1) * d2 * d3 * d4;
+}
+
+// Define a helper struct that holds the two types.
+template<typename InT, typename OutT>
+struct MixedPrecision {
+  using in_type  = InT;
+  using out_type = OutT;
+};
+
+// Define the list of type pairs we want to test.
+typedef ::testing::Types< MixedPrecision<double, double>,
+                          MixedPrecision<double,  float>,
+                          MixedPrecision<float,   float>  > MixedTypes;
+
+// Define a typed test fixture.
+template <typename TypePair>
+class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test {
+public:
+  using InType  = typename TypePair::in_type;
+  using OutType = typename TypePair::out_type;
+
+  // Constant dimensions.
+  static constexpr int nproma     = 3;  // inner loop length
+  static constexpr int nlev        = 4;  // number of vertical levels
+  static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
+  static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+
+  // Parameter values.
+  int i_startblk    = 0;
+  int i_endblk      = 1;      // Test blocks [0, 1]
+  int i_startidx_in = 0;
+  int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
+  int slev          = 0;
+  int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
+  bool lacc         = false;  // Not using ACC-specific behavior.
+  bool acc_async    = false;  // No asynchronous execution.
+
+  // Arrays stored in std::vector.
+  std::vector<InType>  p_e_in;           // Dimensions: (nproma, nlev, nblks_e)
+  std::vector<int>     rbf_vec_idx_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<int>     rbf_vec_blk_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<InType>  rbf_vec_coeff_v;    // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
+  std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
+
+  RbfVecInterpolVertexMixedTestFixture() {
+    // Allocate and initialize inputs.
+    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+    rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
+    rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
+    rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+
+    // Allocate output arrays and initialize to zero.
+    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+  }
+};
+
+TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes);
+
+TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
+  using InType  = typename TestFixture::InType;
+  using OutType = typename TestFixture::OutType;
+
+  // Call the function with mixed precision.
+  rbf_vec_interpol_vertex_lib<InType, OutType>(
+    this->p_e_in.data(),
+    this->rbf_vec_idx_v.data(),
+    this->rbf_vec_blk_v.data(),
+    this->rbf_vec_coeff_v.data(),
+    this->p_u_out.data(),
+    this->p_v_out.data(),
+    this->i_startblk,
+    this->i_endblk,
+    this->i_startidx_in,
+    this->i_endidx_in,
+    this->slev,
+    this->elev,
+    this->nproma,
+    this->lacc,
+    this->acc_async,
+    this->nlev,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v);
+
+  // Check the outputs only for blocks in the range [i_startblk, i_endblk].
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = 0; level < this->nlev; ++level) {
+      for (int i = 0; i < this->nproma; ++i) {
+        // Compute the linear index for a 3D array in column-major order:
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        // Since every contribution is 1 and there are 6 stencil points, expect 6.
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
-- 
GitLab