diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 10515165cbe3a16354226aa9f11756200bfa33fd..96f281c37d02ffb82d28f0ef6c638950f4e01b29 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -16,10 +16,8 @@ add_library(
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
   mo_lib_intp_rbf.F90
-  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
-  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
-  interpolation_bindings.cpp
-)
+  mo_lib_intp_rbf.cpp
+  interpolation_bindings.cpp)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp
index 628f4115978bc8981084b3418478d8e1844b2232..4524ad760c29d92a3716b58fea8dba8592214b05 100644
--- a/src/interpolation/interpolation_bindings.cpp
+++ b/src/interpolation/interpolation_bindings.cpp
@@ -12,6 +12,7 @@
 #include "interpolation_bindings.h"
 #include "mo_lib_interpolation_scalar.hpp"
 #include "mo_lib_interpolation_vector.hpp"
+#include "mo_lib_intp_rbf.hpp"
 
 // This is the binding for mo_interpolation_vector::edges2cells_vector_lib
 // (wp=dp)
@@ -326,3 +327,132 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                       avg_psi_c, i_startblk, i_endblk, i_startidx_in,
                       i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc);
 }
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dp_lib
+void rbf_vec_interpol_vertex_lib_dp(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<double, double>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_sp_lib
+void rbf_vec_interpol_vertex_lib_sp(
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<float, float>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dpsp_lib
+void rbf_vec_interpol_vertex_lib_dpsp(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<double, float>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp)
+void rbf_interpol_c2grad_lib_sp(const float *p_cell_in,
+                                const int *rbf_c2grad_idx,
+                                const int *rbf_c2grad_blk,
+                                const float *rbf_c2grad_coeff, float *grad_x,
+                                float *grad_y, int i_startblk, int i_endblk,
+                                int i_startidx_in, int i_endidx_in, int slev,
+                                int elev, int nproma, int rbf_c2grad_dim,
+                                int nlev, int nblk_c, bool lacc) {
+
+  rbf_interpol_c2grad_lib<float>(
+      p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x,
+      grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev,
+      nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp)
+void rbf_interpol_c2grad_lib_dp(const double *p_cell_in,
+                                const int *rbf_c2grad_idx,
+                                const int *rbf_c2grad_blk,
+                                const double *rbf_c2grad_coeff, double *grad_x,
+                                double *grad_y, int i_startblk, int i_endblk,
+                                int i_startidx_in, int i_endidx_in, int slev,
+                                int elev, int nproma, int rbf_c2grad_dim,
+                                int nlev, int nblk_c, bool lacc) {
+
+  rbf_interpol_c2grad_lib<double>(
+      p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x,
+      grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev,
+      nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp)
+void rbf_vec_interpol_cell_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async) {
+
+  rbf_vec_interpol_cell_lib<float>(
+      p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp)
+void rbf_vec_interpol_cell_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async) {
+
+  rbf_vec_interpol_cell_lib<double>(
+      p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=dp)
+void rbf_vec_interpol_edge_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async) {
+
+  rbf_vec_interpol_edge_lib<double>(
+      p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev,
+      nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
+}
+
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=sp)
+void rbf_vec_interpol_edge_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async) {
+
+  rbf_vec_interpol_edge_lib<float>(
+      p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev,
+      nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
+}
diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h
index 7cb873d8ac2f8ba1bffa0553d28847b2b9b11c58..64c6a8c0a88f1dab691e6b69d13ade4b64954604 100644
--- a/src/interpolation/interpolation_bindings.h
+++ b/src/interpolation/interpolation_bindings.h
@@ -185,4 +185,69 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                      const int i_startidx_in, const int i_endidx_in,
                      const int slev, const int elev, const int nproma,
                      const int nlev, const int nblks_c, const bool lacc);
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+void rbf_vec_interpol_vertex_lib_dpsp(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+void rbf_interpol_c2grad_lib_sp(
+    const float *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x,
+    const float *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblk_c, bool lacc);
+
+void rbf_interpol_c2grad_lib_dp(
+    const double *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x,
+    const double *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblk_c, bool lacc);
+
+void rbf_vec_interpol_cell_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+
+void rbf_vec_interpol_cell_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+
+void rbf_vec_interpol_edge_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async);
+
+void rbf_vec_interpol_edge_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async);
 }
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
deleted file mode 100644
index c9b776e5f409eb7878cd8b2dab3203c655422b1f..0000000000000000000000000000000000000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib()
-///
-/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
-
-#include <type_traits>
-#include <Kokkos_Core.hpp>
-#include "mo_lib_loopindices.hpp"
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
-
-
-constexpr int rbf_vec_dim_v = 6;
-
-//-------------------------------------------------------------------------
-//
-//
-//>
-/// Performs vector RBF reconstruction at triangle vertices.
-///
-/// Theory described in Narcowich and Ward (Math Comp. 1994) and
-/// Bonaventura and Baudisch (Mox Report n. 75).
-/// It takes edge based variables as input and combines them
-/// into three dimensional cartesian vectors at each vertex.
-///
-/// Two templated variables in order to support mixed precision.
-/// Intended that type_traits::is_floating_point(T,S)==TRUE
-/// precision(T) >= precision(S)
-template <typename T, typename S>
-void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    // Dimensions for the arrays.
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    /*
-#ifdef DIM_ENABLE_GPU
-    if (lacc){ using MemSpace = Kokkos::CudaSpace;
-    } else { using MemSpace = Kokkos::HostSpace; }
-#else
-    using MemSpace = Kokkos::HostSpace;
-#endif
-
-    */
-
-    // Wrap raw pointers in unmanaged Kokkos Views.
-    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
-    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
-
-
-
-    // input components of velocity or horizontal vorticity vectors at edge midpoints
-    // dim: (nproma,nlev,nblks_e)
-    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
-
-    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
-    // (rbf_vec_dim_v,nproma,nblks_v)
-    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
-    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
-
-    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
-    // at each tringle vertex (input is normal component),
-    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
-    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
-
-    // reconstructed x-component (u) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
-    // reconstructed y-component (v) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
-
-    // Local vars
-    //int jv, jk, jb; // integer over vertices, levels, and blocks,
-    int jb; // integer over vertices, levels, and blocks,
-    int i_startidx; // start index
-    int i_endidx;   // end index
-
-    for (jb=i_startblk; jb <= i_endblk; ++jb){
-
-        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
-                          i_startidx, i_endidx);
-
-        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
-            {slev, i_startidx}, {elev + 1, i_endidx + 1});
-
-        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
-            KOKKOS_LAMBDA(const int jk, const int jv) {
-
-                // NOTE: Static indexes reduced by 1 from Fortran version
-                p_u_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-                p_v_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-            }
-        );
-    }
-}
-
-// Explicit instantiation - double precision
-template
-void rbf_vec_interpol_vertex_lib<double, double>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-// Explicit instantiation - single precision
-template
-void rbf_vec_interpol_vertex_lib<float, float>(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-// Explicit instantiation - mixed precision
-template
-void rbf_vec_interpol_vertex_lib<double, float>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
deleted file mode 100644
index c0b6f05bed1d70d2ee625f45b4ad5b457bc84451..0000000000000000000000000000000000000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
+++ /dev/null
@@ -1,32 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#pragma once
-
-template <typename T, typename S>
-void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_c
-    );
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
deleted file mode 100644
index 06dc4678e1e3234c7e3a5a337ca36339100f6d20..0000000000000000000000000000000000000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h"
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, double>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-}
-
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<float, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
-void rbf_vec_interpol_vertex_lib_mixprec(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
deleted file mode 100644
index 4356f886bee8b5e30f0fe17686367873b34f05db..0000000000000000000000000000000000000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#pragma once
-
-extern "C" {
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-}
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d1178a65397571818db4104ca546ff67eb35d01e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -0,0 +1,475 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_intp_rbf.hpp"
+#include <Kokkos_Core.hpp>
+
+constexpr int rbf_vec_dim_v = 6;
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Performs vector RBF reconstruction at triangle vertices.
+///
+/// Theory described in Narcowich and Ward (Math Comp. 1994) and
+/// Bonaventura and Baudisch (Mox Report n. 75).
+/// It takes edge based variables as input and combines them
+/// into three dimensional cartesian vectors at each vertex.
+///
+/// Two templated variables in order to support mixed precision.
+/// Intended that type_traits::is_floating_point(T,S)==TRUE
+/// precision(T) >= precision(S)
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out,
+    const int i_startblk,    // start_block needed for get_indices_c_lib
+    const int i_endblk,      // end_block needed for get_indices_c_lib
+    const int i_startidx_in, // start_index needed for get_indices_c_lib
+    const int i_endidx_in,   // end_index needed for get_indices_c_lib
+    const int slev,          // vertical start level
+    const int elev,          // vertical end level
+    const int nproma,        // inner loop length/vector length
+    const bool lacc,         // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,    // [deprecated] use async acc
+    // Dimensions for the arrays.
+    const int nlev, const int nblks_e, const int nblks_v) {
+  /*
+#ifdef DIM_ENABLE_GPU
+  if (lacc){ using MemSpace = Kokkos::CudaSpace;
+  } else { using MemSpace = Kokkos::HostSpace; }
+#else
+  using MemSpace = Kokkos::HostSpace;
+#endif
+
+  */
+
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+  typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedS3D;
+
+  // input components of velocity or horizontal vorticity vectors at edge
+  // midpoints dim: (nproma,nlev,nblks_e)
+  UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+  // index array defining the stencil of surrounding edges for vector rbf
+  // interpolation at each triangle vertex (rbf_vec_dim_v,nproma,nblks_v)
+  UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+  UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+  // coefficients are working precision array containing the coefficients used
+  // for vector rbf interpolation at each tringle vertex (input is normal
+  // component), dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+  UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma,
+                                   nblks_v);
+
+  // reconstructed x-component (u) of velocity vector,
+  // dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+  // reconstructed y-component (v) of velocity vector,
+  // dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+  // Local vars
+  // int jv, jk, jb; // integer over vertices, levels, and blocks,
+  int jb;         // integer over vertices, levels, and blocks,
+  int i_startidx; // start index
+  int i_endidx;   // end index
+
+  for (jb = i_startblk; jb <= i_endblk; ++jb) {
+
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_vertex_lib", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jv) {
+          // NOTE: Static indexes reduced by 1 from Fortran version
+          p_u_out_view(jv, jk, jb) =
+              ptr_coeff_view(0, 0, jv, jb) *
+                  p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+              ptr_coeff_view(1, 0, jv, jb) *
+                  p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+              ptr_coeff_view(2, 0, jv, jb) *
+                  p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+              ptr_coeff_view(3, 0, jv, jb) *
+                  p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+              ptr_coeff_view(4, 0, jv, jb) *
+                  p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+              ptr_coeff_view(5, 0, jv, jb) *
+                  p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+          p_v_out_view(jv, jk, jb) =
+              ptr_coeff_view(0, 1, jv, jb) *
+                  p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+              ptr_coeff_view(1, 1, jv, jb) *
+                  p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+              ptr_coeff_view(2, 1, jv, jb) *
+                  p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+              ptr_coeff_view(3, 1, jv, jb) *
+                  p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+              ptr_coeff_view(4, 1, jv, jb) *
+                  p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+              ptr_coeff_view(5, 1, jv, jb) *
+                  p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+        });
+  }
+}
+
+template <typename T>
+void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx,
+                             const int *rbf_c2grad_blk,
+                             const T *rbf_c2grad_coeff, T *grad_x, T *grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in,
+                             int i_endidx_in, int slev, int elev, int nproma,
+                             int rbf_c2grad_dim, int nlev, int nblks_c,
+                             bool lacc) {
+
+  // aliases for unmanaged Kokkos views
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+
+  // to avoid memory ownership issues
+  UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c);
+  UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c);
+  UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
+  UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim,
+                                          nproma, nblks_c);
+  UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim,
+                                          nproma, nblks_c);
+  UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2,
+                                          nproma, nblks_c);
+
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_interpol_c2grad", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          grad_x_view(jc, jk, jb) =
+              rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) +
+              rbf_c2grad_coeff_view(1, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk,
+                                 rbf_c2grad_blk_view(1, jc, jb)) +
+              rbf_c2grad_coeff_view(2, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk,
+                                 rbf_c2grad_blk_view(2, jc, jb)) +
+              rbf_c2grad_coeff_view(3, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk,
+                                 rbf_c2grad_blk_view(3, jc, jb)) +
+              rbf_c2grad_coeff_view(4, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk,
+                                 rbf_c2grad_blk_view(4, jc, jb)) +
+              rbf_c2grad_coeff_view(5, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk,
+                                 rbf_c2grad_blk_view(5, jc, jb)) +
+              rbf_c2grad_coeff_view(6, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk,
+                                 rbf_c2grad_blk_view(6, jc, jb)) +
+              rbf_c2grad_coeff_view(7, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk,
+                                 rbf_c2grad_blk_view(7, jc, jb)) +
+              rbf_c2grad_coeff_view(8, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk,
+                                 rbf_c2grad_blk_view(8, jc, jb)) +
+              rbf_c2grad_coeff_view(9, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk,
+                                 rbf_c2grad_blk_view(9, jc, jb));
+
+          grad_y_view(jc, jk, jb) =
+              rbf_c2grad_coeff_view(0, 2, jc, jb) * p_cell_in_view(jc, jk, jb) +
+              rbf_c2grad_coeff_view(1, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk,
+                                 rbf_c2grad_blk_view(1, jc, jb)) +
+              rbf_c2grad_coeff_view(2, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk,
+                                 rbf_c2grad_blk_view(2, jc, jb)) +
+              rbf_c2grad_coeff_view(3, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk,
+                                 rbf_c2grad_blk_view(3, jc, jb)) +
+              rbf_c2grad_coeff_view(4, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk,
+                                 rbf_c2grad_blk_view(4, jc, jb)) +
+              rbf_c2grad_coeff_view(5, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk,
+                                 rbf_c2grad_blk_view(5, jc, jb)) +
+              rbf_c2grad_coeff_view(6, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk,
+                                 rbf_c2grad_blk_view(6, jc, jb)) +
+              rbf_c2grad_coeff_view(7, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk,
+                                 rbf_c2grad_blk_view(7, jc, jb)) +
+              rbf_c2grad_coeff_view(8, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk,
+                                 rbf_c2grad_blk_view(8, jc, jb)) +
+              rbf_c2grad_coeff_view(9, 2, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk,
+                                 rbf_c2grad_blk_view(9, jc, jb));
+        });
+
+  } // for
+} // void
+
+//------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
+
+template <typename T>
+void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c,
+                               const int *rbf_vec_blk_c,
+                               const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nproma,
+                               int nlev, int nblks_c, int nblks_e,
+                               int rbf_vec_dim_c, bool lacc, bool acc_async) {
+
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+
+  UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+  UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma,
+                                         nblks_c);
+  UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma,
+                                         nblks_c);
+  UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma,
+                                         nblks_c); // TODO
+  UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
+  UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c);
+
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_cell_lib", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          p_u_out_view(jc, jk, jb) =
+              rbf_vec_coeff_c_view(0, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk,
+                               rbf_vec_blk_c_view(0, jc, jb)) +
+              rbf_vec_coeff_c_view(1, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk,
+                               rbf_vec_blk_c_view(1, jc, jb)) +
+              rbf_vec_coeff_c_view(2, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk,
+                               rbf_vec_blk_c_view(2, jc, jb)) +
+              rbf_vec_coeff_c_view(3, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk,
+                               rbf_vec_blk_c_view(3, jc, jb)) +
+              rbf_vec_coeff_c_view(4, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk,
+                               rbf_vec_blk_c_view(4, jc, jb)) +
+              rbf_vec_coeff_c_view(5, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk,
+                               rbf_vec_blk_c_view(5, jc, jb)) +
+              rbf_vec_coeff_c_view(6, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk,
+                               rbf_vec_blk_c_view(6, jc, jb)) +
+              rbf_vec_coeff_c_view(7, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk,
+                               rbf_vec_blk_c_view(7, jc, jb)) +
+              rbf_vec_coeff_c_view(8, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk,
+                               rbf_vec_blk_c_view(8, jc, jb));
+
+          p_v_out_view(jc, jk, jb) =
+              rbf_vec_coeff_c_view(0, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk,
+                               rbf_vec_blk_c_view(0, jc, jb)) +
+              rbf_vec_coeff_c_view(1, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk,
+                               rbf_vec_blk_c_view(1, jc, jb)) +
+              rbf_vec_coeff_c_view(2, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk,
+                               rbf_vec_blk_c_view(2, jc, jb)) +
+              rbf_vec_coeff_c_view(3, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk,
+                               rbf_vec_blk_c_view(3, jc, jb)) +
+              rbf_vec_coeff_c_view(4, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk,
+                               rbf_vec_blk_c_view(4, jc, jb)) +
+              rbf_vec_coeff_c_view(5, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk,
+                               rbf_vec_blk_c_view(5, jc, jb)) +
+              rbf_vec_coeff_c_view(6, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk,
+                               rbf_vec_blk_c_view(6, jc, jb)) +
+              rbf_vec_coeff_c_view(7, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk,
+                               rbf_vec_blk_c_view(7, jc, jb)) +
+              rbf_vec_coeff_c_view(8, 2, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk,
+                               rbf_vec_blk_c_view(8, jc, jb));
+        });
+    Kokkos::fence();
+  } // for
+} // void
+
+//------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
+
+template <typename T>
+void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e,
+                               const int *rbf_vec_blk_e,
+                               const T *rbf_vec_coeff_e, T *p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nlev,
+                               int nproma, int rbf_vec_dim_e, int nblks_e,
+                               bool lacc, bool acc_async) {
+
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+  UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e, nproma,
+                                         nblks_e);
+  UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e, nproma,
+                                         nblks_e);
+  UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e, nproma,
+                                         nblks_e);
+  UnmanagedT3D p_vt_out_view(p_vt_out, nproma, nlev, nblks_e);
+
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_edge_lib", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int je) {
+          p_vt_out_view(je, jk, jb) =
+              rbf_vec_coeff_e_view(0, je, jb) *
+                  p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk,
+                               rbf_vec_blk_e_view(0, je, jb)) +
+              rbf_vec_coeff_e_view(1, je, jb) *
+                  p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk,
+                               rbf_vec_blk_e_view(1, je, jb)) +
+              rbf_vec_coeff_e_view(2, je, jb) *
+                  p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk,
+                               rbf_vec_blk_e_view(2, je, jb)) +
+              rbf_vec_coeff_e_view(3, je, jb) *
+                  p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk,
+                               rbf_vec_blk_e_view(3, je, jb));
+        });
+  }
+}
+
+// Explicit instantiation - double precision
+template void rbf_vec_interpol_vertex_lib<double, double>(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+// Explicit instantiation - single precision
+template void rbf_vec_interpol_vertex_lib<float, float>(
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+// Explicit instantiation - mixed precision
+template void rbf_vec_interpol_vertex_lib<double, float>(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
+template void rbf_vec_interpol_cell_lib<double>(
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+
+template void rbf_vec_interpol_cell_lib<float>(
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+
+template void rbf_interpol_c2grad_lib<double>(
+    const double *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x,
+    double *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblks_c, bool lacc);
+
+template void rbf_interpol_c2grad_lib<float>(
+    const float *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x,
+    float *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblks_c, bool lacc);
+
+template void rbf_vec_interpol_edge_lib<double>(
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async);
+
+template void rbf_vec_interpol_edge_lib<float>(
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8a85502a08d3d77cae147ff6e9ea298abe891f6c
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -0,0 +1,50 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+#include "mo_lib_loopindices.hpp"
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk,
+    const int i_endblk, const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma, const bool lacc,
+    const bool acc_async, const int nlev, const int nblks_e, const int nblks_c);
+
+template <typename T>
+void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx,
+                             const int *rbf_c2grad_blk,
+                             const T *rbf_c2grad_coeff, T *grad_x, T *grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in,
+                             int i_endidx_in, int slev, int elev, int nproma,
+                             int rbf_c2grad_dim, int nlev, int nblks_c,
+                             bool lacc);
+
+template <typename T>
+void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c,
+                               const int *rbf_vec_blk_c,
+                               const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nproma,
+                               int nlev, int nblks_c, int nblks_e,
+                               int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+template <typename T>
+void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e,
+                               const int *rbf_vec_blk_e,
+                               const T *rbf_vec_coeff_e, T *p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nlev,
+                               int nproma, int rbf_vec_dim_e, int nblks_e,
+                               bool lacc, bool acc_async);
diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp
index 0ee7fa37cad59ded759911ddf10b4e986adbd35a..507ec3f01ae5e79b6053cd52fc0d1af41cc67f41 100644
--- a/test/c/test_interpolation_scalar.cpp
+++ b/test/c/test_interpolation_scalar.cpp
@@ -48,8 +48,7 @@ public:
   static constexpr int nlev = 7;    // number of vertical levels
   static constexpr int nblks_c = 2; // number of cell blocks
   static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in)
-  static constexpr int nblks_v =
-      2; // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int nblks_v = 2; // number of vertex blocks
 
   // Parameter values.
   const int i_startblk = 0;
@@ -385,7 +384,7 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP);
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) {
+TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
@@ -424,7 +423,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) {
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) {
+TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
@@ -496,7 +495,7 @@ public:
 TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes);
 
 // Add test
-TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) {
+TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 0aa4f9b6861f4e68207ffaa76eb5feb39795e84a..040d440223c683407a585de764511e5e2b384aea 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -9,117 +9,295 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#include <gtest/gtest.h>
+#include "mo_lib_intp_rbf.hpp"
 #include <Kokkos_Core.hpp>
+#include <algorithm>
+#include <gtest/gtest.h>
+#include <numeric>
 #include <vector>
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
 
 // Free-function helpers for 3D and 4D array sizes (assumed column-major)
-template<typename T>
-size_t num_elements_3d(int d1, int d2, int d3) {
+template <typename T> size_t num_elements_3d(int d1, int d2, int d3) {
   return static_cast<size_t>(d1) * d2 * d3;
 }
 
-template<typename T>
-size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) {
   return static_cast<size_t>(d1) * d2 * d3 * d4;
 }
 
 // Define a helper struct that holds the two types.
-template<typename InT, typename OutT>
-struct MixedPrecision {
-  using in_type  = InT;
+template <typename InT, typename OutT> struct MixedPrecision {
+  using in_type = InT;
   using out_type = OutT;
 };
 
 // Define the list of type pairs we want to test.
-typedef ::testing::Types< MixedPrecision<double, double>,
-                          MixedPrecision<double,  float>,
-                          MixedPrecision<float,   float>  > MixedTypes;
+typedef ::testing::Types<MixedPrecision<double, double>,
+                         MixedPrecision<double, float>,
+                         MixedPrecision<float, float>>
+    MixedTypes;
+
+class interp_dimensions {
+public:
+  // Constant dimensions.
+  static constexpr int nproma = 3;          // inner loop length
+  static constexpr int nlev = 4;            // number of vertical levels
+  static constexpr int nblks_c = 2;         // number of cell blocks
+  static constexpr int nblks_e = 2;         // number of edge blocks
+  static constexpr int nblks_v = 2;         // number of vertex blocks
+  static constexpr int rbf_c2grad_dim = 10; // fixed dimension
+  static constexpr int rbf_vec_dim_c = 9;
+  static constexpr int rbf_vec_dim_e = 4;
+
+  // Parameter values.
+  const int i_startblk = 0;
+  const int i_endblk = 1; // Test blocks [0, 1]
+  const int i_startidx_in = 0;
+  const int i_endidx_in = nproma - 1;
+  const int slev = 0;
+  const int elev = nlev - 1;
+  const bool lacc = false;      // Not using ACC-specific behavior.
+  const bool acc_async = false; // No asynchronous execution.
+};
+
+// Define a typed test fixture for the functions which have the same input and
+// output types
+template <typename T>
+class RbfInterpolTypedTestFixture : public ::testing::Test,
+                                    public interp_dimensions {
+public:
+  // Data arrays.
+  std::vector<T> p_cell_in;        // size: nproma * nlev * nblks_c
+  std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_vec_idx_c;  // size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<int> rbf_vec_blk_c;  // size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<T>
+      rbf_c2grad_coeff;  // size: rbf_c2grad_dim * 2 * nproma * nblks_c
+  std::vector<T> grad_x; // size: nproma * nlev * nblks_c
+  std::vector<T> grad_y; // size: nproma * nlev * nblks_c
+  std::vector<T> p_vn_in;
+  std::vector<T> rbf_vec_coeff_c;
+  std::vector<T> p_u_out;
+  std::vector<T> p_v_out;
+
+  std::vector<int> rbf_vec_idx_e;
+  std::vector<int> rbf_vec_blk_e;
+  std::vector<T> rbf_vec_coeff_e;
+  std::vector<T> p_vt_out;
+
+  RbfInterpolTypedTestFixture() {
+    size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c;
+    size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c;
+    size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c;
+
+    size_t size3d_vec_dim =
+        static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c;
+    size_t size_4d_vec_dim =
+        static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c;
+
+    size_t size3d_edge_lib =
+        static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c;
+    size_t size_4d_edge_lib =
+        static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c;
+
+    p_cell_in.resize(size3d, static_cast<T>(1));
+    p_vn_in.resize(size3d, static_cast<T>(1));
+
+    rbf_vec_idx_c.resize(size3d_vec_dim, 1);
+    rbf_vec_blk_c.resize(size3d_vec_dim, 0);
+    rbf_c2grad_idx.resize(size3d_idx, 1);
+    rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing.
+    rbf_vec_idx_e.resize(size3d_vec_dim, 1);
+    rbf_vec_blk_e.resize(size3d_vec_dim, 0);
+
+    rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1));
+    rbf_c2grad_coeff.resize(size4d, static_cast<T>(1));
+    rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1));
+
+    p_u_out.resize(size3d_vec_dim, static_cast<T>(0));
+    p_v_out.resize(size3d_vec_dim, static_cast<T>(0));
+    p_vt_out.resize(size3d_edge_lib, static_cast<T>(0));
+
+    grad_x.resize(size3d, static_cast<T>(0));
+    grad_y.resize(size3d, static_cast<T>(0));
+  }
+};
+
+typedef ::testing::Types<float, double> MyTypes;
+
+TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes);
+
+TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) {
+  using T = TypeParam;
+  rbf_interpol_c2grad_lib<TypeParam>(
+      this->p_cell_in.data(), this->rbf_c2grad_idx.data(),
+      this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(),
+      this->grad_x.data(), this->grad_y.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c,
+      this->lacc);
 
-// Define a typed test fixture.
+  // For each block from i_startblk to i_endblk-1, and for each (i, level)
+  // the kernel sums rbf_c2grad_dim contributions, each equal to 1.
+  // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10).
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->grad_x[idx],
+                    static_cast<TypeParam>(this->rbf_c2grad_dim),
+                    static_cast<TypeParam>(1e-5))
+            << "grad_x failure at block " << jb << ", level " << jk
+            << ", index " << i;
+        EXPECT_NEAR(this->grad_y[idx],
+                    static_cast<TypeParam>(this->rbf_c2grad_dim),
+                    static_cast<TypeParam>(1e-5))
+            << "grad_y failure at block " << jb << ", level " << jk
+            << ", index " << i;
+      }
+    }
+  }
+}
+
+TYPED_TEST(RbfInterpolTypedTestFixture, Cell) {
+  using T = TypeParam;
+
+  rbf_vec_interpol_cell_lib<T>(
+      this->p_vn_in.data(), this->rbf_vec_idx_c.data(),
+      this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(),
+      this->p_u_out.data(), this->p_v_out.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c,
+      this->nblks_e, this->lacc, this->acc_async);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c),
+                    static_cast<T>(1e-5))
+            << "p_u_out failure at block " << jb << ", level " << jk
+            << ", index " << i;
+      }
+    }
+  }
+}
+
+TYPED_TEST(RbfInterpolTypedTestFixture, Edge) {
+  using T = TypeParam;
+
+  rbf_vec_interpol_edge_lib<T>(
+      this->p_vn_in.data(), this->rbf_vec_idx_e.data(),
+      this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(),
+      this->p_vt_out.data(), this->i_startblk, this->i_endblk,
+      this->i_startidx_in, this->i_endidx_in, this->slev, this->elev,
+      this->nlev, this->nproma, this->rbf_vec_dim_e, this->nblks_e, this->lacc,
+      this->acc_async);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e),
+                    static_cast<T>(1e-5))
+            << "p_vt_out failure at block " << jb << ", level " << jk
+            << ", index " << i;
+      }
+    }
+  }
+}
+
+// Define a typed test fixture for the functions which have different input and
+// output types
 template <typename TypePair>
-class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test {
+class RbfVecInterpolMixedTestFixture : public ::testing::Test,
+                                       public interp_dimensions {
 public:
-  using InType  = typename TypePair::in_type;
+  using InType = typename TypePair::in_type;
   using OutType = typename TypePair::out_type;
 
   // Constant dimensions.
-  static constexpr int nproma     = 3;  // inner loop length
-  static constexpr int nlev        = 4;  // number of vertical levels
-  static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
-  static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
-  static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 4;    // number of vertical levels
+  static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in)
+  static constexpr int nblks_v =
+      2; // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int rbf_vec_dim =
+      6; // fixed dimension for rbf vector (stencil points)
 
   // Parameter values.
-  int i_startblk    = 0;
-  int i_endblk      = 1;      // Test blocks [0, 1]
+  int i_startblk = 0;
+  int i_endblk = 1; // Test blocks [0, 1]
   int i_startidx_in = 0;
-  int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
-  int slev          = 0;
-  int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
-  bool lacc         = false;  // Not using ACC-specific behavior.
-  bool acc_async    = false;  // No asynchronous execution.
+  int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1
+  int slev = 0;
+  int elev = nlev - 1;    // Full vertical range (0 .. nlev-1)
+  bool lacc = false;      // Not using ACC-specific behavior.
+  bool acc_async = false; // No asynchronous execution.
 
   // Arrays stored in std::vector.
-  std::vector<InType>  p_e_in;           // Dimensions: (nproma, nlev, nblks_e)
-  std::vector<int>     rbf_vec_idx_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
-  std::vector<int>     rbf_vec_blk_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
-  std::vector<InType>  rbf_vec_coeff_v;    // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
-  std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
-  std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
-
-  RbfVecInterpolVertexMixedTestFixture() {
+  std::vector<InType> p_e_in;     // Dimensions: (nproma, nlev, nblks_e)
+  std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<InType>
+      rbf_vec_coeff_v;          // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
+  std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v)
+
+  RbfVecInterpolMixedTestFixture() {
     // Allocate and initialize inputs.
-    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e),
+                  static_cast<InType>(1));
     rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
     rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
-    rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+    rbf_vec_coeff_v.resize(
+        num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v),
+        static_cast<InType>(1));
 
     // Allocate output arrays and initialize to zero.
-    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
-    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v),
+                   static_cast<OutType>(0));
+    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v),
+                   static_cast<OutType>(0));
   }
 };
 
-TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes);
+TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes);
 
-TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
-  using InType  = typename TestFixture::InType;
+TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) {
+  using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
   // Call the function with mixed precision.
   rbf_vec_interpol_vertex_lib<InType, OutType>(
-    this->p_e_in.data(),
-    this->rbf_vec_idx_v.data(),
-    this->rbf_vec_blk_v.data(),
-    this->rbf_vec_coeff_v.data(),
-    this->p_u_out.data(),
-    this->p_v_out.data(),
-    this->i_startblk,
-    this->i_endblk,
-    this->i_startidx_in,
-    this->i_endidx_in,
-    this->slev,
-    this->elev,
-    this->nproma,
-    this->lacc,
-    this->acc_async,
-    this->nlev,
-    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e,
-    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v);
+      this->p_e_in.data(), this->rbf_vec_idx_v.data(),
+      this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(),
+      this->p_u_out.data(), this->p_v_out.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->lacc, this->acc_async, this->nlev,
+      this->nblks_e, this->nblks_v);
 
   // Check the outputs only for blocks in the range [i_startblk, i_endblk].
   for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
     for (int level = 0; level < this->nlev; ++level) {
       for (int i = 0; i < this->nproma; ++i) {
         // Compute the linear index for a 3D array in column-major order:
-        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
-        // Since every contribution is 1 and there are 6 stencil points, expect 6.
-        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
-            << "Failure at block " << block << ", level " << level << ", index " << i;
-        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
-            << "Failure at block " << block << ", level " << level << ", index " << i;
+        size_t idx =
+            i + level * this->nproma + block * this->nproma * this->nlev;
+        // Since every contribution is 1 and there are 6 stencil points,
+        // expect 6.
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6),
+                    static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index "
+            << i;
+        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6),
+                    static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index "
+            << i;
       }
     }
   }