From 3a9361dc368018d6c2e2911dd2b8781d9eb0b205 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante5.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 14:21:48 +0100
Subject: [PATCH 01/33] Added bindings for rbf

---
 .../mo_lib_intp_rbf_bindings.cpp              | 32 +++++++++++++++++++
 src/interpolation/mo_lib_intp_rbf_bindings.h  | 22 +++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.h

diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
new file mode 100644
index 0000000..281a89f
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include  mo_lib_intp_rbf_bindings.h
+#include  mo_lib_intp_rbf.hpp 
+
+
+void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk
+                             const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, bool lacc){
+
+
+rbf_interpol_c2grad_lib<wp>(p_cell_in, rbf_c2grad_idx, 
+                            rbf_c2grad_blk, rbf_c2grad_coeff, 
+                            grad_x, grad_y, i_startblk, i_endblk, 
+                            i_startidx_in, i_endidx_in, slev, elev, 
+                            nproma, lacc);
+
+
+}
+
+
+
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
new file mode 100644
index 0000000..826fb7e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -0,0 +1,22 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+#ifdef __SINGLE_PRECISION
+  using wp = single;
+#else 
+  using wp = double;
+#endif
+
+void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, 
+                        const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, 
+                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
+                        int slev, int elev, int nproma, bool lacc); 
-- 
GitLab


From 63e4efcf558af3b5d08ecc7254b258c099036d8f Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante5.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 14:42:23 +0100
Subject: [PATCH 02/33] added Views for c2grad_lib

---
 src/interpolation/mo_lib_intp_rbf.cpp | 31 +++++++++++++++++++++++++++
 src/interpolation/mo_lib_intp_rbf.hpp | 21 ++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 src/interpolation/mo_lib_intp_rbf.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf.hpp

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
new file mode 100644
index 0000000..c74503e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -0,0 +1,31 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#inlcude mo_lib_intp_rbf.hpp
+
+
+void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra
+                             const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, bool lacc){
+
+    //aliases for unmanaged Kokkos views 
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    //to avoid memory ownership issues
+    UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); 
+    UnmanagedConstT3D grad_x_view(grad_x, nproma, nlev, nblks_c); 
+    UnmanagedConstT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
+    UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); 
+    UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); 
+}
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
new file mode 100644
index 0000000..5fbf68b
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -0,0 +1,21 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+#include "mo_lib_loopindices.hpp"
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template <typename T>
+void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra
+                        const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                        int slev, int elev, int nproma, bool lacc);
-- 
GitLab


From c1a0ccffe73b60020a51ad83502783d03a437a43 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante5.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 14:44:57 +0100
Subject: [PATCH 03/33] fixed typo!

---
 src/interpolation/mo_lib_intp_rbf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index c74503e..795d343 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -9,7 +9,7 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#inlcude mo_lib_intp_rbf.hpp
+#include mo_lib_intp_rbf.hpp
 
 
 void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra
-- 
GitLab


From 016db45178c908b4e57f27e3950231ac0cf97143 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante3.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 15:22:30 +0100
Subject: [PATCH 04/33] added binding file into cmake

---
 src/interpolation/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 1051516..f1dc7b9 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -19,7 +19,7 @@ add_library(
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
   interpolation_bindings.cpp
-)
+  mo_lib_intp_rbf_bindings.cpp)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
-- 
GitLab


From 4c640a3a852321ca55f18c9f3d2bd9f882e05b30 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante5.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 15:32:30 +0100
Subject: [PATCH 05/33] Fixed typo

---
 src/interpolation/mo_lib_intp_rbf_bindings.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index 281a89f..8bb0feb 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -9,8 +9,8 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#include  mo_lib_intp_rbf_bindings.h
-#include  mo_lib_intp_rbf.hpp 
+#include  "mo_lib_intp_rbf_bindings.h"
+#include  "mo_lib_intp_rbf.hpp"
 
 
 void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk
-- 
GitLab


From 3fe938837b825e1d47d5c203c3131cb9d0c1b6a0 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante5.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 23:00:08 +0100
Subject: [PATCH 06/33] Ported rbf_interpol_c2grad_lib

---
 src/interpolation/CMakeLists.txt              |  3 +-
 src/interpolation/mo_lib_intp_rbf.cpp         | 66 ++++++++++++++++---
 src/interpolation/mo_lib_intp_rbf.hpp         |  6 +-
 .../mo_lib_intp_rbf_bindings.cpp              | 25 +++++--
 src/interpolation/mo_lib_intp_rbf_bindings.h  | 18 ++---
 5 files changed, 91 insertions(+), 27 deletions(-)

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index f1dc7b9..d5af767 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -19,7 +19,8 @@ add_library(
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
   mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
   interpolation_bindings.cpp
-  mo_lib_intp_rbf_bindings.cpp)
+  mo_lib_intp_rbf_bindings.cpp
+  mo_lib_intp_rbf.cpp)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 795d343..666e02c 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -9,23 +9,73 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#include mo_lib_intp_rbf.hpp
+#include "mo_lib_intp_rbf.hpp"
+#include <Kokkos_Core.hpp>
 
-
-void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra
-                             const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+template <typename T>
+void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                             const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, bool lacc){
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc){
 
     //aliases for unmanaged Kokkos views 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
     typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
     typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
 
     //to avoid memory ownership issues
     UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); 
-    UnmanagedConstT3D grad_x_view(grad_x, nproma, nlev, nblks_c); 
-    UnmanagedConstT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
+    UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); 
+    UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
     UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); 
     UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); 
-}
+    UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); 
+
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+
+        int i_startidx, i_endidx;
+        get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); 
+
+        Kokkos::parallel_for("rbf_interpol_c2grad", 
+                             Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                             KOKKOS_LAMBDA(const int jk, const int jc){
+
+                             grad_x_view(jc, jk, jb) =
+                             rbf_c2grad_coeff_view(0, 1, jc, jb)*
+                             p_cell_in_view(jc, jk, jb) + 
+                             rbf_c2grad_coeff_view(1, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + 
+                             rbf_c2grad_coeff_view(2, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + 
+                             rbf_c2grad_coeff_view(3, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + 
+                             rbf_c2grad_coeff_view(4, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + 
+                             rbf_c2grad_coeff_view(5, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + 
+                             rbf_c2grad_coeff_view(6, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + 
+                             rbf_c2grad_coeff_view(7, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + 
+                             rbf_c2grad_coeff_view(8, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + 
+                             rbf_c2grad_coeff_view(9, 1, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); 
+                             
+                             });
+                             
+    }//for
+}//void
+
+template
+void rbf_interpol_c2grad_lib<double>(const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                             const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+template
+void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                             const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 5fbf68b..8006a43 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -15,7 +15,7 @@
 #include <vector>
 
 template <typename T>
-void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2gra
-                        const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                        const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
                         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                        int slev, int elev, int nproma, bool lacc);
+                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index 8bb0feb..a44a3da 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -9,24 +9,35 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
+
 #include  "mo_lib_intp_rbf_bindings.h"
 #include  "mo_lib_intp_rbf.hpp"
 
 
-void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk
-                             const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y,
+void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                             const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, bool lacc){
+                             int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){
 
 
-rbf_interpol_c2grad_lib<wp>(p_cell_in, rbf_c2grad_idx, 
-                            rbf_c2grad_blk, rbf_c2grad_coeff, 
+rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                            rbf_c2grad_coeff, 
                             grad_x, grad_y, i_startblk, i_endblk, 
                             i_startidx_in, i_endidx_in, slev, elev, 
-                            nproma, lacc);
+                            nproma, nlev, nblk_c, rbf_c2grad_dim, lacc);
+}
 
 
-}
+void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                             const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){
 
 
+rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                            rbf_c2grad_coeff, 
+                            grad_x, grad_y, i_startblk, i_endblk, 
+                            i_startidx_in, i_endidx_in, slev, elev, 
+                            nproma, nlev, nblk_c, rbf_c2grad_dim, lacc);
+}
 
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index 826fb7e..e7ef8f3 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -10,13 +10,15 @@
 // ---------------------------------------------------------------
 #pragma once
 
-#ifdef __SINGLE_PRECISION
-  using wp = single;
-#else 
-  using wp = double;
-#endif
+extern "C" {
 
-void rbf_interpol_c2grad_lib(const wp* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk, 
-                        const wp* rbf_c2grad_coeff, const wp* grad_x, const wp* grad_y, 
+void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                        const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, 
                         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                        int slev, int elev, int nproma, bool lacc); 
+                        int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); 
+
+void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                        const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, 
+                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
+                        int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); 
+}
-- 
GitLab


From 19ab5976de9b83a3ac37333701826e0203851f8a Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Mon, 24 Feb 2025 23:24:44 +0100
Subject: [PATCH 07/33] Added grad_y

---
 src/interpolation/mo_lib_intp_rbf.cpp | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 666e02c..0b0e213 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -62,8 +62,29 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons
                              p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + 
                              rbf_c2grad_coeff_view(9, 1, jc, jb)*
                              p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); 
-                             
-                             });
+                             // NOTE(review): rbf_c2grad_coeff_view is created with extent 2 in its second dimension, so component index 2 below is out of range; grad_x / grad_y presumably need components 0 / 1 - confirm against the Fortran original.
+                             grad_y_view(jc, jk, jb) =
+                             rbf_c2grad_coeff_view(0, 2, jc, jb)*
+                             p_cell_in_view(jc, jk, jb) +
+                             rbf_c2grad_coeff_view(1, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) +
+                             rbf_c2grad_coeff_view(2, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) +
+                             rbf_c2grad_coeff_view(3, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) +
+                             rbf_c2grad_coeff_view(4, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) +
+                             rbf_c2grad_coeff_view(5, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) +
+                             rbf_c2grad_coeff_view(6, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) +
+                             rbf_c2grad_coeff_view(7, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) +
+                             rbf_c2grad_coeff_view(8, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) +
+                             rbf_c2grad_coeff_view(9, 2, jc, jb)*
+                             p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb));
+                            });
                              
     }//for
 }//void
-- 
GitLab


From 54ac88915fd7476b3e3f81713b6d97cd586e144b Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 10:30:28 +0100
Subject: [PATCH 08/33] Ported rbf_vec_interpol_cell_lib

---
 src/interpolation/mo_lib_intp_rbf.cpp         | 86 +++++++++++++++++++
 src/interpolation/mo_lib_intp_rbf.hpp         |  7 ++
 .../mo_lib_intp_rbf_bindings.cpp              | 45 +++++++---
 src/interpolation/mo_lib_intp_rbf_bindings.h  | 16 +++-
 4 files changed, 139 insertions(+), 15 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 0b0e213..fa9f372 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -100,3 +100,89 @@ void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2gra
                              const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                              int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+//------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
+// Computes p_u_out / p_v_out at (jc, jk, jb) as 9-term weighted sums of p_vn_in over the stencil given by rbf_vec_idx_c /
+// rbf_vec_blk_c, using coefficient components 0 and 1 of rbf_vec_coeff_c. lacc and acc_async are accepted but unused here.
+template <typename T>
+void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c,  bool lacc, bool acc_async){
+    // Unmanaged LayoutLeft views wrap the caller's arrays in place; no data is copied or owned here.
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
+    // Extents mirror the index order used in the kernel; the coefficients are (stencil entry, component, cell, block).
+    UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+    UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblk_c);
+    UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblk_c);
+    UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, rbf_vec_dim_c, 2, nproma, nblk_c);
+    UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblk_c);
+    UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblk_c);
+    // NOTE(review): jb stops before i_endblk (exclusive bound) and the stencil index/block values are used
+    // directly as 0-based view indices - confirm both against what the caller passes.
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+         // Cell index range for block jb; i_startidx / i_endidx are presumably filled in by get_indices_c_lib.
+         int i_startidx, i_endidx;
+         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+          // One work item per (jk, jc) with jk in [slev, elev) and jc in [i_startidx, i_endidx).
+          Kokkos::parallel_for("rbf_vec_interpol_cell_lib",
+                               Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                               KOKKOS_LAMBDA(const int jk, const int jc){
+                               // First output component: coefficient component 0.
+                               p_u_out_view(jc, jk, jb) =
+                               rbf_vec_coeff_c_view(0, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
+                               rbf_vec_coeff_c_view(1, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
+                               rbf_vec_coeff_c_view(2, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
+                               rbf_vec_coeff_c_view(3, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
+                               rbf_vec_coeff_c_view(4, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
+                               rbf_vec_coeff_c_view(5, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
+                               rbf_vec_coeff_c_view(6, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
+                               rbf_vec_coeff_c_view(7, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
+                               rbf_vec_coeff_c_view(8, 0, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
+                               // Second output component: coefficient component 1.
+                               p_v_out_view(jc, jk, jb) =
+                               rbf_vec_coeff_c_view(0, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
+                               rbf_vec_coeff_c_view(1, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
+                               rbf_vec_coeff_c_view(2, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
+                               rbf_vec_coeff_c_view(3, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
+                               rbf_vec_coeff_c_view(4, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
+                               rbf_vec_coeff_c_view(5, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
+                               rbf_vec_coeff_c_view(6, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
+                               rbf_vec_coeff_c_view(7, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
+                               rbf_vec_coeff_c_view(8, 1, jc, jb)*
+                               p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
+                               });
+                               Kokkos::fence();
+    }//for
+}//void
+// Explicit instantiations for double and float; the template argument is deduced from the pointer parameter types.
+template
+void rbf_vec_interpol_cell_lib(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+template
+void rbf_vec_interpol_cell_lib(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 8006a43..520e9c6 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -19,3 +19,10 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons
                         const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
                         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                         int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+// Declared here, defined and explicitly instantiated for float and double in mo_lib_intp_rbf.cpp; writes p_u_out and p_v_out from p_vn_in.
+template <typename T>
+void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index a44a3da..b19b28c 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -17,27 +17,46 @@
 void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
                              const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
 
 
-rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                            rbf_c2grad_coeff, 
-                            grad_x, grad_y, i_startblk, i_endblk, 
-                            i_startidx_in, i_endidx_in, slev, elev, 
-                            nproma, nlev, nblk_c, rbf_c2grad_dim, lacc);
+    rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                                rbf_c2grad_coeff,
+                                grad_x, grad_y, i_startblk, i_endblk,
+                                i_startidx_in, i_endidx_in, slev, elev,
+                                nproma,  rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
-
 void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
                              const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc){
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
+
+
+    rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                                rbf_c2grad_coeff,
+                                grad_x, grad_y, i_startblk, i_endblk,
+                                i_startidx_in, i_endidx_in, slev, elev,
+                                nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
+}
 
+void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){
 
-rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                            rbf_c2grad_coeff, 
-                            grad_x, grad_y, i_startblk, i_endblk, 
-                            i_startidx_in, i_endidx_in, slev, elev, 
-                            nproma, nlev, nblk_c, rbf_c2grad_dim, lacc);
+    rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
+                                     rbf_vec_coeff_c, p_u_out, p_v_out,
+                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                     slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
+void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){
+
+    rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
+                                      p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                      slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async);
+}
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index e7ef8f3..a9775fa 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -15,10 +15,22 @@ extern "C" {
 void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                        const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, 
+                        const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
                         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                        int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); 
+                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                        const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, 
+                        const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
                         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                        int slev, int elev, int nproma, int nlev, int nblk_c, int rbf_c2grad_dim, bool lacc); 
+                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+
 }
-- 
GitLab


From eac9e262eb66b25397b283b7e5e3efa5682d3a1c Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 11:32:42 +0100
Subject: [PATCH 09/33] Ported rbf_vec_interpol_edge_lib

---
 src/interpolation/mo_lib_intp_rbf.cpp         | 59 ++++++++++++++++++-
 src/interpolation/mo_lib_intp_rbf.hpp         |  7 +++
 .../mo_lib_intp_rbf_bindings.cpp              | 25 ++++++++
 src/interpolation/mo_lib_intp_rbf_bindings.h  | 12 ++++
 4 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index fa9f372..f125f1f 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -177,12 +177,67 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
 }//void
 
 template
-void rbf_vec_interpol_cell_lib(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                           const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                           int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
 template
-void rbf_vec_interpol_cell_lib(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                           const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                           int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+//------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
+
+template <typename T>
+void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const T* rbf_vec_coeff_e, T* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async){
+
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma,nlev,nblks_e);
+    UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e,nproma,nblks_e);
+    UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e,nproma,nblks_e);
+    UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e,nproma,nblks_e);
+    UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e);
+
+
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+
+         int i_startidx, i_endidx;
+         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+         Kokkos::parallel_for("rbf_vec_interpol_edge_lib",
+                              Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                              KOKKOS_LAMBDA(const int jk, const int je){
+
+                              p_vt_out_view(je, jk, jb) =
+                              rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) +
+                              rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) +
+                              rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)) +
+                              rbf_vec_coeff_e_view(4, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(4, je, jb), jk, rbf_vec_blk_e_view(4, je, jb));
+
+                              });
+    }
+}
+
+template
+void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const double* rbf_vec_coeff_e, double* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async);
+
+
+template
+void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const float* rbf_vec_coeff_e, float* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async);
+
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 520e9c6..e52989d 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -26,3 +26,10 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
                           const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                           int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+template <typename T>
+void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const T* rbf_vec_coeff_e, T* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev,  int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index b19b28c..5c7943b 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -60,3 +60,28 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_
                                       p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
                                       slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async);
 }
+
+void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const double* rbf_vec_coeff_e, double* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async){
+
+
+    rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
+                                      rbf_vec_coeff_e, p_vt_out,
+                                      i_startblk, i_endblk, i_startidx_in,i_endidx_in,
+                                      slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
+}
+
+void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const float* rbf_vec_coeff_e, float* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async){
+
+    rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
+                                     rbf_vec_coeff_e, p_vt_out,
+                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                     slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async);
+}
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index a9775fa..9620614 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -32,5 +32,17 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                           int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
 
+void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const double* rbf_vec_coeff_e, double* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async);
+
+void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                          const float* rbf_vec_coeff_e, float* p_vt_out,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                          bool acc_async);
+
 
 }
-- 
GitLab


From 04a3601466fe73b9d396e60bdbd990d8c252e04a Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 11:56:15 +0100
Subject: [PATCH 10/33] Fixed index

---
 src/interpolation/mo_lib_intp_rbf.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index f125f1f..a7d9cbd 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -217,10 +217,10 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const
                               KOKKOS_LAMBDA(const int jk, const int je){
 
                               p_vt_out_view(je, jk, jb) =
+                              rbf_vec_coeff_e_view(0, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, rbf_vec_blk_e_view(0, je, jb)) +
                               rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) +
                               rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) +
-                              rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb)) +
-                              rbf_vec_coeff_e_view(4, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(4, je, jb), jk, rbf_vec_blk_e_view(4, je, jb));
+                              rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb));
 
                               });
     }
-- 
GitLab


From f54eaa77aed000133b4c8c689dc20b49c37a6291 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 16:03:59 +0100
Subject: [PATCH 11/33] ported mo_lib_laplace

---
 src/horizontal/CMakeLists.txt              |   4 +-
 src/horizontal/mo_lib_laplace.cpp          | 104 +++++++++++++++++++++
 src/horizontal/mo_lib_laplace.hpp          |  24 +++++
 src/horizontal/mo_lib_laplace_bindings.cpp |  50 ++++++++++
 src/horizontal/mo_lib_laplace_bindings.h   |  32 +++++++
 5 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 src/horizontal/mo_lib_laplace.cpp
 create mode 100644 src/horizontal/mo_lib_laplace.hpp
 create mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp
 create mode 100644 src/horizontal/mo_lib_laplace_bindings.h

diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 078a14d..44f9e44 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -13,7 +13,9 @@ add_library(
   iconmath-horizontal
   mo_lib_divrot.F90
   mo_lib_laplace.F90
-  mo_lib_gradients.F90)
+  mo_lib_gradients.F90
+  mo_lib_laplace_bindings.cpp
+  mo_lib_laplace.cpp)
 
 add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal)
 
diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
new file mode 100644
index 0000000..48c4479
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -0,0 +1,104 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_laplace.hpp"
+//#include "mo_lib_gradients.hpp"
+#include <Kokkos_Core.hpp>
+#include <iostream>
+
+template<typename T>
+void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+
+    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); 
+    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3);
+    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
+    UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2);
+    UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2);
+    UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e);
+    UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3);
+    UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3);
+    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c);
+    UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); 
+    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c);
+
+
+    switch (cell_type){
+
+        case 3: 
+             for (int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                int i_startidx, i_endidx;
+                get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+
+                Kokkos::parallel_for("nabla2_scalar_lib",
+                                     Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), 
+                                     KOKKOS_LAMBDA(const int jk, const int jc){
+
+                                        nabla2_psi_c_view(jc, jk, jb) =  
+                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
+                                            geofac_n2s_view(jc, 2, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                            geofac_n2s_view(jc, 3, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
+                                            geofac_n2s_view(jc, 4, jb); 
+                                     });
+             }
+             break; 
+
+
+        case 6: // not ported yet (see TODO below); control falls through to the default diagnostic
+/* TODO
+             grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, 
+                              inv_dual_edge_length, z_grad_fd_norm_e,&
+                              i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, 
+                              slev, elev, nproma); 
+
+             div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, 
+                     geofac_div, nabla2_psi_c, i_startblk, i_endblk, 
+                     i_startidx_in, i_endidx_in, slev, elev, nproma); 
+*/
+        default:
+             std::cout << "Unknown value for cell_type\n";
+                  
+    }//switch
+}//void
+
+template
+void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+template
+void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
new file mode 100644
index 0000000..2a6663c
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -0,0 +1,24 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+#include "mo_lib_loopindices.hpp"
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template<typename T>
+void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                       const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c, 
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
new file mode 100644
index 0000000..43a19b3
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -0,0 +1,50 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+
+#include  "mo_lib_laplace_bindings.h"
+#include  "mo_lib_laplace.hpp"
+
+
+void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
+                       cell_edge_idx, cell_edge_blk,
+                       geofac_n2s, geofac_div, nabla2_psi_c,
+                       i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e,
+                       nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); 
+}
+
+
+void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
+                       cell_edge_idx, cell_edge_blk,
+                       geofac_n2s, geofac_div, nabla2_psi_c,
+                       i_startblk,  i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk_e,  i_endblk_e,  i_startidx_e,  i_endidx_e,
+                       nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
+}
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
new file mode 100644
index 0000000..96bdc64
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+
+
+extern "C"{
+
+void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+}
-- 
GitLab


From bd8a950b6fa2be57b0068ddd6744d28d6fabcaa0 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 16:28:59 +0100
Subject: [PATCH 12/33] Fixed index for nabla2_scalar_lib

---
 src/horizontal/mo_lib_laplace.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 48c4479..203b3b7 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                                      KOKKOS_LAMBDA(const int jk, const int jc){
 
                                         nabla2_psi_c_view(jc, jk, jb) =  
-                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
+                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                            geofac_n2s_view(jc, 1, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
                                             geofac_n2s_view(jc, 2, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                            geofac_n2s_view(jc, 3, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
-                                            geofac_n2s_view(jc, 4, jb); 
+                                            geofac_n2s_view(jc, 3, jb); 
                                      });
              }
              break; 
-- 
GitLab


From 18b653dbc456a0061436df5e7a091b20a64592cd Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 10:04:06 +0100
Subject: [PATCH 13/33] WIP: MR separation.

---
 src/horizontal/mo_lib_laplace.cpp | 118 ++++++++++++++++++++++++++++++
 src/horizontal/mo_lib_laplace.hpp |   7 ++
 2 files changed, 125 insertions(+)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 203b3b7..57ad292 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -11,8 +11,10 @@
 
 #include "mo_lib_laplace.hpp"
 //#include "mo_lib_gradients.hpp"
+//#inlcude "mo_fortran_tools.hpp"
 #include <Kokkos_Core.hpp>
 #include <iostream>
+#include <utility>
 
 template<typename T>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
@@ -102,3 +104,119 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+
+//--------------------------------nabla2_scalar_avg_lib-------------------------------------
+
+
+template<typename T>
+void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
+
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c);
+    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); 
+    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
+    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); 
+    UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); 
+    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); 
+
+    int aux_c
+    UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local
+
+
+
+    switch (cell_type){
+
+        case 3: 
+
+            if(slev == elev){
+
+                jk = slev; 
+                i_startblk = i_startblk_in[1]; 
+                i_endblk = i_endblk_in[1]; 
+
+                for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                   int i_startidx, i_endidx;
+                   get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                   Kokkos::parallel_for("aux_c", 
+                                        Kokkos::RangePolicy<int>(i_startidx, i_endidx), 
+                                        KOKKOS_LAMBDA(const int jc) {
+
+                                           aux_c_view(jc, jk, jb) =  
+                                           psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                           geofac_n2s_view(jc, 1, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                           geofac_n2s_view(jc, 2, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                           geofac_n2s_view(jc, 3, jb); 
+
+                                        });
+                }
+
+                   if (l_limited_area || patch_id > 1){
+
+                      i_startblk = i_startblk_in[2]; 
+                      i_endblk = i_endblk_in[2]; 
+
+                      /*TODO
+                      gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), 
+                                Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
+                     */
+                   }
+
+                      i_startblk = i_startblk_in[3]; 
+                      i_endblk = i_endblk_in[3]; 
+
+                      for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                         int i_startidx, i_endidx;
+                         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                         Kokkos::parallel_for("DivGrad",
+                                              Kokkos::RangePolicy<int>(i_startidx, i_endidx),
+                                              KOKKOS_LAMBDA(const int jc) {
+
+                                                 nabla2_psi_c_view(jc, jk, jb) =  
+                                                 aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                                 avg_coeff_view(jc, 1, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
+                                                 avg_coeff_view(jc, 2, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                                 avg_coeff_view(jc, 3, jb);
+
+                                              });
+                      }
+            }//if
+            break; 
+
+        default:
+        std::cout << "Default case.\n"; 
+
+}//switch
+}//void
+
+
+template
+void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
+
+template
+void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 2a6663c..0a67f85 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -22,3 +22,10 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+template<typename T>
+void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const T* geofac_n2s, const T* avg_coeff,  T* nabla2_psi_c, 
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, 
+                      int nblks_c, int cell_type, int patch_id, 
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
+
-- 
GitLab


From 4f0ea43895255e35edb7483f9187f78089403674 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 10:10:02 +0100
Subject: [PATCH 14/33] Revert "WIP: MR separation."

This reverts commit fed348a36d7907ea9b24e154906ddf8b47ff489a.
---
 src/horizontal/mo_lib_laplace.cpp | 118 ------------------------------
 src/horizontal/mo_lib_laplace.hpp |   7 --
 2 files changed, 125 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 57ad292..203b3b7 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -11,10 +11,8 @@
 
 #include "mo_lib_laplace.hpp"
 //#include "mo_lib_gradients.hpp"
-//#inlcude "mo_fortran_tools.hpp"
 #include <Kokkos_Core.hpp>
 #include <iostream>
-#include <utility>
 
 template<typename T>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
@@ -104,119 +102,3 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-
-
-//--------------------------------nabla2_scalar_avg_lib-------------------------------------
-
-
-template<typename T>
-void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                      const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
-                      int nblks_c, int cell_type, int patch_id,
-                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
-
-    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-
-    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c);
-    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); 
-    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
-    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); 
-    UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); 
-    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); 
-
-    int aux_c
-    UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local
-
-
-
-    switch (cell_type){
-
-        case 3: 
-
-            if(slev == elev){
-
-                jk = slev; 
-                i_startblk = i_startblk_in[1]; 
-                i_endblk = i_endblk_in[1]; 
-
-                for(int jb = i_startblk; jb < i_endblk; ++jb) {
-
-                   int i_startidx, i_endidx;
-                   get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
-
-                   Kokkos::parallel_for("aux_c", 
-                                        Kokkos::RangePolicy<int>(i_startidx, i_endidx), 
-                                        KOKKOS_LAMBDA(const int jc) {
-
-                                           aux_c_view(jc, jk, jb) =  
-                                           psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
-                                           geofac_n2s_view(jc, 1, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
-                                           geofac_n2s_view(jc, 2, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                           geofac_n2s_view(jc, 3, jb); 
-
-                                        });
-                }
-
-                   if (l_limited_area || patch_id > 1){
-
-                      i_startblk = i_startblk_in[2]; 
-                      i_endblk = i_endblk_in[2]; 
-
-                      /*TODO
-                      gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), 
-                                Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
-                     */
-                   }
-
-                      i_startblk = i_startblk_in[3]; 
-                      i_endblk = i_endblk_in[3]; 
-
-                      for(int jb = i_startblk; jb < i_endblk; ++jb) {
-
-                         int i_startidx, i_endidx;
-                         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
-
-                         Kokkos::parallel_for("DivGrad",
-                                              Kokkos::RangePolicy<int>(i_startidx, i_endidx),
-                                              KOKKOS_LAMBDA(const int jc) {
-
-                                                 nabla2_psi_c_view(jc, jk, jb) =  
-                                                 aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
-                                                 avg_coeff_view(jc, 1, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
-                                                 avg_coeff_view(jc, 2, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                                 avg_coeff_view(jc, 3, jb);
-
-                                              });
-                      }
-            }//if
-            break; 
-
-        default:
-        std::cout << "Default case.\n"; 
-
-}//switch
-}//void
-
-
-template
-void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
-                      int nblks_c, int cell_type, int patch_id,
-                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
-
-template
-void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
-                      int nblks_c, int cell_type, int patch_id,
-                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 0a67f85..2a6663c 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -22,10 +22,3 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-template<typename T>
-void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                      const T* geofac_n2s, const T* avg_coeff,  T* nabla2_psi_c, 
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, 
-                      int nblks_c, int cell_type, int patch_id, 
-                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
-
-- 
GitLab


From 732769bcec238a625ee72b573854559442e12845 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 10:10:15 +0100
Subject: [PATCH 15/33] Revert "Fixed index for nabla2_scalar_lib"

This reverts commit 4122a81a9a283e42a5c8df8476e61820800bd0b0.
---
 src/horizontal/mo_lib_laplace.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 203b3b7..48c4479 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                                      KOKKOS_LAMBDA(const int jk, const int jc){
 
                                         nabla2_psi_c_view(jc, jk, jb) =  
-                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
-                                            geofac_n2s_view(jc, 1, jb) +
+                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
                                             geofac_n2s_view(jc, 2, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                            geofac_n2s_view(jc, 3, jb); 
+                                            geofac_n2s_view(jc, 3, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
+                                            geofac_n2s_view(jc, 4, jb); 
                                      });
              }
              break; 
-- 
GitLab


From c115d9d0d3d52c1a49915544760b8d32d92293ce Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 10:10:24 +0100
Subject: [PATCH 16/33] Revert "ported mo_lib_laplace"

This reverts commit 768b5176d0179e3cde4a76e894cc26a1d9513151.
---
 src/horizontal/CMakeLists.txt              |   4 +-
 src/horizontal/mo_lib_laplace.cpp          | 104 ---------------------
 src/horizontal/mo_lib_laplace.hpp          |  24 -----
 src/horizontal/mo_lib_laplace_bindings.cpp |  50 ----------
 src/horizontal/mo_lib_laplace_bindings.h   |  32 -------
 5 files changed, 1 insertion(+), 213 deletions(-)
 delete mode 100644 src/horizontal/mo_lib_laplace.cpp
 delete mode 100644 src/horizontal/mo_lib_laplace.hpp
 delete mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp
 delete mode 100644 src/horizontal/mo_lib_laplace_bindings.h

diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 44f9e44..078a14d 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -13,9 +13,7 @@ add_library(
   iconmath-horizontal
   mo_lib_divrot.F90
   mo_lib_laplace.F90
-  mo_lib_gradients.F90
-  mo_lib_laplace_bindings.cpp
-  mo_lib_laplace.cpp)
+  mo_lib_gradients.F90)
 
 add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal)
 
diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
deleted file mode 100644
index 48c4479..0000000
--- a/src/horizontal/mo_lib_laplace.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#include "mo_lib_laplace.hpp"
-//#include "mo_lib_gradients.hpp"
-#include <Kokkos_Core.hpp>
-#include <iostream>
-
-template<typename T>
-void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
-
-    typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
-    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-
-
-    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); 
-    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3);
-    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
-    UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2);
-    UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2);
-    UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e);
-    UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3);
-    UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3);
-    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c);
-    UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); 
-    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c);
-
-
-    switch (cell_type){
-
-        case 3: 
-             for (int jb = i_startblk; jb < i_endblk; ++jb) {
-
-                int i_startidx, i_endidx;
-                get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
-
-
-                Kokkos::parallel_for("rbf_interpol_c2grad",
-                                     Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), 
-                                     KOKKOS_LAMBDA(const int jk, const int jc){
-
-                                        nabla2_psi_c_view(jc, jk, jb) =  
-                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
-                                            geofac_n2s_view(jc, 2, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                            geofac_n2s_view(jc, 3, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
-                                            geofac_n2s_view(jc, 4, jb); 
-                                     });
-             }
-             break; 
-
-
-        case 6: 
-/* TODO
-             grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, 
-                              inv_dual_edge_length, z_grad_fd_norm_e,&
-                              i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, 
-                              slev, elev, nproma); 
-
-             div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, 
-                     geofac_div, nabla2_psi_c, i_startblk, i_endblk, 
-                     i_startidx_in, i_endidx_in, slev, elev, nproma); 
-*/
-        default:
-             std::cout << "Unknown value for cell_type\n";
-                  
-    }//switch
-}//void
-
-template
-void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-
-template
-void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
deleted file mode 100644
index 2a6663c..0000000
--- a/src/horizontal/mo_lib_laplace.hpp
+++ /dev/null
@@ -1,24 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-#pragma once
-
-#include "mo_lib_loopindices.hpp"
-#include <Kokkos_Core.hpp>
-#include <vector>
-
-template<typename T>
-void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                       const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, 
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
deleted file mode 100644
index 43a19b3..0000000
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-
-#include  "mo_lib_laplace_bindings.h"
-#include  "mo_lib_laplace.hpp"
-
-
-void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
-
-    nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
-                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
-                       cell_edge_idx, cell_edge_blk,
-                       geofac_n2s, geofac_div, nabla2_psi_c,
-                       i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                       i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e,
-                       nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); 
-}
-
-
-void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
-
-    nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
-                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
-                       cell_edge_idx, cell_edge_blk,
-                       geofac_n2s, geofac_div, nabla2_psi_c,
-                       i_startblk,  i_endblk, i_startidx_in, i_endidx_in,
-                       i_startblk_e,  i_endblk_e,  i_startidx_e,  i_endidx_e,
-                       nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
-}
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
deleted file mode 100644
index 96bdc64..0000000
--- a/src/horizontal/mo_lib_laplace_bindings.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-#pragma once
-
-
-
-extern "C"{
-
-void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-
-void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
-                       const int* cell_edge_idx, const int* cell_edge_blk,
-                       const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
-                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-}
-- 
GitLab


From 6a1c7103cef418f5908d02507e0d40b30f1a923b Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Wed, 26 Feb 2025 11:15:47 +0100
Subject: [PATCH 17/33] modified few things in the existing test for intp_rbf

---
 test/c/test_intp_rbf.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 0aa4f9b..4dc2d92 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -39,7 +39,7 @@ typedef ::testing::Types< MixedPrecision<double, double>,
 
 // Define a typed test fixture.
 template <typename TypePair>
-class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test {
+class RbfVecInterpolMixedTestFixture : public ::testing::Test {
 public:
   using InType  = typename TypePair::in_type;
   using OutType = typename TypePair::out_type;
@@ -69,7 +69,7 @@ public:
   std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
   std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
 
-  RbfVecInterpolVertexMixedTestFixture() {
+  RbfVecInterpolMixedTestFixture() {
     // Allocate and initialize inputs.
     p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
     rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
@@ -82,9 +82,9 @@ public:
   }
 };
 
-TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes);
+TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes);
 
-TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
+TYPED_TEST(RbfVecInterpolMixedTestFixture, BasicTest) {
   using InType  = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
@@ -106,8 +106,8 @@ TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
     this->lacc,
     this->acc_async,
     this->nlev,
-    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e,
-    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v);
+    this->nblks_e,
+    this->nblks_v);
 
   // Check the outputs only for blocks in the range [i_startblk, i_endblk].
   for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
-- 
GitLab


From 8dadd736b6f11f68a9f86f89a5e77986358008b2 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Wed, 26 Feb 2025 11:16:28 +0100
Subject: [PATCH 18/33] fixed few bugs in one of the routine of intp_rbf

---
 src/interpolation/mo_lib_intp_rbf.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index a7d9cbd..e87a3e5 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -32,13 +32,13 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons
     UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); 
     UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); 
 
-    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
 
         int i_startidx, i_endidx;
         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); 
 
         Kokkos::parallel_for("rbf_interpol_c2grad", 
-                             Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                             Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
                              KOKKOS_LAMBDA(const int jk, const int jc){
 
                              grad_x_view(jc, jk, jb) =
-- 
GitLab


From 06cf4132c0920ba9615d9b9121a5e3cb7f2b048d Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Wed, 26 Feb 2025 11:19:12 +0100
Subject: [PATCH 19/33] added a unit-test for rbf_interpol_c2grad_lib

---
 test/c/test_intp_rbf.cpp | 84 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 4dc2d92..a6aa11a 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -13,6 +13,7 @@
 #include <Kokkos_Core.hpp>
 #include <vector>
 #include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+#include "mo_lib_intp_rbf.hpp"
 
 // Free-function helpers for 3D and 4D array sizes (assumed column-major)
 template<typename T>
@@ -37,7 +38,88 @@ typedef ::testing::Types< MixedPrecision<double, double>,
                           MixedPrecision<double,  float>,
                           MixedPrecision<float,   float>  > MixedTypes;
 
-// Define a typed test fixture.
+// Typed test fixture (one floating-point type T for inputs and outputs), used below for rbf_interpol_c2grad_lib
+template <typename T>
+class RbfInterpolTypedTestFixture : public ::testing::Test {
+public:
+
+  static constexpr int nproma         = 3; // inner loop length
+  static constexpr int nlev            = 4; // vertical levels
+  static constexpr int nblks_c         = 2; // number of blocks for the cell arrays
+  static constexpr int rbf_c2grad_dim  = 10; // leading extent of the rbf_c2grad_* arrays (contributions summed per cell)
+
+  // Parameters for the index ranges.
+  int i_startblk    = 0;
+  int i_endblk      = 1;  // inclusive: the kernel loops jb <= i_endblk, so blocks 0 and 1 are processed
+  int i_startidx_in = 0;
+  int i_endidx_in   = nproma-1;  // last cell index, 0 .. nproma-1 (NOTE(review): assumed inclusive; confirm against get_indices_c_lib)
+  int slev          = 0;
+  int elev          = nlev-1;  // last level; the kernel's level range ends at elev+1 (exclusive)
+  bool lacc         = false;   // Not using any ACC-specific behavior.
+
+  // Data arrays.
+  std::vector<T> p_cell_in;          // size: nproma * nlev * nblks_c
+  std::vector<int> rbf_c2grad_idx;     // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_c2grad_blk;     // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<T> rbf_c2grad_coeff;     // size: rbf_c2grad_dim * 2 * nproma * nblks_c
+  std::vector<T> grad_x;             // size: nproma * nlev * nblks_c
+  std::vector<T> grad_y;             // size: nproma * nlev * nblks_c
+
+  RbfInterpolTypedTestFixture() {
+    size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c;
+    size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c;
+    size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c;
+    p_cell_in.resize(size3d, static_cast<T>(1));
+    rbf_c2grad_idx.resize(size3d_idx, 1);
+    rbf_c2grad_blk.resize(size3d_idx, 0);  // Set block indices to 0 for testing.
+    rbf_c2grad_coeff.resize(size4d, static_cast<T>(1));
+    grad_x.resize(size3d, static_cast<T>(0));
+    grad_y.resize(size3d, static_cast<T>(0));
+  }
+};
+
+typedef ::testing::Types<float, double> MyTypes;
+
+TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes);
+
+TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) {
+  // Run the kernel once over the fixture's block, index and level ranges (inputs and coefficients are all 1).
+  rbf_interpol_c2grad_lib<TypeParam>(
+    this->p_cell_in.data(),
+    this->rbf_c2grad_idx.data(),
+    this->rbf_c2grad_blk.data(),
+    this->rbf_c2grad_coeff.data(),
+    this->grad_x.data(),
+    this->grad_y.data(),
+    this->i_startblk,
+    this->i_endblk,
+    this->i_startidx_in,
+    this->i_endidx_in,
+    this->slev,
+    this->elev,
+    this->nproma,
+    this->rbf_c2grad_dim,
+    this->nlev,
+    this->nblks_c,
+    this->lacc);
+
+  // For each block in the inclusive range [i_startblk, i_endblk], and for each (i, level),
+  // the kernel sums rbf_c2grad_dim contributions, each equal to 1.
+  // Therefore, we expect grad_x and grad_y to equal rbf_c2grad_dim (i.e., 10).
+  for (int jb = this->i_startblk; jb <= this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->grad_x[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5))
+          << "grad_x failure at block " << jb << ", level " << jk << ", index " << i;
+        EXPECT_NEAR(this->grad_y[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5))
+          << "grad_y failure at block " << jb << ", level " << jk << ", index " << i;
+      }
+    }
+  }
+}
+
+// Define a typed test fixture for the functions which have different input and output types
 template <typename TypePair>
 class RbfVecInterpolMixedTestFixture : public ::testing::Test {
 public:
-- 
GitLab


From 6db33cd540dcab1d944cbf25fcf0b117ad8b8c52 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Thu, 27 Feb 2025 11:15:29 +0100
Subject: [PATCH 20/33] Rbf clean up

---
 src/interpolation/mo_lib_intp_rbf.cpp         | 58 +++++++--------
 src/interpolation/mo_lib_intp_rbf.hpp         | 21 +++---
 .../mo_lib_intp_rbf_bindings.cpp              | 47 ++++++------
 src/interpolation/mo_lib_intp_rbf_bindings.h  | 72 ++++++++++---------
 4 files changed, 103 insertions(+), 95 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index e87a3e5..b22e737 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -91,24 +91,25 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons
 
 template
 void rbf_interpol_c2grad_lib<double>(const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                                     const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+                                     int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                     int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 template
 void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                                    const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+                                    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 
 //------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
 
 template <typename T>
 void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c,  bool lacc, bool acc_async){
+                               const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, i
+                               int rbf_vec_dim_c,  bool lacc, bool acc_async){
 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
     typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
@@ -178,23 +179,25 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
 
 template
 void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+                                       const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                       int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                                       int rbf_vec_dim_c, bool lacc, bool acc_async);
 template
 void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+                                      const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                                      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                      int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                                      int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 //------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
 
 template <typename T>
 void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const T* rbf_vec_coeff_e, T* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async){
+                               const T* rbf_vec_coeff_e, T* p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                               int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                               bool acc_async){
 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
     typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
@@ -228,16 +231,15 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const
 
 template
 void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const double* rbf_vec_coeff_e, double* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async);
+                                       const double* rbf_vec_coeff_e, double* p_vt_out,
+                                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                       int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                       bool acc_async);
 
 
 template
 void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const float* rbf_vec_coeff_e, float* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async);
-
+                                      const float* rbf_vec_coeff_e, float* p_vt_out,
+                                      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                      int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                      bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index e52989d..1e95737 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -16,20 +16,21 @@
 
 template <typename T>
 void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                        const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
-                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                             const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 
 template <typename T>
 void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
+                               const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                               int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 template <typename T>
 void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const T* rbf_vec_coeff_e, T* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev,  int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_asynci);
+                               const T* rbf_vec_coeff_e, T* p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                               int slev, int elev, int nlev,  int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                               bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index 5c7943b..2932f24 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -15,10 +15,9 @@
 
 
 void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
+                                const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
 
     rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
                                 rbf_c2grad_coeff,
@@ -27,11 +26,10 @@ void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx
                                 nproma,  rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
-void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
+void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                                const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
 
     rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
                                 rbf_c2grad_coeff,
@@ -41,9 +39,10 @@ void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_id
 }
 
 void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){
+                                  const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, 
+                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
 
     rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
                                      rbf_vec_coeff_c, p_u_out, p_v_out,
@@ -52,9 +51,10 @@ void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c
 }
 
 void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async){
+                                  const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, 
+                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
 
     rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
                                       p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
@@ -62,11 +62,10 @@ void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_
 }
 
 void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const double* rbf_vec_coeff_e, double* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async){
-
+                                  const double* rbf_vec_coeff_e, double* p_vt_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                  bool acc_async){
 
     rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
                                       rbf_vec_coeff_e, p_vt_out,
@@ -75,10 +74,10 @@ void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_
 }
 
 void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const float* rbf_vec_coeff_e, float* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async){
+                                  const float* rbf_vec_coeff_e, float* p_vt_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                  bool acc_async){
 
     rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
                                      rbf_vec_coeff_e, p_vt_out,
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index 9620614..9473aef 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -12,37 +12,43 @@
 
 extern "C" {
 
-void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                        const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y, 
-                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
-
-void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                        const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y, 
-                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
-                        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
-
-void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
-
-void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                          const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, int rbf_vec_dim_c, bool lacc, bool acc_async);
-
-void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const double* rbf_vec_coeff_e, double* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async);
-
-void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                          const float* rbf_vec_coeff_e, float* p_vt_out,
-                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                          int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                          bool acc_async);
-
-
+void rbf_interpol_c2grad_lib_sp(
+        const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+        const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,  // grad_x/grad_y are outputs; non-const to match the definition
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+void rbf_interpol_c2grad_lib_dp(
+        const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+        const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,  // grad_x/grad_y are outputs; non-const to match the definition
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+void rbf_vec_interpol_cell_lib_sp(
+        const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+        const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e,
+        int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+void rbf_vec_interpol_cell_lib_dp(
+        const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+        const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e,
+        int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+void rbf_vec_interpol_edge_lib_dp(
+        const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+        const double* rbf_vec_coeff_e, double* p_vt_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+        bool acc_async);
+
+void rbf_vec_interpol_edge_lib_sp(
+        const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+        const float* rbf_vec_coeff_e, float* p_vt_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+        bool acc_async);
 }
-- 
GitLab


From 1be0c3ab1cec1bcbcf3ab6b446e16ccd4a58c2a5 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Thu, 27 Feb 2025 11:22:56 +0100
Subject: [PATCH 21/33] Fix

---
 src/interpolation/mo_lib_intp_rbf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index b22e737..1cce6d8 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -108,7 +108,7 @@ template <typename T>
 void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, i
+                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, 
                                int rbf_vec_dim_c,  bool lacc, bool acc_async){
 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-- 
GitLab


From 0f5a6e7543abd5e54edf9b043b95bf3d4bca51c8 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 4 Mar 2025 16:46:31 +0100
Subject: [PATCH 22/33] Added test for cell_lib

---
 src/interpolation/mo_lib_intp_rbf.cpp         | 42 ++++++------
 src/interpolation/mo_lib_intp_rbf.hpp         |  4 +-
 .../mo_lib_intp_rbf_bindings.cpp              |  8 +--
 src/interpolation/mo_lib_intp_rbf_bindings.h  |  4 +-
 test/c/test_intp_rbf.cpp                      | 64 ++++++++++++++++++-
 5 files changed, 92 insertions(+), 30 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 1cce6d8..bdbfbe5 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -93,13 +93,13 @@ template
 void rbf_interpol_c2grad_lib<double>(const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
                                      const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
                                      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                     int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                                     int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
 
 template
 void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
                                     const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
                                     int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                                    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
 
 
 //------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
@@ -108,7 +108,7 @@ template <typename T>
 void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e, 
+                               int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                int rbf_vec_dim_c,  bool lacc, bool acc_async){
 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
@@ -117,20 +117,20 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
     typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
 
     UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
-    UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblk_c);
-    UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblk_c);
-    UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblk_c); //TODO
-    UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblk_c);
-    UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblk_c);
+    UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblks_c);
+    UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblks_c);
+    UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, rbf_vec_dim_c, 2, nproma, nblks_c); // one extent per rank: (stencil point, component, jc, jb)
+    UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
+    UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c); // must wrap p_v_out; wrapping p_u_out would overwrite the u result
 
 
-    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
 
          int i_startidx, i_endidx;
          get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
 
           Kokkos::parallel_for("rbf_vec_interpol_cell_lib",
-                               Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                               Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
                                KOKKOS_LAMBDA(const int jk, const int jc){
 
                                p_u_out_view(jc, jk, jb) =
@@ -154,23 +154,23 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
                                p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
 
                                p_v_out_view(jc, jk, jb) =
-                               rbf_vec_coeff_c_view(0, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(0, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
-                               rbf_vec_coeff_c_view(1, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(1, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
-                               rbf_vec_coeff_c_view(2, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(2, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
-                               rbf_vec_coeff_c_view(3, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(3, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
-                               rbf_vec_coeff_c_view(4, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(4, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
-                               rbf_vec_coeff_c_view(5, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(5, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
-                               rbf_vec_coeff_c_view(6, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(6, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
-                               rbf_vec_coeff_c_view(7, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(7, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
-                               rbf_vec_coeff_c_view(8, 2, jc, jb)*
+                               rbf_vec_coeff_c_view(8, 1, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
                                });
                                Kokkos::fence();
@@ -181,13 +181,13 @@ template
 void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                        const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
                                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                       int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                                       int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                        int rbf_vec_dim_c, bool lacc, bool acc_async);
 template
 void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                       const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
                                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                      int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                                      int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                       int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 //------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 1e95737..5b9d5e1 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -18,14 +18,14 @@ template <typename T>
 void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
                              const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
                              int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
 
 
 template <typename T>
 void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nproma, int nlev, int nblk_c, int nblks_e,
+                               int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 template <typename T>
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index 2932f24..b679619 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -41,24 +41,24 @@ void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_i
 void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                   const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
                                   int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, 
+                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                   int rbf_vec_dim_c, bool lacc, bool acc_async){
 
     rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
                                      rbf_vec_coeff_c, p_u_out, p_v_out,
                                      i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                     slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async);
+                                     slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
 void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
                                   const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
                                   int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e, 
+                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
                                   int rbf_vec_dim_c, bool lacc, bool acc_async){
 
     rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
                                       p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                      slev, elev, nproma, nlev, nblk_c, nblk_e, rbf_vec_dim_c, lacc, acc_async);
+                                      slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
 void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index 9473aef..cc1dd5c 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -28,14 +28,14 @@ void rbf_vec_interpol_cell_lib_sp(
         const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
         const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e,
+        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
         int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 void rbf_vec_interpol_cell_lib_dp(
         const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
         const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
         int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblk_c, int nblk_e,
+        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
         int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 void rbf_vec_interpol_edge_lib_dp(
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index a6aa11a..9414061 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -12,6 +12,8 @@
 #include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>
 #include <vector>
+#include <algorithm>
+#include <numeric>
 #include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
 #include "mo_lib_intp_rbf.hpp"
 
@@ -46,7 +48,9 @@ public:
   static constexpr int nproma         = 3; // inner loop length
   static constexpr int nlev            = 4; // vertical levels
   static constexpr int nblks_c         = 2; // number of blocks for the cell arrays
+  static constexpr int nblks_e          = 2;
   static constexpr int rbf_c2grad_dim  = 10; // fixed dimension for the RBF c2 gradient
+  static constexpr int rbf_vec_dim_c = 9;
 
   // Parameters for the index ranges.
   int i_startblk    = 0;
@@ -56,23 +60,43 @@ public:
   int slev          = 0;
   int elev          = nlev-1;
   bool lacc         = false;   // Not using any ACC-specific behavior.
+  bool acc_async    = false;
 
   // Data arrays.
   std::vector<T> p_cell_in;          // size: nproma * nlev * nblks_c
   std::vector<int> rbf_c2grad_idx;     // size: rbf_c2grad_dim * nproma * nblks_c
   std::vector<int> rbf_c2grad_blk;     // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_vec_idx_c;      //size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<int> rbf_vec_blk_c;      // size: rbf_vec_dim_c * nproma * nblks_c
   std::vector<T> rbf_c2grad_coeff;     // size: rbf_c2grad_dim * 2 * nproma * nblks_c
   std::vector<T> grad_x;             // size: nproma * nlev * nblks_c
   std::vector<T> grad_y;             // size: nproma * nlev * nblks_c
+  std::vector<T> p_vn_in;
+  std::vector<T> rbf_vec_coeff_c;
+  std::vector<T> p_u_out;
+  std::vector<T> p_v_out;
 
   RbfInterpolTypedTestFixture() {
     size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c;
     size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c;
     size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c;
+
+    size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c;
+    size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c;
+
     p_cell_in.resize(size3d, static_cast<T>(1));
+    p_vn_in.resize(size3d, static_cast<T>(1));
+
+    rbf_vec_idx_c.resize(size3d_vec_dim, 1);
+    rbf_vec_blk_c.resize(size3d_vec_dim, 0);
     rbf_c2grad_idx.resize(size3d_idx, 1);
     rbf_c2grad_blk.resize(size3d_idx, 0);  // Set block indices to 0 for testing.
+
+    rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1));
     rbf_c2grad_coeff.resize(size4d, static_cast<T>(1));
+
+    p_u_out.resize(size3d_vec_dim, static_cast<T>(0));
+    p_v_out.resize(size3d_vec_dim, static_cast<T>(0));
     grad_x.resize(size3d, static_cast<T>(0));
     grad_y.resize(size3d, static_cast<T>(0));
   }
@@ -83,7 +107,7 @@ typedef ::testing::Types<float, double> MyTypes;
 TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes);
 
 TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) {
-
+  using T = TypeParam;
   rbf_interpol_c2grad_lib<TypeParam>(
     this->p_cell_in.data(),
     this->rbf_c2grad_idx.data(),
@@ -119,6 +143,44 @@ TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) {
   }
 }
 
+
+TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) {
+    using T = TypeParam;
+    // Trailing arguments follow the declared order: nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async.
+    rbf_vec_interpol_cell_lib<T>(
+        this->p_vn_in.data(),
+        this->rbf_vec_idx_c.data(),
+        this->rbf_vec_blk_c.data(),
+        this->rbf_vec_coeff_c.data(),
+        this->p_u_out.data(),
+        this->p_v_out.data(),
+        this->i_startblk,
+        this->i_endblk,
+        this->i_startidx_in,
+        this->i_endidx_in,
+        this->slev,
+        this->elev,
+        this->nproma,
+        this->nlev,
+        this->nblks_c,
+        this->nblks_e,
+        this->rbf_vec_dim_c,
+        this->lacc,
+        this->acc_async);
+    // The fixture fills inputs and coefficients with 1, so each checked output is expected to equal rbf_vec_dim_c.
+    for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+        for (int jk = 0; jk < this->nlev; ++jk) {
+          for (int i = 0; i < this->nproma; ++i) {
+            size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
+            EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5))
+              << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i;
+      //      EXPECT_NEAR(this->p_v_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5))
+        //      << "p_v_out failure at block " << jb << ", level " << jk << ", index " << i;
+          }
+        }
+      }
+  }
+
 // Define a typed test fixture for the functions which have different input and output types
 template <typename TypePair>
 class RbfVecInterpolMixedTestFixture : public ::testing::Test {
-- 
GitLab


From 8175944d3f080ec145d4ce59e2e6fdddb8d6153d Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 4 Mar 2025 16:57:03 +0100
Subject: [PATCH 23/33] Fixed strange typo

---
 src/horizontal/mo_lib_divrot.F90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90
index a2f2ad9..c1360f6 100644
--- a/src/horizontal/mo_lib_divrot.F90
+++ b/src/horizontal/mo_lib_divrot.F90
@@ -561,7 +561,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -800,7 +800,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1008,7 +1008,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1295,7 +1295,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
-- 
GitLab


From 3afe4ffe25a1621a662d055f21b04aaf8f9e3fbb Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Wed, 5 Mar 2025 14:57:12 +0100
Subject: [PATCH 24/33] Added test for edge_lib

---
 src/interpolation/mo_lib_intp_rbf.cpp | 22 ++++++------
 test/c/test_intp_rbf.cpp              | 52 +++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index bdbfbe5..76e4784 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -154,23 +154,23 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
                                p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
 
                                p_v_out_view(jc, jk, jb) =
-                               rbf_vec_coeff_c_view(0, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(0, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
-                               rbf_vec_coeff_c_view(1, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(1, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
-                               rbf_vec_coeff_c_view(2, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(2, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
-                               rbf_vec_coeff_c_view(3, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(3, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
-                               rbf_vec_coeff_c_view(4, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(4, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
-                               rbf_vec_coeff_c_view(5, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(5, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
-                               rbf_vec_coeff_c_view(6, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(6, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
-                               rbf_vec_coeff_c_view(7, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(7, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
-                               rbf_vec_coeff_c_view(8, 1, jc, jb)*
+                               rbf_vec_coeff_c_view(8, 2, jc, jb)*
                                p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
                                });
                                Kokkos::fence();
@@ -210,13 +210,13 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const
     UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e);
 
 
-    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
 
          int i_startidx, i_endidx;
          get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
 
          Kokkos::parallel_for("rbf_vec_interpol_edge_lib",
-                              Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                              Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
                               KOKKOS_LAMBDA(const int jk, const int je){
 
                               p_vt_out_view(je, jk, jb) =
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 9414061..426a86b 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -51,6 +51,7 @@ public:
   static constexpr int nblks_e          = 2;
   static constexpr int rbf_c2grad_dim  = 10; // fixed dimension for the RBF c2 gradient
   static constexpr int rbf_vec_dim_c = 9;
+  static constexpr int rbf_vec_dim_e = 4;
 
   // Parameters for the index ranges.
   int i_startblk    = 0;
@@ -76,6 +77,12 @@ public:
   std::vector<T> p_u_out;
   std::vector<T> p_v_out;
 
+  std::vector<int> rbf_vec_idx_e;
+  std::vector<int> rbf_vec_blk_e;
+  std::vector<T> rbf_vec_coeff_e;
+  std::vector<T> p_vt_out;
+
+
   RbfInterpolTypedTestFixture() {
     size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c;
     size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c;
@@ -84,6 +91,9 @@ public:
     size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c;
     size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c;
 
+    size_t size3d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c;
+    size_t size_4d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c;
+
     p_cell_in.resize(size3d, static_cast<T>(1));
     p_vn_in.resize(size3d, static_cast<T>(1));
 
@@ -91,12 +101,17 @@ public:
     rbf_vec_blk_c.resize(size3d_vec_dim, 0);
     rbf_c2grad_idx.resize(size3d_idx, 1);
     rbf_c2grad_blk.resize(size3d_idx, 0);  // Set block indices to 0 for testing.
+    rbf_vec_idx_e.resize(size3d_vec_dim, 1);
+    rbf_vec_blk_e.resize(size3d_vec_dim, 0);
 
     rbf_vec_coeff_c.resize(size_4d_vec_dim, static_cast<T>(1));
     rbf_c2grad_coeff.resize(size4d, static_cast<T>(1));
+    rbf_vec_coeff_e.resize(size_4d_edge_lib, static_cast<T>(1));
 
     p_u_out.resize(size3d_vec_dim, static_cast<T>(0));
     p_v_out.resize(size3d_vec_dim, static_cast<T>(0));
+    p_vt_out.resize(size3d_edge_lib, static_cast<T>(0));
+
     grad_x.resize(size3d, static_cast<T>(0));
     grad_y.resize(size3d, static_cast<T>(0));
   }
@@ -181,6 +196,43 @@ TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) {
       }
   }
 
+
+TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){
+    using T = TypeParam;
+
+
+    rbf_vec_interpol_edge_lib<T>(
+        this->p_vn_in.data(),
+        this->rbf_vec_idx_e.data(),
+        this->rbf_vec_blk_e.data(),
+        this->rbf_vec_coeff_e.data(),
+        this->p_vt_out.data(),
+        this->i_startblk,
+        this->i_endblk,
+        this->i_startidx_in,
+        this->i_endidx_in,
+        this->slev,
+        this->elev,
+        this->nlev,
+        this->nproma,
+        this->rbf_vec_dim_e,
+        this->nblks_e,
+        this->lacc,
+        this->acc_async);
+
+     for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+        for (int jk = 0; jk < this->nlev; ++jk) {
+          for (int i = 0; i < this->nproma; ++i) {
+            size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
+            EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), static_cast<T>(1e-5))
+              << "p_vt_out failure at block " << jb << ", level " << jk << ", index " << i;
+          }
+        }
+      }
+
+}
+
+
 // Define a typed test fixture for the functions which have different input and output types
 template <typename TypePair>
 class RbfVecInterpolMixedTestFixture : public ::testing::Test {
-- 
GitLab


From 9d414e59bb6ed9057b6335f7a5411f7eba352c30 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 10:57:03 +0100
Subject: [PATCH 25/33] put the instantiations at the end of the file

---
 src/interpolation/mo_lib_intp_rbf.cpp | 82 +++++++++++++++------------
 1 file changed, 47 insertions(+), 35 deletions(-)

diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 76e4784..f1a9fa4 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -89,18 +89,6 @@ void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, cons
     }//for
 }//void
 
-template
-void rbf_interpol_c2grad_lib<double>(const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                     const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-                                     int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                     int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
-
-template
-void rbf_interpol_c2grad_lib<float>(const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                    const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-                                    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
-
 
 //------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
 
@@ -177,19 +165,6 @@ void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const
     }//for
 }//void
 
-template
-void rbf_vec_interpol_cell_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                       const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                       int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                       int rbf_vec_dim_c, bool lacc, bool acc_async);
-template
-void rbf_vec_interpol_cell_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                      const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                                      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                      int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                      int rbf_vec_dim_c, bool lacc, bool acc_async);
-
 //------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
 
 template <typename T>
@@ -230,16 +205,53 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const
 }
 
 template
-void rbf_vec_interpol_edge_lib<double>(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                       const double* rbf_vec_coeff_e, double* p_vt_out,
-                                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                       int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                       bool acc_async);
+void rbf_vec_interpol_cell_lib<double>(  // explicit instantiation, double precision
+    const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+    const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+    int rbf_vec_dim_c, bool lacc, bool acc_async
+);
 
+template
+void rbf_vec_interpol_cell_lib<float>(  // explicit instantiation, single precision
+    const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+    const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+    int rbf_vec_dim_c, bool lacc, bool acc_async
+);
+
+template
+void rbf_interpol_c2grad_lib<double>(  // explicit instantiation, double precision
+    const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+    const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc
+);
+
+template
+void rbf_interpol_c2grad_lib<float>(  // explicit instantiation, single precision
+    const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+    const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc
+);
+
+template
+void rbf_vec_interpol_edge_lib<double>(  // explicit instantiation, double precision
+    const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+    const double* rbf_vec_coeff_e, double* p_vt_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async
+);
 
 template
-void rbf_vec_interpol_edge_lib<float>(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                      const float* rbf_vec_coeff_e, float* p_vt_out,
-                                      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                      int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                      bool acc_async);
+void rbf_vec_interpol_edge_lib<float>(  // explicit instantiation, single precision
+    const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+    const float* rbf_vec_coeff_e, float* p_vt_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+    int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async
+);
-- 
GitLab


From a3e149a6fc76fbc261f754c66e18ecb9912073ac Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 11:48:00 +0100
Subject: [PATCH 26/33] moved intp_rbf routines from the temporary file to the
 main file

fixed a bug
---
 src/interpolation/CMakeLists.txt              |   2 -
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ------------------
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp |  32 ---
 ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ------------
 ...rbf-rbf_vec_interpol_vertex_lib_bindings.h |  54 -----
 src/interpolation/mo_lib_intp_rbf.cpp         | 176 ++++++++++++++++
 src/interpolation/mo_lib_intp_rbf.hpp         |  20 ++
 .../mo_lib_intp_rbf_bindings.cpp              | 118 +++++++++++
 src/interpolation/mo_lib_intp_rbf_bindings.h  |  38 ++++
 test/c/test_intp_rbf.cpp                      |   1 -
 10 files changed, 352 insertions(+), 420 deletions(-)
 delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
 delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
 delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
 delete mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index d5af767..bf5b0ac 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -16,8 +16,6 @@ add_library(
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
   mo_lib_intp_rbf.F90
-  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
-  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
   interpolation_bindings.cpp
   mo_lib_intp_rbf_bindings.cpp
   mo_lib_intp_rbf.cpp)
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
deleted file mode 100644
index c9b776e..0000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib()
-///
-/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
-
-#include <type_traits>
-#include <Kokkos_Core.hpp>
-#include "mo_lib_loopindices.hpp"
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
-
-
-constexpr int rbf_vec_dim_v = 6;
-
-//-------------------------------------------------------------------------
-//
-//
-//>
-/// Performs vector RBF reconstruction at triangle vertices.
-///
-/// Theory described in Narcowich and Ward (Math Comp. 1994) and
-/// Bonaventura and Baudisch (Mox Report n. 75).
-/// It takes edge based variables as input and combines them
-/// into three dimensional cartesian vectors at each vertex.
-///
-/// Two templated variables in order to support mixed precision.
-/// Intended that type_traits::is_floating_point(T,S)==TRUE
-/// precision(T) >= precision(S)
-template <typename T, typename S>
-void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    // Dimensions for the arrays.
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    /*
-#ifdef DIM_ENABLE_GPU
-    if (lacc){ using MemSpace = Kokkos::CudaSpace;
-    } else { using MemSpace = Kokkos::HostSpace; }
-#else
-    using MemSpace = Kokkos::HostSpace;
-#endif
-
-    */
-
-    // Wrap raw pointers in unmanaged Kokkos Views.
-    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
-    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
-
-
-
-    // input components of velocity or horizontal vorticity vectors at edge midpoints
-    // dim: (nproma,nlev,nblks_e)
-    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
-
-    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
-    // (rbf_vec_dim_v,nproma,nblks_v)
-    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
-    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
-
-    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
-    // at each tringle vertex (input is normal component),
-    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
-    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
-
-    // reconstructed x-component (u) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
-    // reconstructed y-component (v) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
-
-    // Local vars
-    //int jv, jk, jb; // integer over vertices, levels, and blocks,
-    int jb; // integer over vertices, levels, and blocks,
-    int i_startidx; // start index
-    int i_endidx;   // end index
-
-    for (jb=i_startblk; jb <= i_endblk; ++jb){
-
-        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
-                          i_startidx, i_endidx);
-
-        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
-            {slev, i_startidx}, {elev + 1, i_endidx + 1});
-
-        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
-            KOKKOS_LAMBDA(const int jk, const int jv) {
-
-                // NOTE: Static indexes reduced by 1 from Fortran version
-                p_u_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-                p_v_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-            }
-        );
-    }
-}
-
-// Explicit instantiation - double precision
-template
-void rbf_vec_interpol_vertex_lib<double, double>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-// Explicit instantiation - single precision
-template
-void rbf_vec_interpol_vertex_lib<float, float>(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-// Explicit instantiation - mixed precision
-template
-void rbf_vec_interpol_vertex_lib<double, float>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
deleted file mode 100644
index c0b6f05..0000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
+++ /dev/null
@@ -1,32 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#pragma once
-
-template <typename T, typename S>
-void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_c
-    );
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
deleted file mode 100644
index 06dc467..0000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h"
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, double>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-}
-
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<float, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
-void rbf_vec_interpol_vertex_lib_mixprec(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
deleted file mode 100644
index 4356f88..0000000
--- a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-#pragma once
-
-extern "C" {
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-}
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index f1a9fa4..361710f 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -12,6 +12,119 @@
 #include "mo_lib_intp_rbf.hpp"
 #include <Kokkos_Core.hpp>
 
+constexpr int rbf_vec_dim_v = 6;  // stencil size: first extent of the idx/blk/coeff views below
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Performs vector RBF reconstruction at triangle vertices.
+///
+/// Theory described in Narcowich and Ward (Math Comp. 1994) and
+/// Bonaventura and Baudisch (Mox Report n. 75).
+/// It takes edge based variables as input and combines them
+/// into two vector components (p_u_out, p_v_out) at each vertex.
+///
+/// Two template parameters in order to support mixed precision:
+/// T is the input/coefficient type, S the output type. Intended that
+/// std::is_floating_point holds for T and S, and precision(T) >= precision(S).
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,            // edge input, viewed as (nproma,nlev,nblks_e)
+    const int* rbf_vec_idx_v,   // stencil edge indices, viewed as (rbf_vec_dim_v,nproma,nblks_v)
+    const int* rbf_vec_blk_v,   // stencil edge blocks, viewed as (rbf_vec_dim_v,nproma,nblks_v)
+    const T* rbf_vec_coeff_v,   // coefficients, viewed as (rbf_vec_dim_v,2,nproma,nblks_v)
+    S* p_u_out,                 // output u component, viewed as (nproma,nlev,nblks_v)
+    S* p_v_out,                 // output v component, viewed as (nproma,nlev,nblks_v)
+    const int i_startblk,       // start_block needed for get_indices_v_lib
+    const int i_endblk,         // end_block needed for get_indices_v_lib
+    const int i_startidx_in,    // start_index needed for get_indices_v_lib
+    const int i_endidx_in,      // end_index needed for get_indices_v_lib
+    const int slev,             // vertical start level (first jk of the loop)
+    const int elev,             // vertical end level (last jk, inclusive)
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // intended: if true, use Cuda mem-/exec-spaces (not read below)
+    const bool acc_async,              // [deprecated] use async acc (not read below)
+    // Dimensions for the arrays.
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    /* Disabled: sketch of selecting the memory space from lacc (not compiled).
+#ifdef DIM_ENABLE_GPU
+    if (lacc){ using MemSpace = Kokkos::CudaSpace;
+    } else { using MemSpace = Kokkos::HostSpace; }
+#else
+    using MemSpace = Kokkos::HostSpace;
+#endif
+
+    */
+
+    // Wrap raw pointers in unmanaged Kokkos Views (Kokkos does not take ownership of the memory).
+    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
+    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+
+
+
+    // input components of velocity or horizontal vorticity vectors at edge midpoints
+    // dim: (nproma,nlev,nblks_e)
+    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
+    // (rbf_vec_dim_v,nproma,nblks_v)
+    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
+    // at each triangle vertex (input is normal component),
+    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
+
+    // reconstructed x-component (u) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+    // reconstructed y-component (v) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+    // Local vars
+    // jk (level) and jv (vertex) are the parallel_for lambda arguments below.
+    int jb; // block index of the outer sequential loop
+    int i_startidx; // start index
+    int i_endidx;   // end index
+
+    for (jb=i_startblk; jb <= i_endblk; ++jb){  // one parallel_for launch per block
+
+        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                          i_startidx, i_endidx);  // presumably sets i_startidx/i_endidx for block jb
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+            {slev, i_startidx}, {elev + 1, i_endidx + 1});  // +1: policy upper bounds are exclusive
+
+        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
+            KOKKOS_LAMBDA(const int jk, const int jv) {
+
+                // NOTE: Static indexes reduced by 1 from Fortran version; iidx/iblk values are used unchanged
+                p_u_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+                p_v_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+            }
+        );  // NOTE(review): no Kokkos::fence() here - confirm callers synchronize before reading outputs
+    }
+}
+
 template <typename T>
 void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
                              const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
@@ -204,6 +317,69 @@ void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const
     }
 }
 
+// Explicit instantiation - double precision
+template
+void rbf_vec_interpol_vertex_lib<double, double>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - single precision
+template
+void rbf_vec_interpol_vertex_lib<float, float>(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - mixed precision
+template
+void rbf_vec_interpol_vertex_lib<double, float>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
 template
 void rbf_vec_interpol_cell_lib<double>(
     const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 5b9d5e1..53547fc 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -14,6 +14,26 @@
 #include <Kokkos_Core.hpp>
 #include <vector>
 
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,                 // output, element type S (may differ from T, see the <double, float> instantiation)
+    S* p_v_out,                 // output, element type S
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v  // names match the explicit instantiations in mo_lib_intp_rbf.cpp
+    );
+
 template <typename T>
 void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
                              const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
index b679619..e8461e6 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
@@ -13,6 +13,124 @@
 #include  "mo_lib_intp_rbf_bindings.h"
 #include  "mo_lib_intp_rbf.hpp"
 
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, double>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<float, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
+void rbf_vec_interpol_vertex_lib_mixprec(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
 
 void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
                                 const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
index cc1dd5c..fb8e87c 100644
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ b/src/interpolation/mo_lib_intp_rbf_bindings.h
@@ -12,6 +12,44 @@
 
 extern "C" {
 
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
 void rbf_interpol_c2grad_lib_sp(
         const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
         const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y,
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 426a86b..924ff02 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -14,7 +14,6 @@
 #include <vector>
 #include <algorithm>
 #include <numeric>
-#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
 #include "mo_lib_intp_rbf.hpp"
 
 // Free-function helpers for 3D and 4D array sizes (assumed column-major)
-- 
GitLab


From f0f195141b717ccf9c243161cacab52b114cf535 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 12:15:27 +0100
Subject: [PATCH 27/33] deleted intp_rbf_bindings and merged its contents into
 interpolation_bindings

---
 src/interpolation/CMakeLists.txt              |   5 +-
 src/interpolation/interpolation_bindings.cpp  | 191 ++++++++++++++++
 src/interpolation/interpolation_bindings.h    |  78 +++++++
 .../mo_lib_intp_rbf_bindings.cpp              | 204 ------------------
 src/interpolation/mo_lib_intp_rbf_bindings.h  |  92 --------
 5 files changed, 271 insertions(+), 299 deletions(-)
 delete mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.cpp
 delete mode 100644 src/interpolation/mo_lib_intp_rbf_bindings.h

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index bf5b0ac..96f281c 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -16,9 +16,8 @@ add_library(
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
   mo_lib_intp_rbf.F90
-  interpolation_bindings.cpp
-  mo_lib_intp_rbf_bindings.cpp
-  mo_lib_intp_rbf.cpp)
+  mo_lib_intp_rbf.cpp
+  interpolation_bindings.cpp)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp
index 628f411..75449d6 100644
--- a/src/interpolation/interpolation_bindings.cpp
+++ b/src/interpolation/interpolation_bindings.cpp
@@ -12,6 +12,7 @@
 #include "interpolation_bindings.h"
 #include "mo_lib_interpolation_scalar.hpp"
 #include "mo_lib_interpolation_vector.hpp"
+#include "mo_lib_intp_rbf.hpp"
 
 // This is the binding for mo_interpolation_vector::edges2cells_vector_lib
 // (wp=dp)
@@ -326,3 +327,193 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                       avg_psi_c, i_startblk, i_endblk, i_startidx_in,
                       i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc);
 }
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, double>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<float, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
+// Mixed-precision binding (double in, float out). extern "C" is spelled out here because interpolation_bindings.h, as added in this patch, declares only the _dp/_sp variants.
+extern "C" void rbf_vec_interpol_vertex_lib_mixprec(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                                const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
+
+    rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                                rbf_c2grad_coeff,
+                                grad_x, grad_y, i_startblk, i_endblk,
+                                i_startidx_in, i_endidx_in, slev, elev,
+                                nproma,  rbf_c2grad_dim, nlev, nblk_c, lacc);
+}
+
+void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+                                const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
+
+    rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
+                                rbf_c2grad_coeff,
+                                grad_x, grad_y, i_startblk, i_endblk,
+                                i_startidx_in, i_endidx_in, slev, elev,
+                                nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
+}
+
+void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                                  const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
+
+    rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
+                                     rbf_vec_coeff_c, p_u_out, p_v_out,
+                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                     slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+}
+
+void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+                                  const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
+
+    rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
+                                      p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                      slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+}
+
+void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                                  const double* rbf_vec_coeff_e, double* p_vt_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                  bool acc_async){
+
+    rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
+                                      rbf_vec_coeff_e, p_vt_out,
+                                      i_startblk, i_endblk, i_startidx_in,i_endidx_in,
+                                      slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
+}
+
+void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+                                  const float* rbf_vec_coeff_e, float* p_vt_out,
+                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+                                  bool acc_async){
+
+    rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
+                                     rbf_vec_coeff_e, p_vt_out,
+                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                                     slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async);
+}
diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h
index 7cb873d..419446a 100644
--- a/src/interpolation/interpolation_bindings.h
+++ b/src/interpolation/interpolation_bindings.h
@@ -185,4 +185,82 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                      const int i_startidx_in, const int i_endidx_in,
                      const int slev, const int elev, const int nproma,
                      const int nlev, const int nblks_c, const bool lacc);
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_interpol_c2grad_lib_sp(  // single-precision C binding; grad_x/grad_y are non-const, matching the definition in interpolation_bindings.cpp
+        const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+        const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+void rbf_interpol_c2grad_lib_dp(  // double-precision C binding; grad_x/grad_y are non-const, matching the definition in interpolation_bindings.cpp
+        const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
+        const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+
+void rbf_vec_interpol_cell_lib_sp(
+        const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+        const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+        int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+void rbf_vec_interpol_cell_lib_dp(
+        const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
+        const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+        int rbf_vec_dim_c, bool lacc, bool acc_async);
+
+void rbf_vec_interpol_edge_lib_dp(
+        const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+        const double* rbf_vec_coeff_e, double* p_vt_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+        bool acc_async);
+
+void rbf_vec_interpol_edge_lib_sp(
+        const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
+        const float* rbf_vec_coeff_e, float* p_vt_out,
+        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+        bool acc_async);
 }
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.cpp b/src/interpolation/mo_lib_intp_rbf_bindings.cpp
deleted file mode 100644
index e8461e6..0000000
--- a/src/interpolation/mo_lib_intp_rbf_bindings.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-
-
-#include  "mo_lib_intp_rbf_bindings.h"
-#include  "mo_lib_intp_rbf.hpp"
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, double>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-}
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<float, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
-void rbf_vec_interpol_vertex_lib_mixprec(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
-}
-
-void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
-    rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                                rbf_c2grad_coeff,
-                                grad_x, grad_y, i_startblk, i_endblk,
-                                i_startidx_in, i_endidx_in, slev, elev,
-                                nproma,  rbf_c2grad_dim, nlev, nblk_c, lacc);
-}
-
-void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
-    rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                                rbf_c2grad_coeff,
-                                grad_x, grad_y, i_startblk, i_endblk,
-                                i_startidx_in, i_endidx_in, slev, elev,
-                                nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
-}
-
-void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                  const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
-
-    rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
-                                     rbf_vec_coeff_c, p_u_out, p_v_out,
-                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                     slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
-}
-
-void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                  const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
-
-    rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
-                                      p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                      slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
-}
-
-void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                  const double* rbf_vec_coeff_e, double* p_vt_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                  bool acc_async){
-
-    rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
-                                      rbf_vec_coeff_e, p_vt_out,
-                                      i_startblk, i_endblk, i_startidx_in,i_endidx_in,
-                                      slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
-}
-
-void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                  const float* rbf_vec_coeff_e, float* p_vt_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                  bool acc_async){
-
-    rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
-                                     rbf_vec_coeff_e, p_vt_out,
-                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                     slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async);
-}
diff --git a/src/interpolation/mo_lib_intp_rbf_bindings.h b/src/interpolation/mo_lib_intp_rbf_bindings.h
deleted file mode 100644
index fb8e87c..0000000
--- a/src/interpolation/mo_lib_intp_rbf_bindings.h
+++ /dev/null
@@ -1,92 +0,0 @@
-// ICON
-//
-// ---------------------------------------------------------------
-// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
-// Contact information: icon-model.org
-//
-// See AUTHORS.TXT for a list of authors
-// See LICENSES/ for license information
-// SPDX-License-Identifier: BSD-3-Clause
-// ---------------------------------------------------------------
-#pragma once
-
-extern "C" {
-
-void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-void rbf_interpol_c2grad_lib_sp(
-        const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-        const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
-
-void rbf_interpol_c2grad_lib_dp(
-        const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-        const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
-
-void rbf_vec_interpol_cell_lib_sp(
-        const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-        const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-        int rbf_vec_dim_c, bool lacc, bool acc_async);
-
-void rbf_vec_interpol_cell_lib_dp(
-        const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-        const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-        int rbf_vec_dim_c, bool lacc, bool acc_async);
-
-void rbf_vec_interpol_edge_lib_dp(
-        const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-        const double* rbf_vec_coeff_e, double* p_vt_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-        bool acc_async);
-
-void rbf_vec_interpol_edge_lib_sp(
-        const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-        const float* rbf_vec_coeff_e, float* p_vt_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-        bool acc_async);
-}
-- 
GitLab


From ed5c21b6247e11166e6d27b3bc5d3741a5664117 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 14:15:28 +0100
Subject: [PATCH 28/33] made changes to test_intp_rbf

---
 test/c/test_intp_rbf.cpp | 70 +++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 924ff02..983352c 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -39,38 +39,43 @@ typedef ::testing::Types< MixedPrecision<double, double>,
                           MixedPrecision<double,  float>,
                           MixedPrecision<float,   float>  > MixedTypes;
 
-// Define a typed test fixture for the functions which have the same input and output types
-template <typename T>
-class RbfInterpolTypedTestFixture : public ::testing::Test {
+class interp_dimensions {
 public:
-
-  static constexpr int nproma         = 3; // inner loop length
-  static constexpr int nlev            = 4; // vertical levels
-  static constexpr int nblks_c         = 2; // number of blocks for the cell arrays
-  static constexpr int nblks_e          = 2;
-  static constexpr int rbf_c2grad_dim  = 10; // fixed dimension for the RBF c2 gradient
+  // Constant dimensions.
+  static constexpr int nproma = 3; // inner loop length
+  static constexpr int nlev = 4;    // number of vertical levels
+  static constexpr int nblks_c = 2; // number of cell blocks
+  static constexpr int nblks_e = 2; // number of edge blocks
+  static constexpr int nblks_v = 2; // number of vertex blocks
+  static constexpr int rbf_c2grad_dim  = 10; // fixed dimension
   static constexpr int rbf_vec_dim_c = 9;
   static constexpr int rbf_vec_dim_e = 4;
 
-  // Parameters for the index ranges.
-  int i_startblk    = 0;
-  int i_endblk      = 1;  // Note: loop over jb uses jb < i_endblk, so blocks 0 and 1
-  int i_startidx_in = 0;
-  int i_endidx_in   = nproma-1;  // [0, nproma) i.e. 0 .. nproma-1
-  int slev          = 0;
-  int elev          = nlev-1;
-  bool lacc         = false;   // Not using any ACC-specific behavior.
-  bool acc_async    = false;
+  // Parameter values.
+  const int i_startblk = 0;
+  const int i_endblk = 1; // Test blocks [0, 1]
+  const int i_startidx_in = 0;
+  const int i_endidx_in = nproma - 1;
+  const int slev = 0;
+  const int elev = nlev - 1;
+  const bool lacc = false; // Not using ACC-specific behavior.
+  const bool acc_async = false; // No asynchronous execution.
+};
+
+// Define a typed test fixture for the functions which have the same input and output types
+template <typename T>
+class RbfInterpolTypedTestFixture : public ::testing::Test, public interp_dimensions {
+public:
 
   // Data arrays.
-  std::vector<T> p_cell_in;          // size: nproma * nlev * nblks_c
-  std::vector<int> rbf_c2grad_idx;     // size: rbf_c2grad_dim * nproma * nblks_c
-  std::vector<int> rbf_c2grad_blk;     // size: rbf_c2grad_dim * nproma * nblks_c
-  std::vector<int> rbf_vec_idx_c;      //size: rbf_vec_dim_c * nproma * nblks_c
-  std::vector<int> rbf_vec_blk_c;      // size: rbf_vec_dim_c * nproma * nblks_c
-  std::vector<T> rbf_c2grad_coeff;     // size: rbf_c2grad_dim * 2 * nproma * nblks_c
-  std::vector<T> grad_x;             // size: nproma * nlev * nblks_c
-  std::vector<T> grad_y;             // size: nproma * nlev * nblks_c
+  std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c
+  std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c
+  std::vector<int> rbf_vec_idx_c;  //size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<int> rbf_vec_blk_c;  // size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c
+  std::vector<T> grad_x; // size: nproma * nlev * nblks_c
+  std::vector<T> grad_y; // size: nproma * nlev * nblks_c
   std::vector<T> p_vn_in;
   std::vector<T> rbf_vec_coeff_c;
   std::vector<T> p_u_out;
@@ -120,7 +125,7 @@ typedef ::testing::Types<float, double> MyTypes;
 
 TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes);
 
-TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) {
+TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) {
   using T = TypeParam;
   rbf_interpol_c2grad_lib<TypeParam>(
     this->p_cell_in.data(),
@@ -158,7 +163,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, BasicTest) {
 }
 
 
-TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) {
+TYPED_TEST(RbfInterpolTypedTestFixture, Cell) {
     using T = TypeParam;
 
     rbf_vec_interpol_cell_lib<T>(
@@ -188,15 +193,12 @@ TYPED_TEST(RbfInterpolTypedTestFixture, cell_lib) {
             size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
             EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5))
               << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i;
-      //      EXPECT_NEAR(this->p_v_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5))
-        //      << "p_v_out failure at block " << jb << ", level " << jk << ", index " << i;
           }
         }
       }
   }
 
-
-TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){
+TYPED_TEST(RbfInterpolTypedTestFixture, Egde){
     using T = TypeParam;
 
 
@@ -234,7 +236,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, edge_lib){
 
 // Define a typed test fixture for the functions which have different input and output types
 template <typename TypePair>
-class RbfVecInterpolMixedTestFixture : public ::testing::Test {
+class RbfVecInterpolMixedTestFixture : public ::testing::Test, public interp_dimensions {
 public:
   using InType  = typename TypePair::in_type;
   using OutType = typename TypePair::out_type;
@@ -279,7 +281,7 @@ public:
 
 TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes);
 
-TYPED_TEST(RbfVecInterpolMixedTestFixture, BasicTest) {
+TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) {
   using InType  = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
-- 
GitLab


From eb4096e188f939d5dc311c095cc403a8257a27fe Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 14:19:33 +0100
Subject: [PATCH 29/33] made a few changes to test_interpolation_scalar

---
 test/c/test_interpolation_scalar.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/test/c/test_interpolation_scalar.cpp b/test/c/test_interpolation_scalar.cpp
index 0ee7fa3..507ec3f 100644
--- a/test/c/test_interpolation_scalar.cpp
+++ b/test/c/test_interpolation_scalar.cpp
@@ -48,8 +48,7 @@ public:
   static constexpr int nlev = 7;    // number of vertical levels
   static constexpr int nblks_c = 2; // number of cell blocks
   static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in)
-  static constexpr int nblks_v =
-      2; // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int nblks_v = 2; // number of vertex blocks
 
   // Parameter values.
   const int i_startblk = 0;
@@ -385,7 +384,7 @@ TYPED_TEST_SUITE(InterpolationScalarMixedTestFixture, MixedTypesSP2DP);
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) {
+TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Edges) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
@@ -424,7 +423,7 @@ TYPED_TEST(InterpolationScalarMixedTestFixture, cells2edges) {
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-TYPED_TEST(InterpolationScalarMixedTestFixture, cells2verts) {
+TYPED_TEST(InterpolationScalarMixedTestFixture, Cells2Verts) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
@@ -496,7 +495,7 @@ public:
 TYPED_TEST_SUITE(Cells2vertsriScalarLibTestFixture, MixedTypes);
 
 // Add test
-TYPED_TEST(Cells2vertsriScalarLibTestFixture, cells2verts_ri) {
+TYPED_TEST(Cells2vertsriScalarLibTestFixture, Cells2VertsRI) {
   using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
-- 
GitLab


From 1d49b1bde48b2f43dd65c63c04517e4e7332b761 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 15:07:23 +0100
Subject: [PATCH 30/33] formatted the C code using clang-format

---
 src/interpolation/interpolation_bindings.cpp | 262 +++---
 src/interpolation/interpolation_bindings.h   | 103 +--
 src/interpolation/mo_lib_intp_rbf.cpp        | 798 ++++++++++---------
 src/interpolation/mo_lib_intp_rbf.hpp        |  56 +-
 test/c/test_intp_rbf.cpp                     | 319 ++++----
 5 files changed, 732 insertions(+), 806 deletions(-)

diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp
index 75449d6..a13ad0f 100644
--- a/src/interpolation/interpolation_bindings.cpp
+++ b/src/interpolation/interpolation_bindings.cpp
@@ -329,191 +329,121 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
 }
 
 void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, double>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<double, double>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
 void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<float, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<float, float>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
 void rbf_vec_interpol_vertex_lib_mixprec(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    rbf_vec_interpol_vertex_lib<double, float>(
-        p_e_in,
-        rbf_vec_idx_v,
-        rbf_vec_blk_v,
-        rbf_vec_coeff_v,
-        p_u_out,
-        p_v_out,
-        i_startblk,       // start_block needed for get_indices_c_lib
-        i_endblk,         // end_block needed for get_indices_c_lib
-        i_startidx_in,    // start_index needed for get_indices_c_lib
-        i_endidx_in,      // end_index needed for get_indices_c_lib
-        slev,             // vertical start level
-        elev,             // vertical end level
-        nproma,           // inner loop length/vector length
-        lacc,                  // if true, use Cuda mem-/exec-spaces
-        acc_async,              // [deprecated] use async acc
-        nlev, nblks_e, nblks_v
-        );
-
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v) {
+  rbf_vec_interpol_vertex_lib<double, float>(
+      p_e_in, rbf_vec_idx_v, rbf_vec_blk_v, rbf_vec_coeff_v, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
-void rbf_interpol_c2grad_lib_sp(const float* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
-    rbf_interpol_c2grad_lib<float>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                                rbf_c2grad_coeff,
-                                grad_x, grad_y, i_startblk, i_endblk,
-                                i_startidx_in, i_endidx_in, slev, elev,
-                                nproma,  rbf_c2grad_dim, nlev, nblk_c, lacc);
+void rbf_interpol_c2grad_lib_sp(const float *p_cell_in,
+                                const int *rbf_c2grad_idx,
+                                const int *rbf_c2grad_blk,
+                                const float *rbf_c2grad_coeff, float *grad_x,
+                                float *grad_y, int i_startblk, int i_endblk,
+                                int i_startidx_in, int i_endidx_in, int slev,
+                                int elev, int nproma, int rbf_c2grad_dim,
+                                int nlev, int nblk_c, bool lacc) {
+
+  rbf_interpol_c2grad_lib<float>(
+      p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x,
+      grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev,
+      nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
-void rbf_interpol_c2grad_lib_dp(const double* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                                const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-                                int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc){
-
-    rbf_interpol_c2grad_lib<double>(p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk,
-                                rbf_c2grad_coeff,
-                                grad_x, grad_y, i_startblk, i_endblk,
-                                i_startidx_in, i_endidx_in, slev, elev,
-                                nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
+void rbf_interpol_c2grad_lib_dp(const double *p_cell_in,
+                                const int *rbf_c2grad_idx,
+                                const int *rbf_c2grad_blk,
+                                const double *rbf_c2grad_coeff, double *grad_x,
+                                double *grad_y, int i_startblk, int i_endblk,
+                                int i_startidx_in, int i_endidx_in, int slev,
+                                int elev, int nproma, int rbf_c2grad_dim,
+                                int nlev, int nblk_c, bool lacc) {
+
+  rbf_interpol_c2grad_lib<double>(
+      p_cell_in, rbf_c2grad_idx, rbf_c2grad_blk, rbf_c2grad_coeff, grad_x,
+      grad_y, i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev,
+      nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
-void rbf_vec_interpol_cell_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                  const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
+void rbf_vec_interpol_cell_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async) {
 
-    rbf_vec_interpol_cell_lib<float>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c,
-                                     rbf_vec_coeff_c, p_u_out, p_v_out,
-                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                     slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+  rbf_vec_interpol_cell_lib<float>(
+      p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
-void rbf_vec_interpol_cell_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                                  const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                                  int rbf_vec_dim_c, bool lacc, bool acc_async){
+void rbf_vec_interpol_cell_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async) {
 
-    rbf_vec_interpol_cell_lib<double>(p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c,
-                                      p_u_out, p_v_out, i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                      slev, elev, nproma, nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
+  rbf_vec_interpol_cell_lib<double>(
+      p_vn_in, rbf_vec_idx_c, rbf_vec_blk_c, rbf_vec_coeff_c, p_u_out, p_v_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nproma,
+      nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
-void rbf_vec_interpol_edge_lib_dp(const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                  const double* rbf_vec_coeff_e, double* p_vt_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                  bool acc_async){
-
-    rbf_vec_interpol_edge_lib<double>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
-                                      rbf_vec_coeff_e, p_vt_out,
-                                      i_startblk, i_endblk, i_startidx_in,i_endidx_in,
-                                      slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
+void rbf_vec_interpol_edge_lib_dp(
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async) {
+
+  rbf_vec_interpol_edge_lib<double>(
+      p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev,
+      nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
 }
 
-void rbf_vec_interpol_edge_lib_sp(const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                                  const float* rbf_vec_coeff_e, float* p_vt_out,
-                                  int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                                  int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                                  bool acc_async){
+void rbf_vec_interpol_edge_lib_sp(
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async) {
 
-    rbf_vec_interpol_edge_lib<float>(p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e,
-                                     rbf_vec_coeff_e, p_vt_out,
-                                     i_startblk, i_endblk, i_startidx_in, i_endidx_in,
-                                     slev, elev, nlev, nproma, rbf_vec_dim_e, nblks_e,lacc, acc_async);
+  rbf_vec_interpol_edge_lib<float>(
+      p_vn_in, rbf_vec_idx_e, rbf_vec_blk_e, rbf_vec_coeff_e, p_vt_out,
+      i_startblk, i_endblk, i_startidx_in, i_endidx_in, slev, elev, nlev,
+      nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
 }
diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h
index 419446a..0f4778d 100644
--- a/src/interpolation/interpolation_bindings.h
+++ b/src/interpolation/interpolation_bindings.h
@@ -187,80 +187,59 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                      const int nlev, const int nblks_c, const bool lacc);
 
 void rbf_vec_interpol_vertex_lib_dp(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
 
 void rbf_vec_interpol_vertex_lib_sp(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
 
 void rbf_interpol_c2grad_lib_sp(
-        const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-        const float* rbf_c2grad_coeff, float* grad_x, const float* grad_y,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+    const float *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff,
+    float *grad_x, float *grad_y, // outputs, written by the implementation
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 void rbf_interpol_c2grad_lib_dp(
-        const double* p_cell_in, const int*rbf_c2grad_idx, const int* rbf_c2grad_blk,
-        const double* rbf_c2grad_coeff, double* grad_x, const double* grad_y,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
+    const double *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff,
+    double *grad_x, double *grad_y, // outputs, written by the implementation
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblk_c, bool lacc);
 
 void rbf_vec_interpol_cell_lib_sp(
-        const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-        const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-        int rbf_vec_dim_c, bool lacc, bool acc_async);
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
 
 void rbf_vec_interpol_cell_lib_dp(
-        const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-        const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-        int rbf_vec_dim_c, bool lacc, bool acc_async);
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
 
 void rbf_vec_interpol_edge_lib_dp(
-        const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-        const double* rbf_vec_coeff_e, double* p_vt_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-        bool acc_async);
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async);
 
 void rbf_vec_interpol_edge_lib_sp(
-        const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-        const float* rbf_vec_coeff_e, float* p_vt_out,
-        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-        int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-        bool acc_async);
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async);
 }
diff --git a/src/interpolation/mo_lib_intp_rbf.cpp b/src/interpolation/mo_lib_intp_rbf.cpp
index 361710f..d1178a6 100644
--- a/src/interpolation/mo_lib_intp_rbf.cpp
+++ b/src/interpolation/mo_lib_intp_rbf.cpp
@@ -30,404 +30,446 @@ constexpr int rbf_vec_dim_v = 6;
 /// precision(T) >= precision(S)
 template <typename T, typename S>
 void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,              // [deprecated] use async acc
+    const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out,
+    const int i_startblk,    // start_block needed for get_indices_c_lib
+    const int i_endblk,      // end_block needed for get_indices_c_lib
+    const int i_startidx_in, // start_index needed for get_indices_c_lib
+    const int i_endidx_in,   // end_index needed for get_indices_c_lib
+    const int slev,          // vertical start level
+    const int elev,          // vertical end level
+    const int nproma,        // inner loop length/vector length
+    const bool lacc,         // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,    // [deprecated] use async acc
     // Dimensions for the arrays.
-    const int nlev, const int nblks_e, const int nblks_v
-    )
-{
-    /*
+    const int nlev, const int nblks_e, const int nblks_v) {
+  /*
 #ifdef DIM_ENABLE_GPU
-    if (lacc){ using MemSpace = Kokkos::CudaSpace;
-    } else { using MemSpace = Kokkos::HostSpace; }
+  if (lacc){ using MemSpace = Kokkos::CudaSpace;
+  } else { using MemSpace = Kokkos::HostSpace; }
 #else
-    using MemSpace = Kokkos::HostSpace;
+  using MemSpace = Kokkos::HostSpace;
 #endif
 
-    */
-
-    // Wrap raw pointers in unmanaged Kokkos Views.
-    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
-    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
-
-
-
-    // input components of velocity or horizontal vorticity vectors at edge midpoints
-    // dim: (nproma,nlev,nblks_e)
-    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
-
-    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
-    // (rbf_vec_dim_v,nproma,nblks_v)
-    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
-    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
-
-    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
-    // at each tringle vertex (input is normal component),
-    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
-    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
-
-    // reconstructed x-component (u) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
-    // reconstructed y-component (v) of velocity vector,
-    // dim: (nproma,nlev,nblks_v)
-    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
-
-    // Local vars
-    //int jv, jk, jb; // integer over vertices, levels, and blocks,
-    int jb; // integer over vertices, levels, and blocks,
-    int i_startidx; // start index
-    int i_endidx;   // end index
-
-    for (jb=i_startblk; jb <= i_endblk; ++jb){
-
-        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
-                          i_startidx, i_endidx);
-
-        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
-            {slev, i_startidx}, {elev + 1, i_endidx + 1});
-
-        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
-            KOKKOS_LAMBDA(const int jk, const int jv) {
-
-                // NOTE: Static indexes reduced by 1 from Fortran version
-                p_u_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-                p_v_out_view(jv, jk, jb) =
-                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
-                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
-                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
-                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
-                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
-                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
-            }
-        );
-    }
+  */
+
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+  typedef Kokkos::View<S ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedS3D;
+
+  // input components of velocity or horizontal vorticity vectors at edge
+  // midpoints; dim: (nproma,nlev,nblks_e)
+  UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+  // index array defining the stencil of surrounding edges for vector rbf
+  // interpolation at each triangle vertex; dim: (rbf_vec_dim_v,nproma,nblks_v)
+  UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+  UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+  // working-precision array containing the coefficients used for vector rbf
+  // interpolation at each triangle vertex (input is normal component),
+  // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+  UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma,
+                                   nblks_v);
+
+  // reconstructed x-component (u) of velocity vector,
+  // dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+  // reconstructed y-component (v) of velocity vector,
+  // dim: (nproma,nlev,nblks_v)
+  UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+  // Local vars
+  // jv (vertex index) and jk (level index) are the Kokkos lambda arguments
+  int jb;         // loop index over blocks
+  int i_startidx; // start index
+  int i_endidx;   // end index
+
+  for (jb = i_startblk; jb <= i_endblk; ++jb) {
+
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_vertex_lib", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jv) {
+          // NOTE: Static indexes reduced by 1 from Fortran version
+          p_u_out_view(jv, jk, jb) =
+              ptr_coeff_view(0, 0, jv, jb) *
+                  p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+              ptr_coeff_view(1, 0, jv, jb) *
+                  p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+              ptr_coeff_view(2, 0, jv, jb) *
+                  p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+              ptr_coeff_view(3, 0, jv, jb) *
+                  p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+              ptr_coeff_view(4, 0, jv, jb) *
+                  p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+              ptr_coeff_view(5, 0, jv, jb) *
+                  p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+          p_v_out_view(jv, jk, jb) =
+              ptr_coeff_view(0, 1, jv, jb) *
+                  p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+              ptr_coeff_view(1, 1, jv, jb) *
+                  p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+              ptr_coeff_view(2, 1, jv, jb) *
+                  p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+              ptr_coeff_view(3, 1, jv, jb) *
+                  p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+              ptr_coeff_view(4, 1, jv, jb) *
+                  p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+              ptr_coeff_view(5, 1, jv, jb) *
+                  p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+        });
+  }
 }
 
 template <typename T>
-void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc){
-
-    //aliases for unmanaged Kokkos views 
-    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-    typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
-
-    //to avoid memory ownership issues
-    UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c); 
-    UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c); 
-    UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
-    UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim, nproma, nblks_c); 
-    UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim, nproma, nblks_c); 
-    UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2, nproma, nblks_c); 
-
-    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
-
-        int i_startidx, i_endidx;
-        get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); 
-
-        Kokkos::parallel_for("rbf_interpol_c2grad", 
-                             Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
-                             KOKKOS_LAMBDA(const int jk, const int jc){
-
-                             grad_x_view(jc, jk, jb) =
-                             rbf_c2grad_coeff_view(0, 1, jc, jb)*
-                             p_cell_in_view(jc, jk, jb) + 
-                             rbf_c2grad_coeff_view(1, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) + 
-                             rbf_c2grad_coeff_view(2, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) + 
-                             rbf_c2grad_coeff_view(3, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) + 
-                             rbf_c2grad_coeff_view(4, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) + 
-                             rbf_c2grad_coeff_view(5, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) + 
-                             rbf_c2grad_coeff_view(6, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) + 
-                             rbf_c2grad_coeff_view(7, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) + 
-                             rbf_c2grad_coeff_view(8, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) + 
-                             rbf_c2grad_coeff_view(9, 1, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb)); 
-
-                             grad_y_view(jc, jk, jb) =
-                             rbf_c2grad_coeff_view(0, 2, jc, jb)*
-                             p_cell_in_view(jc, jk, jb) +
-                             rbf_c2grad_coeff_view(1, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk, rbf_c2grad_blk_view(1, jc, jb)) +
-                             rbf_c2grad_coeff_view(2, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk, rbf_c2grad_blk_view(2, jc, jb)) +
-                             rbf_c2grad_coeff_view(3, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk, rbf_c2grad_blk_view(3, jc, jb)) +
-                             rbf_c2grad_coeff_view(4, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk, rbf_c2grad_blk_view(4, jc, jb)) +
-                             rbf_c2grad_coeff_view(5, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk, rbf_c2grad_blk_view(5, jc, jb)) +
-                             rbf_c2grad_coeff_view(6, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk, rbf_c2grad_blk_view(6, jc, jb)) +
-                             rbf_c2grad_coeff_view(7, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk, rbf_c2grad_blk_view(7, jc, jb)) +
-                             rbf_c2grad_coeff_view(8, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk, rbf_c2grad_blk_view(8, jc, jb)) +
-                             rbf_c2grad_coeff_view(9, 2, jc, jb)*
-                             p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk, rbf_c2grad_blk_view(9, jc, jb));
-                            });
-                             
-    }//for
-}//void
-
+void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx,
+                             const int *rbf_c2grad_blk,
+                             const T *rbf_c2grad_coeff, T *grad_x, T *grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in,
+                             int i_endidx_in, int slev, int elev, int nproma,
+                             int rbf_c2grad_dim, int nlev, int nblks_c,
+                             bool lacc) {
+  // Evaluates grad_x/grad_y at (jc, jk, jb) as 10-term weighted sums of
+  // p_cell_in: term 0 is the cell itself, terms 1..9 are the cells addressed
+  // by rbf_c2grad_idx/rbf_c2grad_blk. rbf_c2grad_coeff is viewed as
+  // (rbf_c2grad_dim, 2, nproma, nblks_c): component 0 weights grad_x and
+  // component 1 weights grad_y (view subscripts are 0-based, so 2 is invalid).
+  // NOTE(review): idx/blk values are used directly as 0-based subscripts;
+  // confirm callers do not pass 1-based Fortran indices. lacc is unused here.
+
+  using Layout = Kokkos::LayoutLeft;
+  using Unmanaged = Kokkos::MemoryUnmanaged;
+  using UnmanagedConstT3D = Kokkos::View<const T ***, Layout, Unmanaged>;
+  using UnmanagedT3D = Kokkos::View<T ***, Layout, Unmanaged>;
+  using UnmanagedConstInt3D = Kokkos::View<const int ***, Layout, Unmanaged>;
+  using UnmanagedConstT4D = Kokkos::View<const T ****, Layout, Unmanaged>;
+
+  // Unmanaged views wrap the caller's buffers without taking ownership.
+  UnmanagedConstT3D p_cell_in_view(p_cell_in, nproma, nlev, nblks_c);
+  UnmanagedT3D grad_x_view(grad_x, nproma, nlev, nblks_c);
+  UnmanagedT3D grad_y_view(grad_y, nproma, nlev, nblks_c);
+  UnmanagedConstInt3D rbf_c2grad_idx_view(rbf_c2grad_idx, rbf_c2grad_dim,
+                                          nproma, nblks_c);
+  UnmanagedConstInt3D rbf_c2grad_blk_view(rbf_c2grad_blk, rbf_c2grad_dim,
+                                          nproma, nblks_c);
+  UnmanagedConstT4D rbf_c2grad_coeff_view(rbf_c2grad_coeff, rbf_c2grad_dim, 2,
+                                          nproma, nblks_c);
+
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+    // Policy upper bounds are exclusive, hence elev + 1 and i_endidx + 1.
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_interpol_c2grad", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          grad_x_view(jc, jk, jb) =
+              rbf_c2grad_coeff_view(0, 0, jc, jb) * p_cell_in_view(jc, jk, jb) +
+              rbf_c2grad_coeff_view(1, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk,
+                                 rbf_c2grad_blk_view(1, jc, jb)) +
+              rbf_c2grad_coeff_view(2, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk,
+                                 rbf_c2grad_blk_view(2, jc, jb)) +
+              rbf_c2grad_coeff_view(3, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk,
+                                 rbf_c2grad_blk_view(3, jc, jb)) +
+              rbf_c2grad_coeff_view(4, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk,
+                                 rbf_c2grad_blk_view(4, jc, jb)) +
+              rbf_c2grad_coeff_view(5, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk,
+                                 rbf_c2grad_blk_view(5, jc, jb)) +
+              rbf_c2grad_coeff_view(6, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk,
+                                 rbf_c2grad_blk_view(6, jc, jb)) +
+              rbf_c2grad_coeff_view(7, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk,
+                                 rbf_c2grad_blk_view(7, jc, jb)) +
+              rbf_c2grad_coeff_view(8, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk,
+                                 rbf_c2grad_blk_view(8, jc, jb)) +
+              rbf_c2grad_coeff_view(9, 0, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk,
+                                 rbf_c2grad_blk_view(9, jc, jb));
+
+          grad_y_view(jc, jk, jb) =
+              rbf_c2grad_coeff_view(0, 1, jc, jb) * p_cell_in_view(jc, jk, jb) +
+              rbf_c2grad_coeff_view(1, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(1, jc, jb), jk,
+                                 rbf_c2grad_blk_view(1, jc, jb)) +
+              rbf_c2grad_coeff_view(2, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(2, jc, jb), jk,
+                                 rbf_c2grad_blk_view(2, jc, jb)) +
+              rbf_c2grad_coeff_view(3, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(3, jc, jb), jk,
+                                 rbf_c2grad_blk_view(3, jc, jb)) +
+              rbf_c2grad_coeff_view(4, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(4, jc, jb), jk,
+                                 rbf_c2grad_blk_view(4, jc, jb)) +
+              rbf_c2grad_coeff_view(5, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(5, jc, jb), jk,
+                                 rbf_c2grad_blk_view(5, jc, jb)) +
+              rbf_c2grad_coeff_view(6, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(6, jc, jb), jk,
+                                 rbf_c2grad_blk_view(6, jc, jb)) +
+              rbf_c2grad_coeff_view(7, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(7, jc, jb), jk,
+                                 rbf_c2grad_blk_view(7, jc, jb)) +
+              rbf_c2grad_coeff_view(8, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(8, jc, jb), jk,
+                                 rbf_c2grad_blk_view(8, jc, jb)) +
+              rbf_c2grad_coeff_view(9, 1, jc, jb) *
+                  p_cell_in_view(rbf_c2grad_idx_view(9, jc, jb), jk,
+                                 rbf_c2grad_blk_view(9, jc, jb));
+        });
+  } // for
+} // void
 
 //------------------------------------------rbf_vec_interpol_cell_lib---------------------------------------------
 
 template <typename T>
-void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                               const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
-                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-                               int rbf_vec_dim_c,  bool lacc, bool acc_async){
-
-    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-    typedef Kokkos::View<const T****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
-
-    UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
-    UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma, nblks_c);
-    UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma, nblks_c);
-    UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, nproma, nblks_c); //TODO
-    UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
-    UnmanagedT3D p_v_out_view(p_u_out, nproma, nlev, nblks_c);
-
-
-    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
-
-         int i_startidx, i_endidx;
-         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
-
-          Kokkos::parallel_for("rbf_vec_interpol_cell_lib",
-                               Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
-                               KOKKOS_LAMBDA(const int jk, const int jc){
-
-                               p_u_out_view(jc, jk, jb) =
-                               rbf_vec_coeff_c_view(0, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
-                               rbf_vec_coeff_c_view(1, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
-                               rbf_vec_coeff_c_view(2, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
-                               rbf_vec_coeff_c_view(3, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
-                               rbf_vec_coeff_c_view(4, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
-                               rbf_vec_coeff_c_view(5, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
-                               rbf_vec_coeff_c_view(6, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
-                               rbf_vec_coeff_c_view(7, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
-                               rbf_vec_coeff_c_view(8, 1, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
-
-                               p_v_out_view(jc, jk, jb) =
-                               rbf_vec_coeff_c_view(0, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk, rbf_vec_blk_c_view(0, jc, jb)) +
-                               rbf_vec_coeff_c_view(1, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk, rbf_vec_blk_c_view(1, jc, jb)) +
-                               rbf_vec_coeff_c_view(2, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk, rbf_vec_blk_c_view(2, jc, jb)) +
-                               rbf_vec_coeff_c_view(3, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk, rbf_vec_blk_c_view(3, jc, jb)) +
-                               rbf_vec_coeff_c_view(4, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk, rbf_vec_blk_c_view(4, jc, jb)) +
-                               rbf_vec_coeff_c_view(5, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk, rbf_vec_blk_c_view(5, jc, jb)) +
-                               rbf_vec_coeff_c_view(6, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk, rbf_vec_blk_c_view(6, jc, jb)) +
-                               rbf_vec_coeff_c_view(7, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk, rbf_vec_blk_c_view(7, jc, jb)) +
-                               rbf_vec_coeff_c_view(8, 2, jc, jb)*
-                               p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk, rbf_vec_blk_c_view(8, jc, jb));
-                               });
-                               Kokkos::fence();
-    }//for
-}//void
+void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c,
+                               const int *rbf_vec_blk_c,
+                               const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nproma,
+                               int nlev, int nblks_c, int nblks_e,
+                               int rbf_vec_dim_c, bool lacc, bool acc_async) {
+  // Computes p_u_out/p_v_out at (jc, jk, jb) as 9-term weighted sums of
+  // p_vn_in over the points given by rbf_vec_idx_c/rbf_vec_blk_c.
+  // rbf_vec_coeff_c is viewed as (rbf_vec_dim_c, 2, nproma, nblks_c), with
+  // component 0 weighting p_u_out and component 1 weighting p_v_out.
+  // lacc and acc_async are not used in this function.
+
+  using Layout = Kokkos::LayoutLeft;
+  using Unmanaged = Kokkos::MemoryUnmanaged;
+  using UnmanagedConstT3D = Kokkos::View<const T ***, Layout, Unmanaged>;
+  using UnmanagedT3D = Kokkos::View<T ***, Layout, Unmanaged>;
+  using UnmanagedConstInt3D = Kokkos::View<const int ***, Layout, Unmanaged>;
+  using UnmanagedConstT4D = Kokkos::View<const T ****, Layout, Unmanaged>;
+
+  UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+  UnmanagedConstInt3D rbf_vec_idx_c_view(rbf_vec_idx_c, rbf_vec_dim_c, nproma,
+                                         nblks_c);
+  UnmanagedConstInt3D rbf_vec_blk_c_view(rbf_vec_blk_c, rbf_vec_dim_c, nproma,
+                                         nblks_c);
+  UnmanagedConstT4D rbf_vec_coeff_c_view(rbf_vec_coeff_c, rbf_vec_dim_c, 2,
+                                         nproma, nblks_c);
+  UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
+  UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c);
+
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+    // Policy upper bounds are exclusive, hence elev + 1 and i_endidx + 1.
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_cell_lib", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          p_u_out_view(jc, jk, jb) =
+              rbf_vec_coeff_c_view(0, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk,
+                               rbf_vec_blk_c_view(0, jc, jb)) +
+              rbf_vec_coeff_c_view(1, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk,
+                               rbf_vec_blk_c_view(1, jc, jb)) +
+              rbf_vec_coeff_c_view(2, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk,
+                               rbf_vec_blk_c_view(2, jc, jb)) +
+              rbf_vec_coeff_c_view(3, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk,
+                               rbf_vec_blk_c_view(3, jc, jb)) +
+              rbf_vec_coeff_c_view(4, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk,
+                               rbf_vec_blk_c_view(4, jc, jb)) +
+              rbf_vec_coeff_c_view(5, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk,
+                               rbf_vec_blk_c_view(5, jc, jb)) +
+              rbf_vec_coeff_c_view(6, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk,
+                               rbf_vec_blk_c_view(6, jc, jb)) +
+              rbf_vec_coeff_c_view(7, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk,
+                               rbf_vec_blk_c_view(7, jc, jb)) +
+              rbf_vec_coeff_c_view(8, 0, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk,
+                               rbf_vec_blk_c_view(8, jc, jb));
+
+          p_v_out_view(jc, jk, jb) =
+              rbf_vec_coeff_c_view(0, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(0, jc, jb), jk,
+                               rbf_vec_blk_c_view(0, jc, jb)) +
+              rbf_vec_coeff_c_view(1, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(1, jc, jb), jk,
+                               rbf_vec_blk_c_view(1, jc, jb)) +
+              rbf_vec_coeff_c_view(2, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(2, jc, jb), jk,
+                               rbf_vec_blk_c_view(2, jc, jb)) +
+              rbf_vec_coeff_c_view(3, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(3, jc, jb), jk,
+                               rbf_vec_blk_c_view(3, jc, jb)) +
+              rbf_vec_coeff_c_view(4, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(4, jc, jb), jk,
+                               rbf_vec_blk_c_view(4, jc, jb)) +
+              rbf_vec_coeff_c_view(5, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(5, jc, jb), jk,
+                               rbf_vec_blk_c_view(5, jc, jb)) +
+              rbf_vec_coeff_c_view(6, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(6, jc, jb), jk,
+                               rbf_vec_blk_c_view(6, jc, jb)) +
+              rbf_vec_coeff_c_view(7, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(7, jc, jb), jk,
+                               rbf_vec_blk_c_view(7, jc, jb)) +
+              rbf_vec_coeff_c_view(8, 1, jc, jb) *
+                  p_vn_in_view(rbf_vec_idx_c_view(8, jc, jb), jk,
+                               rbf_vec_blk_c_view(8, jc, jb));
+        });
+    Kokkos::fence();
+  } // for
+} // void
 
 //------------------------------------------rbf_vec_interpol_edge_lib---------------------------------------------
 
 template <typename T>
-void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                               const T* rbf_vec_coeff_e, T* p_vt_out,
-                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                               bool acc_async){
-
-    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
-    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
-
-    UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma,nlev,nblks_e);
-    UnmanagedConstInt3D rbf_vec_idx_e_view(rbf_vec_idx_e, rbf_vec_dim_e,nproma,nblks_e);
-    UnmanagedConstInt3D rbf_vec_blk_e_view(rbf_vec_blk_e, rbf_vec_dim_e,nproma,nblks_e);
-    UnmanagedConstT3D rbf_vec_coeff_e_view(rbf_vec_coeff_e, rbf_vec_dim_e,nproma,nblks_e);
-    UnmanagedT3D p_vt_out_view(p_vt_out, nproma,nlev,nblks_e);
-
-
-    for (int jb = i_startblk; jb <= i_endblk; ++jb) {
-
-         int i_startidx, i_endidx;
-         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
-
-         Kokkos::parallel_for("rbf_vec_interpol_edge_lib",
-                              Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev+1, i_endidx+1}),
-                              KOKKOS_LAMBDA(const int jk, const int je){
-
-                              p_vt_out_view(je, jk, jb) =
-                              rbf_vec_coeff_e_view(0, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(0, je, jb), jk, rbf_vec_blk_e_view(0, je, jb)) +
-                              rbf_vec_coeff_e_view(1, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(1, je, jb), jk, rbf_vec_blk_e_view(1, je, jb)) +
-                              rbf_vec_coeff_e_view(2, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(2, je, jb), jk, rbf_vec_blk_e_view(2, je, jb)) +
-                              rbf_vec_coeff_e_view(3, je, jb) * p_vn_in_view(rbf_vec_idx_e_view(3, je, jb), jk, rbf_vec_blk_e_view(3, je, jb));
-
-                              });
-    }
+void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e,
+                               const int *rbf_vec_blk_e,
+                               const T *rbf_vec_coeff_e, T *p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nlev,
+                               int nproma, int rbf_vec_dim_e, int nblks_e,
+                               bool lacc, bool acc_async) {
+  // Computes p_vt_out(je, jk, jb) as a four-term weighted sum of p_vn_in
+  // values. For stencil entry i in 0..3 the source point is
+  //   p_vn_in(rbf_vec_idx_e(i, je, jb), jk, rbf_vec_blk_e(i, je, jb))
+  // and its weight is rbf_vec_coeff_e(i, je, jb).
+  //
+  // Array extents, as used to build the views below:
+  //   p_vn_in, p_vt_out            : nproma x nlev x nblks_e
+  //   rbf_vec_idx_e, rbf_vec_blk_e : rbf_vec_dim_e x nproma x nblks_e
+  //   rbf_vec_coeff_e              : rbf_vec_dim_e x nproma x nblks_e
+  //
+  // The index/block arrays are used directly as view subscripts, so they
+  // must hold 0-based values. lacc and acc_async are not used in this
+  // function.
+
+  // Non-owning LayoutLeft views over the caller's buffers.
+  using Layout = Kokkos::LayoutLeft;
+  using Unmanaged = Kokkos::MemoryUnmanaged;
+  using InField = Kokkos::View<const T ***, Layout, Unmanaged>;
+  using OutField = Kokkos::View<T ***, Layout, Unmanaged>;
+  using Stencil = Kokkos::View<const int ***, Layout, Unmanaged>;
+
+  InField vn(p_vn_in, nproma, nlev, nblks_e);
+  Stencil idx(rbf_vec_idx_e, rbf_vec_dim_e, nproma, nblks_e);
+  Stencil blk(rbf_vec_blk_e, rbf_vec_dim_e, nproma, nblks_e);
+  InField coeff(rbf_vec_coeff_e, rbf_vec_dim_e, nproma, nblks_e);
+  OutField vt(p_vt_out, nproma, nlev, nblks_e);
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+    // Index range for this block, as computed by get_indices_c_lib.
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    // Iterate over levels [slev, elev] and indices [i_startidx, i_endidx].
+    Kokkos::parallel_for(
+        "rbf_vec_interpol_edge_lib",
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx},
+                                               {elev + 1, i_endidx + 1}),
+        KOKKOS_LAMBDA(const int lev, const int e) {
+          vt(e, lev, jb) =
+              coeff(0, e, jb) * vn(idx(0, e, jb), lev, blk(0, e, jb)) +
+              coeff(1, e, jb) * vn(idx(1, e, jb), lev, blk(1, e, jb)) +
+              coeff(2, e, jb) * vn(idx(2, e, jb), lev, blk(2, e, jb)) +
+              coeff(3, e, jb) * vn(idx(3, e, jb), lev, blk(3, e, jb));
+        });
+  }
 }
 
 // Explicit instantiation - double precision
-template
-void rbf_vec_interpol_vertex_lib<double, double>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    double* p_u_out,
-    double* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
+template void rbf_vec_interpol_vertex_lib<double, double>(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v); // inputs/coefficients: double, outputs: double
 
 // Explicit instantiation - single precision
-template
-void rbf_vec_interpol_vertex_lib<float, float>(
-    const float* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const float* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
+template void rbf_vec_interpol_vertex_lib<float, float>(
+    const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v); // inputs/coefficients: float, outputs: float
 
 // Explicit instantiation - mixed precision
-template
-void rbf_vec_interpol_vertex_lib<double, float>(
-    const double* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const double* rbf_vec_coeff_v,
-    float* p_u_out,
-    float* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_v
-    );
-
-template
-void rbf_vec_interpol_cell_lib<double>(
-    const double* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-    const double* rbf_vec_coeff_c, double* p_u_out, double* p_v_out,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-    int rbf_vec_dim_c, bool lacc, bool acc_async
-);
-
-template
-void rbf_vec_interpol_cell_lib<float>(
-    const float* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-    const float* rbf_vec_coeff_c, float* p_u_out, float* p_v_out,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
-    int rbf_vec_dim_c, bool lacc, bool acc_async
-);
-
-template
-void rbf_interpol_c2grad_lib<double>(
-    const double* p_cell_in,  const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-    const double* rbf_c2grad_coeff, double* grad_x, double* grad_y,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc
-);
-
-template
-void rbf_interpol_c2grad_lib<float>(
-    const float* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-    const float* rbf_c2grad_coeff, float* grad_x, float* grad_y,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc
-);
-
-template
-void rbf_vec_interpol_edge_lib<double>(
-    const double* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-    const double* rbf_vec_coeff_e, double* p_vt_out,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-    bool acc_async
-);
-
-template
-void rbf_vec_interpol_edge_lib<float>(
-    const float* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-    const float* rbf_vec_coeff_e, float* p_vt_out,
-    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-    int slev, int elev, int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-    bool acc_async
-);
+template void rbf_vec_interpol_vertex_lib<double, float>(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v); // inputs/coefficients: double, outputs: float
+// Explicit instantiation - rbf_vec_interpol_cell_lib, double precision
+template void rbf_vec_interpol_cell_lib<double>(
+    const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+// Explicit instantiation - rbf_vec_interpol_cell_lib, single precision
+template void rbf_vec_interpol_cell_lib<float>(
+    const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
+    const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
+    int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev,
+    int elev, int nproma, int nlev, int nblks_c, int nblks_e, int rbf_vec_dim_c,
+    bool lacc, bool acc_async);
+// Explicit instantiation - rbf_interpol_c2grad_lib, double precision
+template void rbf_interpol_c2grad_lib<double>(
+    const double *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const double *rbf_c2grad_coeff, double *grad_x,
+    double *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblks_c, bool lacc);
+// Explicit instantiation - rbf_interpol_c2grad_lib, single precision
+template void rbf_interpol_c2grad_lib<float>(
+    const float *p_cell_in, const int *rbf_c2grad_idx,
+    const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x,
+    float *grad_y, int i_startblk, int i_endblk, int i_startidx_in,
+    int i_endidx_in, int slev, int elev, int nproma, int rbf_c2grad_dim,
+    int nlev, int nblks_c, bool lacc);
+// Explicit instantiation - rbf_vec_interpol_edge_lib, double precision
+template void rbf_vec_interpol_edge_lib<double>(
+    const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
+    int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev,
+    int nlev, int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
+    bool acc_async);
+// Explicit instantiation - rbf_vec_interpol_edge_lib, single precision
+template void rbf_vec_interpol_edge_lib<float>(
+    const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
+    const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in, int slev, int elev, int nlev,
+    int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc, bool acc_async);
diff --git a/src/interpolation/mo_lib_intp_rbf.hpp b/src/interpolation/mo_lib_intp_rbf.hpp
index 53547fc..8a85502 100644
--- a/src/interpolation/mo_lib_intp_rbf.hpp
+++ b/src/interpolation/mo_lib_intp_rbf.hpp
@@ -16,41 +16,35 @@
 
 template <typename T, typename S>
 void rbf_vec_interpol_vertex_lib(
-    const T* p_e_in,
-    const int* rbf_vec_idx_v,
-    const int* rbf_vec_blk_v,
-    const T* rbf_vec_coeff_v,
-    S* p_u_out,
-    S* p_v_out,
-    const int i_startblk,       // start_block needed for get_indices_c_lib
-    const int i_endblk,         // end_block needed for get_indices_c_lib
-    const int i_startidx_in,    // start_index needed for get_indices_c_lib
-    const int i_endidx_in,      // end_index needed for get_indices_c_lib
-    const int slev,             // vertical start level
-    const int elev,             // vertical end level
-    const int nproma,           // inner loop length/vector length
-    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
-    const bool acc_async,             // [deprecated] use async acc
-    const int nlev, const int nblks_e, const int nblks_c
-    );
+    const T *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const T *rbf_vec_coeff_v, S *p_u_out, S *p_v_out, const int i_startblk,
+    const int i_endblk, const int i_startidx_in, const int i_endidx_in,
+    const int slev, const int elev, const int nproma, const bool lacc,
+    const bool acc_async, const int nlev, const int nblks_e, const int nblks_c);
 
 template <typename T>
-void rbf_interpol_c2grad_lib(const T* p_cell_in, const int* rbf_c2grad_idx, const int* rbf_c2grad_blk,
-                             const T* rbf_c2grad_coeff, T* grad_x, T* grad_y,
-                             int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                             int slev, int elev, int nproma, int rbf_c2grad_dim, int nlev, int nblks_c, bool lacc);
-
+void rbf_interpol_c2grad_lib(const T *p_cell_in, const int *rbf_c2grad_idx,
+                             const int *rbf_c2grad_blk,
+                             const T *rbf_c2grad_coeff, T *grad_x, T *grad_y,
+                             int i_startblk, int i_endblk, int i_startidx_in,
+                             int i_endidx_in, int slev, int elev, int nproma,
+                             int rbf_c2grad_dim, int nlev, int nblks_c,
+                             bool lacc);
 
 template <typename T>
-void rbf_vec_interpol_cell_lib(const T* p_vn_in, const int* rbf_vec_idx_c, const int* rbf_vec_blk_c,
-                               const T* rbf_vec_coeff_c, T* p_u_out, T* p_v_out,
-                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nproma, int nlev, int nblks_c, int nblks_e,
+void rbf_vec_interpol_cell_lib(const T *p_vn_in, const int *rbf_vec_idx_c,
+                               const int *rbf_vec_blk_c,
+                               const T *rbf_vec_coeff_c, T *p_u_out, T *p_v_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nproma,
+                               int nlev, int nblks_c, int nblks_e,
                                int rbf_vec_dim_c, bool lacc, bool acc_async);
 
 template <typename T>
-void rbf_vec_interpol_edge_lib(const T* p_vn_in, const int* rbf_vec_idx_e, const int* rbf_vec_blk_e,
-                               const T* rbf_vec_coeff_e, T* p_vt_out,
-                               int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
-                               int slev, int elev, int nlev,  int nproma, int rbf_vec_dim_e, int nblks_e, bool lacc,
-                               bool acc_async);
+void rbf_vec_interpol_edge_lib(const T *p_vn_in, const int *rbf_vec_idx_e,
+                               const int *rbf_vec_blk_e,
+                               const T *rbf_vec_coeff_e, T *p_vt_out,
+                               int i_startblk, int i_endblk, int i_startidx_in,
+                               int i_endidx_in, int slev, int elev, int nlev,
+                               int nproma, int rbf_vec_dim_e, int nblks_e,
+                               bool lacc, bool acc_async);
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index 983352c..cdee264 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -9,45 +9,43 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#include <gtest/gtest.h>
+#include "mo_lib_intp_rbf.hpp"
 #include <Kokkos_Core.hpp>
-#include <vector>
 #include <algorithm>
+#include <gtest/gtest.h>
 #include <numeric>
-#include "mo_lib_intp_rbf.hpp"
+#include <vector>
 
 // Free-function helpers for 3D and 4D array sizes (assumed column-major)
-template<typename T>
-size_t num_elements_3d(int d1, int d2, int d3) {
+template <typename T> size_t num_elements_3d(int d1, int d2, int d3) {
   return static_cast<size_t>(d1) * d2 * d3;
 }
 
-template<typename T>
-size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+template <typename T> size_t num_elements_4d(int d1, int d2, int d3, int d4) {
   return static_cast<size_t>(d1) * d2 * d3 * d4;
 }
 
 // Define a helper struct that holds the two types.
-template<typename InT, typename OutT>
-struct MixedPrecision {
-  using in_type  = InT;
+template <typename InT, typename OutT> struct MixedPrecision {
+  using in_type = InT;
   using out_type = OutT;
 };
 
 // Define the list of type pairs we want to test.
-typedef ::testing::Types< MixedPrecision<double, double>,
-                          MixedPrecision<double,  float>,
-                          MixedPrecision<float,   float>  > MixedTypes;
+typedef ::testing::Types<MixedPrecision<double, double>,
+                         MixedPrecision<double, float>,
+                         MixedPrecision<float, float>>
+    MixedTypes;
 
 class interp_dimensions {
 public:
   // Constant dimensions.
-  static constexpr int nproma = 3; // inner loop length
-  static constexpr int nlev = 4;    // number of vertical levels
-  static constexpr int nblks_c = 2; // number of cell blocks
-  static constexpr int nblks_e = 2; // number of edge blocks
-  static constexpr int nblks_v = 2; // number of vertex blocks
-  static constexpr int rbf_c2grad_dim  = 10; // fixed dimension
+  static constexpr int nproma = 3;          // inner loop length
+  static constexpr int nlev = 4;            // number of vertical levels
+  static constexpr int nblks_c = 2;         // number of cell blocks
+  static constexpr int nblks_e = 2;         // number of edge blocks
+  static constexpr int nblks_v = 2;         // number of vertex blocks
+  static constexpr int rbf_c2grad_dim = 10; // fixed dimension
   static constexpr int rbf_vec_dim_c = 9;
   static constexpr int rbf_vec_dim_e = 4;
 
@@ -58,22 +56,24 @@ public:
   const int i_endidx_in = nproma - 1;
   const int slev = 0;
   const int elev = nlev - 1;
-  const bool lacc = false; // Not using ACC-specific behavior.
+  const bool lacc = false;      // Not using ACC-specific behavior.
   const bool acc_async = false; // No asynchronous execution.
 };
 
-// Define a typed test fixture for the functions which have the same input and output types
+// Define a typed test fixture for the functions which have the same input and
+// output types
 template <typename T>
-class RbfInterpolTypedTestFixture : public ::testing::Test, public interp_dimensions {
+class RbfInterpolTypedTestFixture : public ::testing::Test,
+                                    public interp_dimensions {
 public:
-
   // Data arrays.
-  std::vector<T> p_cell_in; // size: nproma * nlev * nblks_c
+  std::vector<T> p_cell_in;        // size: nproma * nlev * nblks_c
   std::vector<int> rbf_c2grad_idx; // size: rbf_c2grad_dim * nproma * nblks_c
   std::vector<int> rbf_c2grad_blk; // size: rbf_c2grad_dim * nproma * nblks_c
-  std::vector<int> rbf_vec_idx_c;  //size: rbf_vec_dim_c * nproma * nblks_c
+  std::vector<int> rbf_vec_idx_c;  // size: rbf_vec_dim_c * nproma * nblks_c
   std::vector<int> rbf_vec_blk_c;  // size: rbf_vec_dim_c * nproma * nblks_c
-  std::vector<T> rbf_c2grad_coeff; // size: rbf_c2grad_dim * 2 * nproma * nblks_c
+  std::vector<T>
+      rbf_c2grad_coeff;  // size: rbf_c2grad_dim * 2 * nproma * nblks_c
   std::vector<T> grad_x; // size: nproma * nlev * nblks_c
   std::vector<T> grad_y; // size: nproma * nlev * nblks_c
   std::vector<T> p_vn_in;
@@ -86,17 +86,20 @@ public:
   std::vector<T> rbf_vec_coeff_e;
   std::vector<T> p_vt_out;
 
-
   RbfInterpolTypedTestFixture() {
     size_t size3d = static_cast<size_t>(nproma) * nlev * nblks_c;
     size_t size3d_idx = static_cast<size_t>(rbf_c2grad_dim) * nproma * nblks_c;
     size_t size4d = static_cast<size_t>(rbf_c2grad_dim) * 2 * nproma * nblks_c;
 
-    size_t size3d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c;
-    size_t size_4d_vec_dim = static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c;
+    size_t size3d_vec_dim =
+        static_cast<size_t>(rbf_vec_dim_c) * nproma * nblks_c;
+    size_t size_4d_vec_dim =
+        static_cast<size_t>(rbf_vec_dim_c) * 2 * nproma * nblks_c;
 
-    size_t size3d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c;
-    size_t size_4d_edge_lib = static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c;
+    size_t size3d_edge_lib =
+        static_cast<size_t>(rbf_vec_dim_e) * nproma * nblks_c;
+    size_t size_4d_edge_lib =
+        static_cast<size_t>(rbf_vec_dim_e) * 2 * nproma * nblks_c;
 
     p_cell_in.resize(size3d, static_cast<T>(1));
     p_vn_in.resize(size3d, static_cast<T>(1));
@@ -104,7 +107,7 @@ public:
     rbf_vec_idx_c.resize(size3d_vec_dim, 1);
     rbf_vec_blk_c.resize(size3d_vec_dim, 0);
     rbf_c2grad_idx.resize(size3d_idx, 1);
-    rbf_c2grad_blk.resize(size3d_idx, 0);  // Set block indices to 0 for testing.
+    rbf_c2grad_blk.resize(size3d_idx, 0); // Set block indices to 0 for testing.
     rbf_vec_idx_e.resize(size3d_vec_dim, 1);
     rbf_vec_blk_e.resize(size3d_vec_dim, 0);
 
@@ -128,23 +131,12 @@ TYPED_TEST_SUITE(RbfInterpolTypedTestFixture, MyTypes);
 TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) {
   using T = TypeParam;
   rbf_interpol_c2grad_lib<TypeParam>(
-    this->p_cell_in.data(),
-    this->rbf_c2grad_idx.data(),
-    this->rbf_c2grad_blk.data(),
-    this->rbf_c2grad_coeff.data(),
-    this->grad_x.data(),
-    this->grad_y.data(),
-    this->i_startblk,
-    this->i_endblk,
-    this->i_startidx_in,
-    this->i_endidx_in,
-    this->slev,
-    this->elev,
-    this->nproma,
-    this->rbf_c2grad_dim,
-    this->nlev,
-    this->nblks_c,
-    this->lacc);
+      this->p_cell_in.data(), this->rbf_c2grad_idx.data(),
+      this->rbf_c2grad_blk.data(), this->rbf_c2grad_coeff.data(),
+      this->grad_x.data(), this->grad_y.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->rbf_c2grad_dim, this->nlev, this->nblks_c,
+      this->lacc);
 
   // For each block from i_startblk to i_endblk-1, and for each (i, level)
   // the kernel sums rbf_c2grad_dim contributions, each equal to 1.
@@ -152,171 +144,160 @@ TYPED_TEST(RbfInterpolTypedTestFixture, C2Grad) {
   for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
     for (int jk = 0; jk < this->nlev; ++jk) {
       for (int i = 0; i < this->nproma; ++i) {
-        size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
-        EXPECT_NEAR(this->grad_x[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5))
-          << "grad_x failure at block " << jb << ", level " << jk << ", index " << i;
-        EXPECT_NEAR(this->grad_y[idx], static_cast<TypeParam>(this->rbf_c2grad_dim), static_cast<TypeParam>(1e-5))
-          << "grad_y failure at block " << jb << ", level " << jk << ", index " << i;
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->grad_x[idx],
+                    static_cast<TypeParam>(this->rbf_c2grad_dim),
+                    static_cast<TypeParam>(1e-5))
+            << "grad_x failure at block " << jb << ", level " << jk
+            << ", index " << i;
+        EXPECT_NEAR(this->grad_y[idx],
+                    static_cast<TypeParam>(this->rbf_c2grad_dim),
+                    static_cast<TypeParam>(1e-5))
+            << "grad_y failure at block " << jb << ", level " << jk
+            << ", index " << i;
       }
     }
   }
 }
 
-
 TYPED_TEST(RbfInterpolTypedTestFixture, Cell) {
-    using T = TypeParam;
-
-    rbf_vec_interpol_cell_lib<T>(
-        this->p_vn_in.data(),
-        this->rbf_vec_idx_c.data(),
-        this->rbf_vec_blk_c.data(),
-        this->rbf_vec_coeff_c.data(),
-        this->p_u_out.data(),
-        this->p_v_out.data(),
-        this->i_startblk,
-        this->i_endblk,
-        this->i_startidx_in,
-        this->i_endidx_in,
-        this->slev,
-        this->elev,
-        this->nproma,
-        this->rbf_c2grad_dim,
-        this->nlev,
-        this->nblks_c,
-        this->nblks_e,
-        this->lacc,
-        this->acc_async);
-
-    for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
-        for (int jk = 0; jk < this->nlev; ++jk) {
-          for (int i = 0; i < this->nproma; ++i) {
-            size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
-            EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c), static_cast<T>(1e-5))
-              << "p_u_out failure at block " << jb << ", level " << jk << ", index " << i;
-          }
-        }
+  using T = TypeParam;
+
+  rbf_vec_interpol_cell_lib<T>(
+      this->p_vn_in.data(), this->rbf_vec_idx_c.data(),
+      this->rbf_vec_blk_c.data(), this->rbf_vec_coeff_c.data(),
+      this->p_u_out.data(), this->p_v_out.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->nlev, this->nblks_c, this->nblks_e,
+      this->rbf_vec_dim_c, this->lacc, this->acc_async);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<T>(this->rbf_vec_dim_c),
+                    static_cast<T>(1e-5))
+            << "p_u_out failure at block " << jb << ", level " << jk
+            << ", index " << i;
       }
+    }
   }
+}
 
-TYPED_TEST(RbfInterpolTypedTestFixture, Egde){
-    using T = TypeParam;
-
-
-    rbf_vec_interpol_edge_lib<T>(
-        this->p_vn_in.data(),
-        this->rbf_vec_idx_e.data(),
-        this->rbf_vec_blk_e.data(),
-        this->rbf_vec_coeff_e.data(),
-        this->p_vt_out.data(),
-        this->i_startblk,
-        this->i_endblk,
-        this->i_startidx_in,
-        this->i_endidx_in,
-        this->slev,
-        this->elev,
-        this->nlev,
-        this->nproma,
-        this->rbf_vec_dim_e,
-        this->nblks_e,
-        this->lacc,
-        this->acc_async);
-
-     for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
-        for (int jk = 0; jk < this->nlev; ++jk) {
-          for (int i = 0; i < this->nproma; ++i) {
-            size_t idx = i + static_cast<size_t>(jk) * this->nproma + static_cast<size_t>(jb) * this->nproma * this->nlev;
-            EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e), static_cast<T>(1e-5))
-              << "p_vt_out failure at block " << jb << ", level " << jk << ", index " << i;
-          }
-        }
-      }
+TYPED_TEST(RbfInterpolTypedTestFixture, Egde) {
+  using T = TypeParam;
 
-}
+  rbf_vec_interpol_edge_lib<T>(
+      this->p_vn_in.data(), this->rbf_vec_idx_e.data(),
+      this->rbf_vec_blk_e.data(), this->rbf_vec_coeff_e.data(),
+      this->p_vt_out.data(), this->i_startblk, this->i_endblk,
+      this->i_startidx_in, this->i_endidx_in, this->slev, this->elev,
+      this->nlev, this->nproma, this->rbf_vec_dim_e, this->nblks_e, this->lacc,
+      this->acc_async);
 
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    for (int jk = 0; jk < this->nlev; ++jk) {
+      for (int i = 0; i < this->nproma; ++i) {
+        size_t idx = i + static_cast<size_t>(jk) * this->nproma +
+                     static_cast<size_t>(jb) * this->nproma * this->nlev;
+        EXPECT_NEAR(this->p_vt_out[idx], static_cast<T>(this->rbf_vec_dim_e),
+                    static_cast<T>(1e-5))
+            << "p_vt_out failure at block " << jb << ", level " << jk
+            << ", index " << i;
+      }
+    }
+  }
+}
 
-// Define a typed test fixture for the functions which have different input and output types
+// Define a typed test fixture for the functions which have different input and
+// output types
 template <typename TypePair>
-class RbfVecInterpolMixedTestFixture : public ::testing::Test, public interp_dimensions {
+class RbfVecInterpolMixedTestFixture : public ::testing::Test,
+                                       public interp_dimensions {
 public:
-  using InType  = typename TypePair::in_type;
+  using InType = typename TypePair::in_type;
   using OutType = typename TypePair::out_type;
 
   // Constant dimensions.
-  static constexpr int nproma     = 3;  // inner loop length
-  static constexpr int nlev        = 4;  // number of vertical levels
-  static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
-  static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
-  static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 4;    // number of vertical levels
+  static constexpr int nblks_e = 2; // number of edge blocks (for p_e_in)
+  static constexpr int nblks_v =
+      2; // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int rbf_vec_dim =
+      6; // fixed dimension for rbf vector (stencil points)
 
   // Parameter values.
-  int i_startblk    = 0;
-  int i_endblk      = 1;      // Test blocks [0, 1]
+  int i_startblk = 0;
+  int i_endblk = 1; // Test blocks [0, 1]
   int i_startidx_in = 0;
-  int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
-  int slev          = 0;
-  int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
-  bool lacc         = false;  // Not using ACC-specific behavior.
-  bool acc_async    = false;  // No asynchronous execution.
+  int i_endidx_in = nproma - 1; // Full range: 0 .. nproma-1
+  int slev = 0;
+  int elev = nlev - 1;    // Full vertical range (0 .. nlev-1)
+  bool lacc = false;      // Not using ACC-specific behavior.
+  bool acc_async = false; // No asynchronous execution.
 
   // Arrays stored in std::vector.
-  std::vector<InType>  p_e_in;           // Dimensions: (nproma, nlev, nblks_e)
-  std::vector<int>     rbf_vec_idx_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
-  std::vector<int>     rbf_vec_blk_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
-  std::vector<InType>  rbf_vec_coeff_v;    // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
-  std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
-  std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<InType> p_e_in;     // Dimensions: (nproma, nlev, nblks_e)
+  std::vector<int> rbf_vec_idx_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<int> rbf_vec_blk_v; // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<InType>
+      rbf_vec_coeff_v;          // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
+  std::vector<OutType> p_u_out; // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<OutType> p_v_out; // Dimensions: (nproma, nlev, nblks_v)
 
   RbfVecInterpolMixedTestFixture() {
     // Allocate and initialize inputs.
-    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e),
+                  static_cast<InType>(1));
     rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
     rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
-    rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+    rbf_vec_coeff_v.resize(
+        num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v),
+        static_cast<InType>(1));
 
     // Allocate output arrays and initialize to zero.
-    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
-    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v),
+                   static_cast<OutType>(0));
+    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v),
+                   static_cast<OutType>(0));
   }
 };
 
 TYPED_TEST_SUITE(RbfVecInterpolMixedTestFixture, MixedTypes);
 
 TYPED_TEST(RbfVecInterpolMixedTestFixture, Vertex) {
-  using InType  = typename TestFixture::InType;
+  using InType = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
 
   // Call the function with mixed precision.
   rbf_vec_interpol_vertex_lib<InType, OutType>(
-    this->p_e_in.data(),
-    this->rbf_vec_idx_v.data(),
-    this->rbf_vec_blk_v.data(),
-    this->rbf_vec_coeff_v.data(),
-    this->p_u_out.data(),
-    this->p_v_out.data(),
-    this->i_startblk,
-    this->i_endblk,
-    this->i_startidx_in,
-    this->i_endidx_in,
-    this->slev,
-    this->elev,
-    this->nproma,
-    this->lacc,
-    this->acc_async,
-    this->nlev,
-    this->nblks_e,
-    this->nblks_v);
+      this->p_e_in.data(), this->rbf_vec_idx_v.data(),
+      this->rbf_vec_blk_v.data(), this->rbf_vec_coeff_v.data(),
+      this->p_u_out.data(), this->p_v_out.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->lacc, this->acc_async, this->nlev,
+      this->nblks_e, this->nblks_v);
 
   // Check the outputs only for blocks in the range [i_startblk, i_endblk].
   for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
     for (int level = 0; level < this->nlev; ++level) {
       for (int i = 0; i < this->nproma; ++i) {
         // Compute the linear index for a 3D array in column-major order:
-        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
-        // Since every contribution is 1 and there are 6 stencil points, expect 6.
-        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
-            << "Failure at block " << block << ", level " << level << ", index " << i;
-        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
-            << "Failure at block " << block << ", level " << level << ", index " << i;
+        size_t idx =
+            i + level * this->nproma + block * this->nproma * this->nlev;
+        // Since every contribution is 1 and there are 6 stencil points,
+        // expect 6.
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6),
+                    static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index "
+            << i;
+        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6),
+                    static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index "
+            << i;
       }
     }
   }
-- 
GitLab


From 2cbc4234c219596f9f90816724d6cb5555ddea3a Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 18:36:01 +0100
Subject: [PATCH 31/33] added some comments to the bindings

---
 src/interpolation/interpolation_bindings.cpp | 11 ++++++++++-
 src/interpolation/interpolation_bindings.h   |  8 ++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/interpolation/interpolation_bindings.cpp b/src/interpolation/interpolation_bindings.cpp
index a13ad0f..4524ad7 100644
--- a/src/interpolation/interpolation_bindings.cpp
+++ b/src/interpolation/interpolation_bindings.cpp
@@ -328,6 +328,7 @@ void cell_avg_lib_sp(const float *psi_c, const int *cell_neighbor_idx,
                       i_endidx_in, slev, elev, nproma, nlev, nblks_c, lacc);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dp_lib
 void rbf_vec_interpol_vertex_lib_dp(
     const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
     const double *rbf_vec_coeff_v, double *p_u_out, double *p_v_out,
@@ -341,6 +342,7 @@ void rbf_vec_interpol_vertex_lib_dp(
       lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_sp_lib
 void rbf_vec_interpol_vertex_lib_sp(
     const float *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
     const float *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
@@ -354,7 +356,8 @@ void rbf_vec_interpol_vertex_lib_sp(
       lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
-void rbf_vec_interpol_vertex_lib_mixprec(
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_vertex_dpsp_lib
+void rbf_vec_interpol_vertex_lib_dpsp(
     const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
     const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
     const int i_startblk, const int i_endblk, const int i_startidx_in,
@@ -367,6 +370,7 @@ void rbf_vec_interpol_vertex_lib_mixprec(
       lacc, acc_async, nlev, nblks_e, nblks_v);
 }
 
+// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=sp)
 void rbf_interpol_c2grad_lib_sp(const float *p_cell_in,
                                 const int *rbf_c2grad_idx,
                                 const int *rbf_c2grad_blk,
@@ -382,6 +386,7 @@ void rbf_interpol_c2grad_lib_sp(const float *p_cell_in,
       nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
+// This is the binding for mo_intp_rbf::rbf_interpol_c2grad_lib (wp=dp)
 void rbf_interpol_c2grad_lib_dp(const double *p_cell_in,
                                 const int *rbf_c2grad_idx,
                                 const int *rbf_c2grad_blk,
@@ -397,6 +402,7 @@ void rbf_interpol_c2grad_lib_dp(const double *p_cell_in,
       nproma, rbf_c2grad_dim, nlev, nblk_c, lacc);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=sp)
 void rbf_vec_interpol_cell_lib_sp(
     const float *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
     const float *rbf_vec_coeff_c, float *p_u_out, float *p_v_out,
@@ -410,6 +416,7 @@ void rbf_vec_interpol_cell_lib_sp(
       nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_cell_lib (wp=dp)
 void rbf_vec_interpol_cell_lib_dp(
     const double *p_vn_in, const int *rbf_vec_idx_c, const int *rbf_vec_blk_c,
     const double *rbf_vec_coeff_c, double *p_u_out, double *p_v_out,
@@ -423,6 +430,7 @@ void rbf_vec_interpol_cell_lib_dp(
       nlev, nblks_c, nblks_e, rbf_vec_dim_c, lacc, acc_async);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=dp)
 void rbf_vec_interpol_edge_lib_dp(
     const double *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
     const double *rbf_vec_coeff_e, double *p_vt_out, int i_startblk,
@@ -436,6 +444,7 @@ void rbf_vec_interpol_edge_lib_dp(
       nproma, rbf_vec_dim_e, nblks_e, lacc, acc_async);
 }
 
+// This is the binding for mo_intp_rbf::rbf_vec_interpol_edge_lib (wp=sp)
 void rbf_vec_interpol_edge_lib_sp(
     const float *p_vn_in, const int *rbf_vec_idx_e, const int *rbf_vec_blk_e,
     const float *rbf_vec_coeff_e, float *p_vt_out, int i_startblk, int i_endblk,
diff --git a/src/interpolation/interpolation_bindings.h b/src/interpolation/interpolation_bindings.h
index 0f4778d..64c6a8c 100644
--- a/src/interpolation/interpolation_bindings.h
+++ b/src/interpolation/interpolation_bindings.h
@@ -202,6 +202,14 @@ void rbf_vec_interpol_vertex_lib_sp(
     const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
     const int nblks_v);
 
+void rbf_vec_interpol_vertex_lib_dpsp(
+    const double *p_e_in, const int *rbf_vec_idx_v, const int *rbf_vec_blk_v,
+    const double *rbf_vec_coeff_v, float *p_u_out, float *p_v_out,
+    const int i_startblk, const int i_endblk, const int i_startidx_in,
+    const int i_endidx_in, const int slev, const int elev, const int nproma,
+    const bool lacc, const bool acc_async, const int nlev, const int nblks_e,
+    const int nblks_v);
+
 void rbf_interpol_c2grad_lib_sp(
     const float *p_cell_in, const int *rbf_c2grad_idx,
     const int *rbf_c2grad_blk, const float *rbf_c2grad_coeff, float *grad_x,
-- 
GitLab


From 399e08caf80cc42ca7925ae1b7f7f921e9df0aef Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 18:42:27 +0100
Subject: [PATCH 32/33] fixed a typo

---
 test/c/test_intp_rbf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
index cdee264..040d440 100644
--- a/test/c/test_intp_rbf.cpp
+++ b/test/c/test_intp_rbf.cpp
@@ -186,7 +186,7 @@ TYPED_TEST(RbfInterpolTypedTestFixture, Cell) {
   }
 }
 
-TYPED_TEST(RbfInterpolTypedTestFixture, Egde) {
+TYPED_TEST(RbfInterpolTypedTestFixture, Edge) {
   using T = TypeParam;
 
   rbf_vec_interpol_edge_lib<T>(
-- 
GitLab


From 609c5e5804c1b75ccff24ec3ce53126f0cc1a7d7 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 6 Mar 2025 18:48:50 +0100
Subject: [PATCH 33/33] Revert "Fixed strange typo"

This reverts commit 8175944d3f080ec145d4ce59e2e6fdddb8d6153d.
---
 src/horizontal/mo_lib_divrot.F90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90
index c1360f6..a2f2ad9 100644
--- a/src/horizontal/mo_lib_divrot.F90
+++ b/src/horizontal/mo_lib_divrot.F90
@@ -561,7 +561,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -800,7 +800,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1008,7 +1008,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1295,7 +1295,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
-- 
GitLab