From 0ce09afcade6c639b39734678e5a1cd04a2285da Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 08:52:24 +0100
Subject: [PATCH 01/50] added unittests for mo_lib_loopindices

---
 test/fortran/test_loopindices.f90 | 145 ++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 test/fortran/test_loopindices.f90

diff --git a/test/fortran/test_loopindices.f90 b/test/fortran/test_loopindices.f90
new file mode 100644
index 0000000..3c20630
--- /dev/null
+++ b/test/fortran/test_loopindices.f90
@@ -0,0 +1,145 @@
+! ICON
+!
+! ---------------------------------------------------------------
+! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+! Contact information: icon-model.org
+!
+! See AUTHORS.TXT for a list of authors
+! See LICENSES/ for license information
+! SPDX-License-Identifier: BSD-3-Clause
+! ---------------------------------------------------------------
+
+MODULE TEST_mo_lib_loopindices
+  USE FORTUTF
+
+  IMPLICIT NONE
+
+  PRIVATE
+
+  PUBLIC :: TEST_get_indices_lib
+
+CONTAINS
+
+  SUBROUTINE TEST_get_indices_lib
+    USE mo_lib_loopindices, ONLY: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib
+
+    INTEGER :: i_startidx_in ! Start index as input
+    INTEGER :: i_endidx_in ! End index as input
+    INTEGER :: nproma ! inner loop length/vector length
+    INTEGER :: i_blk ! Current block (variable jb in do loops)
+    INTEGER :: i_startblk ! Start block of do loop
+    INTEGER :: i_endblk ! End block of do loop
+
+    INTEGER :: i_startidx_out, i_endidx_out ! Start and end indices (jc loop), as output
+
+    i_startidx_in = 2
+    i_endidx_in = 15
+    nproma = 32
+    i_startblk = 1
+    i_endblk = 40
+
+    ! CASE: I -> i_blk == i_startblk
+    i_blk = 1
+    CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_c_start_1")
+    CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in))
+    CALL TAG_TEST("TEST_get_indices_c_end_1")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_e_start_1")
+    CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in))
+    CALL TAG_TEST("TEST_get_indices_e_end_1")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_v_start_1")
+    CALL ASSERT_EQUAL(i_startidx_out, MAX(1, i_startidx_in))
+    CALL TAG_TEST("TEST_get_indices_v_end_1")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    ! CASE: II -> i_blk == i_endblk
+    i_blk = 40
+    CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_c_start_2")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_c_end_2")
+    CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_e_start_2")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_e_end_2")
+    CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_v_start_2")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_v_end_2")
+    CALL ASSERT_EQUAL(i_endidx_out, i_endidx_in)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    ! CASE: III -> Every other cases
+    i_blk = 20
+    CALL get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_c_start_3")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_c_end_3")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_e_start_3")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_e_end_3")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+    i_startidx_out = 0
+    i_endidx_out = 0
+
+    CALL get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                           i_startidx_out, i_endidx_out)
+
+    CALL TAG_TEST("TEST_get_indices_v_start_3")
+    CALL ASSERT_EQUAL(i_startidx_out, 1)
+    CALL TAG_TEST("TEST_get_indices_v_end_3")
+    CALL ASSERT_EQUAL(i_endidx_out, nproma)
+
+  END SUBROUTINE TEST_get_indices_lib
+
+END MODULE TEST_mo_lib_loopindices
-- 
GitLab


From fe47826cf3b0a08452f813c6707b4f9b95cd21d7 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 08:53:01 +0100
Subject: [PATCH 02/50] added unittests for mo_math_utilities, partially done

---
 test/fortran/test_math_utilities.f90 | 270 +++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 test/fortran/test_math_utilities.f90

diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90
new file mode 100644
index 0000000..25064f5
--- /dev/null
+++ b/test/fortran/test_math_utilities.f90
@@ -0,0 +1,270 @@
+! ICON
+!
+! ---------------------------------------------------------------
+! Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+! Contact information: icon-model.org
+!
+! See AUTHORS.TXT for a list of authors
+! See LICENSES/ for license information
+! SPDX-License-Identifier: BSD-3-Clause
+! ---------------------------------------------------------------
+
+MODULE TEST_mo_math_utilities
+
+  USE FORTUTF
+  USE mo_math_types, ONLY: t_cartesian_coordinates, t_geographical_coordinates, &
+    &                      t_line, t_tangent_vectors
+  USE mo_math_constants, ONLY: pi, pi_2, pi_4
+  USE mo_lib_grid_geometry_info
+  ! USE mo_physical_constants, ONLY: earth_radius
+  USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY: wp => real64
+
+  IMPLICIT NONE
+
+  PRIVATE
+
+  PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec
+  REAL(wp), PARAMETER :: earth_radius           = 6.371229e6_wp    !! [m]    average radius
+
+CONTAINS
+
+  SUBROUTINE TEST_cc2gc
+
+    USE mo_math_utilities, ONLY: cc2gc
+
+    TYPE(t_cartesian_coordinates)    :: coord
+    TYPE(t_grid_geometry_info)       :: geometry_info
+    TYPE(t_geographical_coordinates) :: pos
+
+    REAL(wp) :: lon_ref, lat_ref
+
+    coord%x(1) = 10.0_wp
+    coord%x(2) = 10.0_wp
+    coord%x(3) = 5.0_wp
+
+    geometry_info%geometry_type = sphere_geometry
+
+    pos = cc2gc(coord, geometry_info)
+
+    lon_ref = 0.78539816339744828_wp
+    lat_ref = 0.33983690945412193_wp
+
+    CALL TAG_TEST("TEST_cc2gc_sphere_lon")
+    CALL ASSERT_EQUAL(pos%lon, lon_ref)
+    CALL TAG_TEST("TEST_cc2gc_sphere_lat")
+    CALL ASSERT_EQUAL(pos%lat, lat_ref)
+
+    geometry_info%geometry_type = planar_torus_geometry
+
+    geometry_info%domain_length = 2.0_wp*pi*earth_radius
+    geometry_info%domain_height = 2.0_wp*pi*earth_radius
+
+    coord%x(1) = 1000000.0_wp
+    coord%x(2) = 100.0_wp
+
+    pos = cc2gc(coord, geometry_info)
+
+    lon_ref = 0.15695558894524117_wp
+    lat_ref = -0.17453205322393880_wp
+
+    CALL TAG_TEST("TEST_cc2gc_torus_lon")
+    CALL ASSERT_EQUAL(pos%lon, lon_ref)
+    CALL TAG_TEST("TEST_cc2gc_torus_lat")
+    CALL ASSERT_EQUAL(pos%lat, lat_ref)
+
+  END SUBROUTINE TEST_cc2gc
+
+  SUBROUTINE TEST_gc2cc
+
+    USE mo_math_utilities, ONLY: gc2cc
+
+    TYPE(t_cartesian_coordinates)    :: coord, coord_ref
+    TYPE(t_grid_geometry_info)       :: geometry_info
+    TYPE(t_geographical_coordinates) :: pos
+
+    REAL(wp) :: lon_ref, lat_ref, tol
+    INTEGER :: i
+    CHARACTER(LEN=32) :: tag
+
+    tol = 1d-15
+    pos%lon = 0.78_wp
+    pos%lat = 0.34_wp
+
+    geometry_info%geometry_type = sphere_geometry
+
+    coord = gc2cc(pos, geometry_info)
+
+    coord_ref%x(1) = 0.6702170547483377_wp
+    coord_ref%x(2) = 0.6630199536212522_wp
+    coord_ref%x(3) = 0.3334870921408144_wp
+
+    DO i = 1, SIZE(coord%x)
+!     write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i)
+      WRITE (tag, '(a,i1)') "TEST_gc2cc_sphere_", i
+      CALL TAG_TEST(tag)
+      CALL ASSERT_ALMOST_EQUAL(coord%x(i), coord_ref%x(i), tol)
+    END DO
+
+    geometry_info%geometry_type = planar_torus_geometry
+
+    geometry_info%domain_length = 2.0_wp*pi*earth_radius
+    geometry_info%domain_height = 2.0_wp*pi*earth_radius
+
+    pos%lon = 0.15_wp
+    pos%lat = -0.17_wp
+
+    coord = gc2cc(pos, geometry_info)
+
+    coord_ref%x(1) = 955684.3499999998603016_wp
+    coord_ref%x(2) = 519845.4807382422150113_wp
+    coord_ref%x(3) = 0.0_wp
+
+    DO i = 1, SIZE(coord%x)
+!     write(*,"(i4,a,f24.16)") i, ' coord%x(i): ', coord%x(i)
+      WRITE (tag, '(a,i1)') "TEST_gc2cc_torus_", i
+      CALL TAG_TEST(tag)
+      CALL ASSERT_EQUAL(coord%x(i), coord_ref%x(i))
+    END DO
+
+  END SUBROUTINE TEST_gc2cc
+
+  SUBROUTINE TEST_cc2tv
+    USE mo_math_utilities, ONLY: cc2tv
+
+    TYPE(t_cartesian_coordinates) :: coord
+    TYPE(t_geographical_coordinates) :: pos
+    TYPE(t_tangent_vectors):: tt
+    REAL(wp) :: v1_ref, v2_ref
+
+    pos%lon = pi_2
+    pos%lat = pi_4
+    coord%x(1) = 10.0_wp
+    coord%x(2) = 15.0_wp
+    coord%x(3) = 5.0_wp
+
+    tt = cc2tv(coord, pos)
+
+    v1_ref = -9.9999999999999982_wp
+    v2_ref = -7.0710678118654737_wp
+
+    CALL TAG_TEST("TEST_cc2tv_v1")
+    CALL ASSERT_EQUAL(tt%v1, v1_ref)
+    CALL TAG_TEST("TEST_cc2tv_v2")
+    CALL ASSERT_EQUAL(tt%v2, v2_ref)
+
+  END SUBROUTINE TEST_cc2tv
+
+  SUBROUTINE TEST_gvec2cvec
+    USE mo_math_utilities, ONLY: gvec2cvec
+
+    REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. component
+    REAL(wp) :: p_long, p_lat ! geo. coord. of data point
+    TYPE(t_grid_geometry_info) :: geometry_info
+
+    REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector
+    REAL(wp) :: p_cu_ref, p_cv_ref, p_cw_ref ! Cart. vector ref
+
+    geometry_info%geometry_type = sphere_geometry
+
+    p_gu = 10.0_wp
+    p_gv = 5.0_wp
+
+    p_long = pi_2
+    p_lat = pi_4
+
+    CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info)
+
+    p_cu_ref = -10.0_wp
+    p_cv_ref = -3.5355339059327369_wp
+    p_cw_ref = 3.5355339059327378_wp
+
+    CALL TAG_TEST("TEST_gvec2cvec_sphere_cu")
+    CALL ASSERT_EQUAL(p_cu, p_cu_ref)
+    CALL TAG_TEST("TEST_gvec2cvec_sphere_cv")
+    CALL ASSERT_EQUAL(p_cv, p_cv_ref)
+    CALL TAG_TEST("TEST_gvec2cvec_sphere_cw")
+    CALL ASSERT_EQUAL(p_cw, p_cw_ref)
+
+    geometry_info%geometry_type = planar_torus_geometry
+
+    CALL gvec2cvec(p_gu, p_gv, p_long, p_lat, p_cu, p_cv, p_cw, geometry_info)
+
+    CALL TAG_TEST("TEST_gvec2cvec_torus_cu")
+    CALL ASSERT_EQUAL(p_cu, p_gu)
+    CALL TAG_TEST("TEST_gvec2cvec_torus_cv")
+    CALL ASSERT_EQUAL(p_cv, p_gv)
+    CALL TAG_TEST("TEST_gvec2cvec_torus_cw")
+    CALL ASSERT_EQUAL(p_cw, 0.0_wp)
+
+  END SUBROUTINE TEST_gvec2cvec
+
+  SUBROUTINE TEST_cvec2gvec
+    USE mo_math_utilities, ONLY: cvec2gvec
+
+    REAL(wp) :: p_cu, p_cv, p_cw ! Cart. vector
+    REAL(wp) :: p_long, p_lat ! geo. coord. of data point
+    TYPE(t_grid_geometry_info) :: geometry_info
+
+    REAL(wp) :: p_gu, p_gv ! zonal and meridional vec. comp.
+    REAL(wp) :: p_gu_ref, p_gv_ref
+
+    geometry_info%geometry_type = sphere_geometry
+
+    p_cu = -10.0_wp
+    p_cv = -3.0_wp
+    p_cw = 3.0_wp
+
+    p_long = pi_2
+    p_lat = pi_4
+
+    CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info)
+
+!   write(*,"(a,f24.16)") ' p_gu: ', p_gu
+!   write(*,"(a,f24.16)") ' p_gv: ', p_gv
+
+    p_gu_ref = 10.0_wp
+    p_gv_ref = 4.2426406871192857_wp
+
+    CALL TAG_TEST("TEST_cvec2gvec_sphere_gu")
+    CALL ASSERT_EQUAL(p_gu, p_gu_ref)
+    CALL TAG_TEST("TEST_cvec2gvec_sphere_gv")
+    CALL ASSERT_EQUAL(p_gv, p_gv_ref)
+
+    geometry_info%geometry_type = planar_torus_geometry
+
+    CALL cvec2gvec(p_cu, p_cv, p_cw, p_long, p_lat, p_gu, p_gv, geometry_info)
+
+    CALL TAG_TEST("TEST_cvec2gvec_torus_gu")
+    CALL ASSERT_EQUAL(p_gu, p_cu)
+    CALL TAG_TEST("TEST_cvec2gvec_torus_gv")
+    CALL ASSERT_EQUAL(p_gv, p_cv)
+
+  END SUBROUTINE TEST_cvec2gvec
+
+  SUBROUTINE TEST_tdma_solver_vec
+
+    USE mo_math_utilities, ONLY: tdma_solver_vec
+
+    INTEGER, PARAMETER :: n = 10
+    REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n)
+    INTEGER :: i, j
+    REAL(wp) :: sum, sum_ref
+    DO i = 1, n
+      DO j = 1, n
+        a(i, j) = 1.0_wp
+        b(i, j) = 2.0_wp
+        c(i, j) = 1.0_wp
+        d(i, j) = 1.0_wp
+      END DO
+    END DO
+    CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x)
+    sum = 0.0_wp
+    DO i = 1, n
+      sum = sum + x(i, 1)
+    END DO
+    sum_ref = 4.5454545454545467_wp
+    CALL TAG_TEST("TEST_tdma_solver_vec")
+    CALL ASSERT_EQUAL(sum, sum_ref)
+  END SUBROUTINE TEST_tdma_solver_vec
+
+END MODULE TEST_mo_math_utilities
-- 
GitLab


From 147e4b40b318a58abb41eccc3a676a35d3b76ea3 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 08:54:28 +0100
Subject: [PATCH 03/50] updated the gitignore file

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 37de517..18c1a34 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,5 @@ iconmath_Tests
 # Test stage files:
 /**/Testing/*
 run_tests.f90
+iconmath_Tests
+run_tests.f90
-- 
GitLab


From 86cd7136a1664dde92b4ebc1e0c19ffbbd439140 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 10:04:20 +0100
Subject: [PATCH 04/50] fixed a style-check issue

---
 test/fortran/test_math_utilities.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90
index 25064f5..70fd9ab 100644
--- a/test/fortran/test_math_utilities.f90
+++ b/test/fortran/test_math_utilities.f90
@@ -24,7 +24,7 @@ MODULE TEST_mo_math_utilities
   PRIVATE
 
   PUBLIC :: TEST_cc2gc, TEST_gc2cc, TEST_cc2tv, TEST_gvec2cvec, TEST_cvec2gvec, TEST_tdma_solver_vec
-  REAL(wp), PARAMETER :: earth_radius           = 6.371229e6_wp    !! [m]    average radius
+  REAL(wp), PARAMETER :: earth_radius = 6.371229e6_wp !! [m]    average radius
 
 CONTAINS
 
-- 
GitLab


From 8539e26d2e03bb93b25b4f682c60bc0624c742c4 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 08:54:28 +0100
Subject: [PATCH 05/50] updated the gitignore file

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 18c1a34..23535bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,5 @@ iconmath_Tests
 
 # Test stage files:
 /**/Testing/*
-run_tests.f90
 iconmath_Tests
 run_tests.f90
-- 
GitLab


From 32e75f2317260a1bcb074171dc58e867e222ab41 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 14:57:31 +0100
Subject: [PATCH 06/50] changed the extension of mo_lib_loopindices to F90

---
 src/support/CMakeLists.txt                                     | 2 +-
 src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename src/support/{mo_lib_loopindices.f90 => mo_lib_loopindices.F90} (100%)

diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 8e5fcc4..0bce304 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -13,7 +13,7 @@ add_library(
   iconmath-support
   mo_gridman_constants.f90
   mo_lib_grid_geometry_info.f90
-  mo_lib_loopindices.f90
+  mo_lib_loopindices.F90
   mo_math_constants.f90
   mo_math_types.f90
   mo_math_utilities.F90
diff --git a/src/support/mo_lib_loopindices.f90 b/src/support/mo_lib_loopindices.F90
similarity index 100%
rename from src/support/mo_lib_loopindices.f90
rename to src/support/mo_lib_loopindices.F90
-- 
GitLab


From e9e1506bbb05fccc25ce47226b688c6ddaf6c44c Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 14:58:40 +0100
Subject: [PATCH 07/50] added the cpp version of mo_lib_loopindices and
 compiled the code

made changes to debug the last version
---
 CMakeLists.txt                     |  2 +-
 src/support/CMakeLists.txt         | 12 +++++++-
 src/support/mo_lib_loopindices.cpp | 46 ++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 src/support/mo_lib_loopindices.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index daafc5d..c40cd40 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,7 +14,7 @@ cmake_minimum_required(VERSION 3.18)
 project(
   iconmath
   VERSION 1.0.0
-  LANGUAGES Fortran)
+  LANGUAGES Fortran CXX)
 
 option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 option(BUILD_TESTING "Build tests" ON)
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 0bce304..6d2fd78 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -13,6 +13,7 @@ add_library(
   iconmath-support
   mo_gridman_constants.f90
   mo_lib_grid_geometry_info.f90
+  mo_lib_loopindices.cpp
   mo_lib_loopindices.F90
   mo_math_constants.f90
   mo_math_types.f90
@@ -57,7 +58,16 @@ target_include_directories(
     # Path to the Fortran modules:
     $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>>
     $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
-)
+  INTERFACE
+    # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
+    # multiple compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
+  PRIVATE
+    # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
+    # compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>)
 
 target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support)
 
diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp
new file mode 100644
index 0000000..9810427
--- /dev/null
+++ b/src/support/mo_lib_loopindices.cpp
@@ -0,0 +1,46 @@
+#include <algorithm> // For std::max
+
+extern "C" {
+    // get_indices_c_lib function
+    void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out) {
+        if (i_blk == i_startblk) {
+            i_startidx_out = std::max(1, i_startidx_in);
+            i_endidx_out = nproma;
+            if (i_blk == i_endblk) {
+                i_endidx_out = i_endidx_in;
+            }
+        } else if (i_blk == i_endblk) {
+            i_startidx_out = 1;
+            i_endidx_out = i_endidx_in;
+        } else {
+            i_startidx_out = 1;
+            i_endidx_out = nproma;
+        }
+    }
+
+    // get_indices_e_lib function
+    void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out) {
+        i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in);
+        i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in;
+    }
+
+    // get_indices_v_lib function
+    void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out) {
+        if (i_blk == i_startblk) {
+            i_startidx_out = i_startidx_in;
+            i_endidx_out = nproma;
+            if (i_blk == i_endblk) {
+                i_endidx_out = i_endidx_in;
+            }
+        } else if (i_blk == i_endblk) {
+            i_startidx_out = 1;
+            i_endidx_out = i_endidx_in;
+        } else {
+            i_startidx_out = 1;
+            i_endidx_out = nproma;
+        }
+    }
+}
-- 
GitLab


From ad6259025c880d4a9f1e4009c2942fc1882065de Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Tue, 31 Dec 2024 15:07:21 +0100
Subject: [PATCH 08/50] enabled the use of cpp bindings for mo_lib_loopindices

---
 CMakeLists.txt                     |  1 +
 src/support/CMakeLists.txt         |  4 ++++
 src/support/mo_lib_loopindices.F90 | 34 +++++++++++++++++++++++++++++-
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c40cd40..2f32fcf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ option(BUILD_ICONMATH_HORIZONTAL "Build horizontal library" ON)
 
 option(IM_ENABLE_MIXED_PRECISION "Enable mixed precision" OFF)
 option(IM_ENABLE_LOOP_EXCHANGE "Enable loop exchange" OFF)
+option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF)
 option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF)
 option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF)
 option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF)
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 6d2fd78..c0fc287 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -41,6 +41,10 @@ if(IM_ENABLE_DIM_SWAP)
   target_compile_definitions(iconmath-support PRIVATE __SWAPDIM)
 endif()
 
+if(IM_USE_CPP_BINDINGS)
+  target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS)
+endif()
+
 if(IM_ENABLE_OPENACC)
   # If _OPENACC is defined, assume that the required compiler flags are already
   # provided, e.g. in CMAKE_Fortran_FLAGS:
diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90
index fe6c9b9..3ac80dd 100644
--- a/src/support/mo_lib_loopindices.F90
+++ b/src/support/mo_lib_loopindices.F90
@@ -16,12 +16,18 @@
 
 MODULE mo_lib_loopindices
 
+#ifdef __USE_CPP_BINDINGS
+  USE, INTRINSIC :: ISO_C_BINDING
+#endif
+
   IMPLICIT NONE
 
   PRIVATE
 
   PUBLIC :: get_indices_c_lib, get_indices_e_lib, get_indices_v_lib
 
+#ifndef __USE_CPP_BINDINGS
+
 CONTAINS
 
 !-------------------------------------------------------------------------
@@ -121,5 +127,31 @@ CONTAINS
 
   END SUBROUTINE get_indices_v_lib
 
-END MODULE mo_lib_loopindices
+#else
+
+  INTERFACE
+    SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                                 i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib")
+      IMPORT :: C_INT
+      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+    END SUBROUTINE get_indices_c_lib
+
+    SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                                 i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib")
+      IMPORT :: C_INT
+      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+    END SUBROUTINE get_indices_e_lib
+
+    SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
+                                 i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib")
+      IMPORT :: C_INT
+      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+    END SUBROUTINE get_indices_v_lib
+  END INTERFACE
+
+#endif
 
+END MODULE mo_lib_loopindices
-- 
GitLab


From a7402469ee13ce7266fa397c6e31c5e48e7130c1 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 2 Jan 2025 22:24:47 +0100
Subject: [PATCH 09/50] added the cpp version of tdma_solver_vec

made it compile
---
 CMakeLists.txt                    |   2 +
 src/support/CMakeLists.txt        |   1 +
 src/support/mo_math_utilities.F90 | 164 +++++++++++++++++-------------
 src/support/mo_math_utilities.cpp |  77 ++++++++++++++
 4 files changed, 172 insertions(+), 72 deletions(-)
 create mode 100644 src/support/mo_math_utilities.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f32fcf..8fb4acf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,8 @@ project(
   VERSION 1.0.0
   LANGUAGES Fortran CXX)
 
+set(CMAKE_CXX_STANDARD 17)
+
 option(BUILD_SHARED_LIBS "Build shared libraries" ON)
 option(BUILD_TESTING "Build tests" ON)
 option(BUILD_ICONMATH_INTERPOLATION "Build interpolation library" ON)
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index c0fc287..35e1c71 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -17,6 +17,7 @@ add_library(
   mo_lib_loopindices.F90
   mo_math_constants.f90
   mo_math_types.f90
+  mo_math_utilities.cpp
   mo_math_utilities.F90
   mo_random_number_generators.F90)
 
diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index fa7ea20..3461cec 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -22,6 +22,7 @@
 ! #endif
 MODULE mo_math_utilities
 
+  USE, INTRINSIC :: ISO_C_BINDING
   USE mo_iconlib_kind, ONLY: wp, dp, sp
   USE mo_math_constants, ONLY: pi, pi_2, dbl_eps
   USE mo_gridman_constants, ONLY: SUCCESS, TORUS_MAX_LAT
@@ -160,7 +161,98 @@ MODULE mo_math_utilities
 
   CHARACTER(LEN=*), PARAMETER :: modname = 'mo_math_utilities'
 
+  !-------------------------------------------------------------------------
+  !>
+  !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
+  !!
+  !!       a - sub-diagonal (means it is the diagonal below the main diagonal)
+  !!       b - the main diagonal
+  !!       c - sup-diagonal (means it is the diagonal above the main diagonal)
+  !!       d - right part
+  !!  varout - the answer (identical to x in description above)
+  !!    slev - start level (top)
+  !!    elev - end level (bottom)
+
+! Preprocessor directive to conditionally include the tdma_solver_vec implementation
+#ifndef __USE_CPP_BINDINGS
+
+  CONTAINS
+
+  SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
+    INTEGER, INTENT(IN) :: slev, elev
+    INTEGER, INTENT(IN) :: startidx, endidx
+    REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
+    REAL(wp), INTENT(OUT) :: varout(:, :)
+    INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
+
+    !
+    ! local
+    REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
+    INTEGER :: i
+    INTEGER :: jc
+    INTEGER :: acc_queue
+
+    IF (PRESENT(opt_acc_queue)) THEN
+      acc_queue = opt_acc_queue
+    ELSE
+      acc_queue = 1
+    END IF
+
+    ! initialize c-prime and d-prime
+    !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
+    !$ACC LOOP GANG(STATIC: 1) VECTOR
+    DO jc = startidx, endidx
+      c_p(jc, slev) = c(jc, slev)/b(jc, slev)
+      d_p(jc, slev) = d(jc, slev)/b(jc, slev)
+    END DO
+    ! solve for vectors c-prime and d-prime
+    !$ACC LOOP SEQ
+!NEC$ outerloop_unroll(4)
+    DO i = slev + 1, elev
+      !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
+      DO jc = startidx, endidx
+        m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
+        c_p(jc, i) = c(jc, i)*m
+        d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
+      END DO
+    END DO
+    ! initialize varout
+    !$ACC LOOP GANG(STATIC: 1) VECTOR
+    DO jc = startidx, endidx
+      varout(jc, elev) = d_p(jc, elev)
+    END DO
+    ! solve for varout from the vectors c-prime and d-prime
+    !$ACC LOOP SEQ
+!NEC$ outerloop_unroll(4)
+    DO i = elev - 1, slev, -1
+      !$ACC LOOP GANG(STATIC: 1) VECTOR
+      DO jc = startidx, endidx
+        varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
+      END DO
+    END DO
+    !$ACC END PARALLEL
+
+    IF (.NOT. PRESENT(opt_acc_queue)) THEN
+      !$ACC WAIT(acc_queue)
+    END IF
+
+  END SUBROUTINE tdma_solver_vec
+
+#else
+
+  ! C++ binding for tdma_solver_vec
+  INTERFACE
+    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec")
+      IMPORT :: C_DOUBLE, C_INT
+      REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*)
+      INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx
+      REAL(C_DOUBLE), INTENT(OUT) :: varout(*)
+      INTEGER(C_INT), OPTIONAL :: opt_acc_queue
+    END SUBROUTINE tdma_solver_vec
+  END INTERFACE
+
 CONTAINS
+#endif
 
   !-------------------------------------------------------------------------
   ! Variant for double-precision (or working-precision=dp) lon+lat in ICON
@@ -2019,78 +2111,6 @@ CONTAINS
 
   END SUBROUTINE tdma_solver
 
-  !-------------------------------------------------------------------------
-  !>
-  !! TDMA tridiagonal matrix solver for a_i*x_(i-1) + b_i*x_i + c_i*x_(i+1) = d_i
-  !!
-  !!       a - sub-diagonal (means it is the diagonal below the main diagonal)
-  !!       b - the main diagonal
-  !!       c - sup-diagonal (means it is the diagonal above the main diagonal)
-  !!       d - right part
-  !!  varout - the answer (identical to x in description above)
-  !!    slev - start level (top)
-  !!    elev - end level (bottom)
-  SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
-    INTEGER, INTENT(IN) :: slev, elev
-    INTEGER, INTENT(IN) :: startidx, endidx
-    REAL(wp), INTENT(IN) :: a(:, :), b(:, :), c(:, :), d(:, :)
-    REAL(wp), INTENT(OUT) :: varout(:, :)
-    INTEGER, OPTIONAL, INTENT(IN) :: opt_acc_queue
-
-    !
-    ! local
-    REAL(wp):: m, c_p(SIZE(a, 1), SIZE(a, 2)), d_p(SIZE(a, 1), SIZE(a, 2))
-    INTEGER :: i
-    INTEGER :: jc
-    INTEGER :: acc_queue
-
-    IF (PRESENT(opt_acc_queue)) THEN
-      acc_queue = opt_acc_queue
-    ELSE
-      acc_queue = 1
-    END IF
-
-    ! initialize c-prime and d-prime
-    !$ACC PARALLEL DEFAULT(PRESENT) CREATE(c_p, d_p) ASYNC(acc_queue)
-    !$ACC LOOP GANG(STATIC: 1) VECTOR
-    DO jc = startidx, endidx
-      c_p(jc, slev) = c(jc, slev)/b(jc, slev)
-      d_p(jc, slev) = d(jc, slev)/b(jc, slev)
-    END DO
-    ! solve for vectors c-prime and d-prime
-    !$ACC LOOP SEQ
-!NEC$ outerloop_unroll(4)
-    DO i = slev + 1, elev
-      !$ACC LOOP GANG(STATIC: 1) VECTOR PRIVATE(m)
-      DO jc = startidx, endidx
-        m = 1._wp/(b(jc, i) - c_p(jc, i - 1)*a(jc, i))
-        c_p(jc, i) = c(jc, i)*m
-        d_p(jc, i) = (d(jc, i) - d_p(jc, i - 1)*a(jc, i))*m
-      END DO
-    END DO
-    ! initialize varout
-    !$ACC LOOP GANG(STATIC: 1) VECTOR
-    DO jc = startidx, endidx
-      varout(jc, elev) = d_p(jc, elev)
-    END DO
-    ! solve for varout from the vectors c-prime and d-prime
-    !$ACC LOOP SEQ
-!NEC$ outerloop_unroll(4)
-    DO i = elev - 1, slev, -1
-      !$ACC LOOP GANG(STATIC: 1) VECTOR
-      DO jc = startidx, endidx
-        varout(jc, i) = d_p(jc, i) - c_p(jc, i)*varout(jc, i + 1)
-      END DO
-    END DO
-    !$ACC END PARALLEL
-
-    IF (.NOT. PRESENT(opt_acc_queue)) THEN
-      !$ACC WAIT(acc_queue)
-    END IF
-
-  END SUBROUTINE tdma_solver_vec
-  !-------------------------------------------------------------------------
-
   !-------------------------------------------------------------------------
   !
   !> Helper functions for computing the vertical layer structure
diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
new file mode 100644
index 0000000..a8ccce4
--- /dev/null
+++ b/src/support/mo_math_utilities.cpp
@@ -0,0 +1,77 @@
+#include <vector>
+#include <iostream>
+#include <chrono> // For timing
+
+extern "C" {
+
+void tdma_solver_vec(double *a, double *b, double *c, double *d,
+                     int slev, int elev, int startidx, int endidx,
+                     double* varout, int opt_acc_queue = -1) {
+
+    int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided
+
+    // Determine array sizes based on startidx and endidx
+    int nrows = endidx - startidx;
+    int ncols = elev - slev;
+
+    // Temporary arrays for c-prime and d-prime
+    std::vector<double> cp(nrows * ncols, 0.0);
+    std::vector<double> dp(nrows * ncols, 0.0);
+
+    // Helper function to access 2D arrays stored as 1D
+    auto idx = [&](int row, int col) { return col * nrows + row; };
+
+    // Start timing
+    auto start_time = std::chrono::high_resolution_clock::now();
+
+    // OpenACC Parallel Region
+    #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
+    {
+        // Initialize c-prime and d-prime
+        #pragma acc loop gang(static: 1) vector
+        for (int jc = startidx; jc < endidx; ++jc) {
+            cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)];
+            dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)];
+        }
+
+        // Solve for vectors c-prime and d-prime
+        #pragma acc loop seq
+        for (int i = slev + 1; i < elev; ++i) {
+            #pragma acc loop gang(static: 1) vector
+            for (int jc = startidx; jc < endidx; ++jc) {
+                double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]);
+                cp[idx(jc, i)] = c[idx(jc, i)] * m;
+                dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m;
+            }
+        }
+
+        // Initialize varout
+        #pragma acc loop gang(static: 1) vector
+        for (int jc = startidx; jc < endidx; ++jc) {
+            varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)];
+        }
+
+        // Solve for varout from the vectors c-prime and d-prime
+        #pragma acc loop seq
+        for (int i = elev - 2; i >= slev; --i) {
+            #pragma acc loop gang(static: 1) vector
+            for (int jc = startidx; jc < endidx; ++jc) {
+                varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)];
+            }
+        }
+    }
+
+    printf("tdma_solver_vec: completed using C++\n");
+
+    // Wait for OpenACC asynchronous operations to complete if acc_queue is not optional
+    if (opt_acc_queue == -1) {
+        #pragma acc wait(acc_queue)
+    }
+
+    // End timing
+    auto end_time = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double> elapsed_time = end_time - start_time;
+
+    std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl;
+}
+}
-- 
GitLab


From c043b6df79b7e099b99b0681ecb20b1d717e665d Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Thu, 2 Jan 2025 22:29:20 +0100
Subject: [PATCH 10/50] added the test for the cpp binding of tdma_solver_vec

fixed a bug in testing
---
 src/support/CMakeLists.txt           |  2 +-
 test/fortran/test_math_utilities.f90 | 29 ++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 35e1c71..7af714e 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -43,7 +43,7 @@ if(IM_ENABLE_DIM_SWAP)
 endif()
 
 if(IM_USE_CPP_BINDINGS)
-  target_compile_definitions(iconmath-support PRIVATE __USE_CPP_BINDINGS)
+  target_compile_definitions(iconmath-support PUBLIC __USE_CPP_BINDINGS)
 endif()
 
 if(IM_ENABLE_OPENACC)
diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90
index 70fd9ab..8dcf5b3 100644
--- a/test/fortran/test_math_utilities.f90
+++ b/test/fortran/test_math_utilities.f90
@@ -248,7 +248,9 @@ CONTAINS
     INTEGER, PARAMETER :: n = 10
     REAL(wp) :: a(n, n), b(n, n), c(n, n), d(n, n), x(n, n)
     INTEGER :: i, j
-    REAL(wp) :: sum, sum_ref
+    REAL(wp) :: sum, sum_ref, tol
+    REAL(wp) :: start_time, end_time, elapsed_time
+
     DO i = 1, n
       DO j = 1, n
         a(i, j) = 1.0_wp
@@ -257,14 +259,33 @@ CONTAINS
         d(i, j) = 1.0_wp
       END DO
     END DO
+
+    CALL CPU_TIME(start_time)
+#ifndef __USE_CPP_BINDINGS
     CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x)
+#else
+    CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1)
+#endif
+    CALL CPU_TIME(end_time)
+
+    ! Compute elapsed time
+    elapsed_time = end_time - start_time
+
+    ! Output timing result
+    write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds"
+
     sum = 0.0_wp
     DO i = 1, n
-      sum = sum + x(i, 1)
+      DO j = 1, n
+        sum = sum + x(i, j)
+      ! write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1)
+      END DO
     END DO
-    sum_ref = 4.5454545454545467_wp
+    sum_ref = 27.2727272727272769_wp
+    tol = 1d-15
     CALL TAG_TEST("TEST_tdma_solver_vec")
-    CALL ASSERT_EQUAL(sum, sum_ref)
+    CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol)
+
   END SUBROUTINE TEST_tdma_solver_vec
 
 END MODULE TEST_mo_math_utilities
-- 
GitLab


From 8a55bbb6033537b38d94d82b67c2cf92176c4338 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 09:31:02 +0100
Subject: [PATCH 11/50] changed the way local arrays are defined, it improves
 the performance

fixed a bug
---
 src/support/mo_math_utilities.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index a8ccce4..f82cd27 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -14,9 +14,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
     int nrows = endidx - startidx;
     int ncols = elev - slev;
 
-    // Temporary arrays for c-prime and d-prime
-    std::vector<double> cp(nrows * ncols, 0.0);
-    std::vector<double> dp(nrows * ncols, 0.0);
+    double* cp = new double[nrows * ncols];
+    double* dp = new double[nrows * ncols];
 
     // Helper function to access 2D arrays stored as 1D
     auto idx = [&](int row, int col) { return col * nrows + row; };
@@ -68,6 +67,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
         #pragma acc wait(acc_queue)
     }
 
+    // Free memory at the end
+    delete[] cp;
+    delete[] dp;
     // End timing
     auto end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time = end_time - start_time;
-- 
GitLab


From bb9cc2524804f39c4538e447fcce4f9deaf0f999 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 09:34:24 +0100
Subject: [PATCH 12/50] replaced the lambda function for calculating combined
 index with a macro function

fixed a bug
---
 src/support/mo_math_utilities.cpp | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index f82cd27..c2b46dc 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -17,11 +17,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
     double* cp = new double[nrows * ncols];
     double* dp = new double[nrows * ncols];
 
-    // Helper function to access 2D arrays stored as 1D
-    auto idx = [&](int row, int col) { return col * nrows + row; };
-
-    // Start timing
-    auto start_time = std::chrono::high_resolution_clock::now();
+    #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function
 
     // OpenACC Parallel Region
     #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
@@ -29,8 +25,8 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
         // Initialize c-prime and d-prime
         #pragma acc loop gang(static: 1) vector
         for (int jc = startidx; jc < endidx; ++jc) {
-            cp[idx(jc, slev)] = c[idx(jc, slev)] / b[idx(jc, slev)];
-            dp[idx(jc, slev)] = d[idx(jc, slev)] / b[idx(jc, slev)];
+            cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)];
+            dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)];
         }
 
         // Solve for vectors c-prime and d-prime
@@ -38,16 +34,16 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
         for (int i = slev + 1; i < elev; ++i) {
             #pragma acc loop gang(static: 1) vector
             for (int jc = startidx; jc < endidx; ++jc) {
-                double m = 1.0 / (b[idx(jc, i)] - cp[idx(jc, i - 1)] * a[idx(jc, i)]);
-                cp[idx(jc, i)] = c[idx(jc, i)] * m;
-                dp[idx(jc, i)] = (d[idx(jc, i)] - dp[idx(jc, i - 1)] * a[idx(jc, i)]) * m;
+                double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]);
+                cp[IDX(jc, i)] = c[IDX(jc, i)] * m;
+                dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m;
             }
         }
 
         // Initialize varout
         #pragma acc loop gang(static: 1) vector
         for (int jc = startidx; jc < endidx; ++jc) {
-            varout[idx(jc, elev-1)] = dp[idx(jc, elev-1)];
+            varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)];
         }
 
         // Solve for varout from the vectors c-prime and d-prime
@@ -55,7 +51,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
         for (int i = elev - 2; i >= slev; --i) {
             #pragma acc loop gang(static: 1) vector
             for (int jc = startidx; jc < endidx; ++jc) {
-                varout[idx(jc, i)] = dp[idx(jc, i)] - cp[idx(jc, i)] * varout[idx(jc, i + 1)];
+                varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)];
             }
         }
     }
@@ -70,6 +66,7 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
     // Free memory at the end
     delete[] cp;
     delete[] dp;
+
     // End timing
     auto end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time = end_time - start_time;
-- 
GitLab


From d03022c361dfb3fc42f0c9ad0efa5c57d9a6e159 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 09:35:30 +0100
Subject: [PATCH 13/50] change the place of start the timer

---
 src/support/mo_math_utilities.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index c2b46dc..45430b0 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -8,6 +8,9 @@ void tdma_solver_vec(double *a, double *b, double *c, double *d,
                      int slev, int elev, int startidx, int endidx,
                      double* varout, int opt_acc_queue = -1) {
 
+    // Start timing
+    auto start_time = std::chrono::high_resolution_clock::now();
+
     int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided
 
     // Determine array sizes based on startidx and endidx
-- 
GitLab


From bf0ac7c89ef60cc5482b83d7418410531d48de26 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 11:08:00 +0100
Subject: [PATCH 14/50] added nrows and ncols as arguments to the cpp routine
 of tdma_solver_vec

---
 src/support/mo_math_utilities.F90    | 4 ++--
 src/support/mo_math_utilities.cpp    | 6 +-----
 test/fortran/test_math_utilities.f90 | 2 +-
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index 3461cec..c62af5c 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -242,10 +242,10 @@ MODULE mo_math_utilities
 
   ! C++ binding for tdma_solver_vec
   INTERFACE
-    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec")
+    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec")
       IMPORT :: C_DOUBLE, C_INT
       REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*)
-      INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx
+      INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols
       REAL(C_DOUBLE), INTENT(OUT) :: varout(*)
       INTEGER(C_INT), OPTIONAL :: opt_acc_queue
     END SUBROUTINE tdma_solver_vec
diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index 45430b0..ff94b89 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -6,17 +6,13 @@ extern "C" {
 
 void tdma_solver_vec(double *a, double *b, double *c, double *d,
                      int slev, int elev, int startidx, int endidx,
-                     double* varout, int opt_acc_queue = -1) {
+                     int nrows, int ncols, double *varout, int opt_acc_queue = -1) {
 
     // Start timing
     auto start_time = std::chrono::high_resolution_clock::now();
 
     int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided
 
-    // Determine array sizes based on startidx and endidx
-    int nrows = endidx - startidx;
-    int ncols = elev - slev;
-
     double* cp = new double[nrows * ncols];
     double* dp = new double[nrows * ncols];
 
diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90
index 8dcf5b3..9f95ca7 100644
--- a/test/fortran/test_math_utilities.f90
+++ b/test/fortran/test_math_utilities.f90
@@ -264,7 +264,7 @@ CONTAINS
 #ifndef __USE_CPP_BINDINGS
     CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x)
 #else
-    CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, x, -1)
+    CALL tdma_solver_vec(a, b, c, d, 0, n, 0, n, n, n, x, -1)
 #endif
     CALL CPU_TIME(end_time)
 
-- 
GitLab


From c0979bbaaf1fa1a6e8fb8bba370a031d870fff3f Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 11:20:50 +0100
Subject: [PATCH 15/50] added an additional test for tdma_solver_vec

---
 test/fortran/test_math_utilities.f90 | 31 +++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.f90
index 9f95ca7..db8b824 100644
--- a/test/fortran/test_math_utilities.f90
+++ b/test/fortran/test_math_utilities.f90
@@ -253,13 +253,15 @@ CONTAINS
 
     DO i = 1, n
       DO j = 1, n
-        a(i, j) = 1.0_wp
-        b(i, j) = 2.0_wp
-        c(i, j) = 1.0_wp
-        d(i, j) = 1.0_wp
+        a(i, j) = 1.0_wp*(i+j)
+        b(i, j) = 2.0_wp*(i+j)
+        c(i, j) = 1.0_wp*(i+j)
+        d(i, j) = 1.0_wp*(i+j)
       END DO
     END DO
 
+    tol = 1d-15
+
     CALL CPU_TIME(start_time)
 #ifndef __USE_CPP_BINDINGS
     CALL tdma_solver_vec(a, b, c, d, 1, n, 1, n, x)
@@ -278,12 +280,27 @@ CONTAINS
     DO i = 1, n
       DO j = 1, n
         sum = sum + x(i, j)
-      ! write(*,"(a,f24.16)") ' x(i, 1): ', x(i, 1)
       END DO
     END DO
     sum_ref = 27.2727272727272769_wp
-    tol = 1d-15
-    CALL TAG_TEST("TEST_tdma_solver_vec")
+    CALL TAG_TEST("TEST_tdma_solver_vec_full")
+    CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol)
+
+    x = 0.0_wp
+#ifndef __USE_CPP_BINDINGS
+    CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x)
+#else
+    CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1)
+#endif
+    sum = 0.0_wp
+    DO i = 2, n-1
+      DO j = 2, n-1
+        sum = sum + x(i, j)
+      END DO
+    END DO
+    sum_ref = 17.7777777777777679_wp
+
+    CALL TAG_TEST("TEST_tdma_solver_vec_partial")
     CALL ASSERT_ALMOST_EQUAL(sum, sum_ref, tol)
 
   END SUBROUTINE TEST_tdma_solver_vec
-- 
GitLab


From 6aef957598bf6a045db290284db9adf9be86a1d8 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 11:44:43 +0100
Subject: [PATCH 16/50] updated the extension of test_math_utilities

---
 test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/fortran/{test_math_utilities.f90 => test_math_utilities.F90} (100%)

diff --git a/test/fortran/test_math_utilities.f90 b/test/fortran/test_math_utilities.F90
similarity index 100%
rename from test/fortran/test_math_utilities.f90
rename to test/fortran/test_math_utilities.F90
-- 
GitLab


From 8aa6af717a93922f783f4f5e22dd75bd6e81df49 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 11:58:39 +0100
Subject: [PATCH 17/50] fixed a style formatting issue

---
 src/support/mo_math_utilities.F90 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index c62af5c..9bcb7a0 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -242,7 +242,8 @@ MODULE mo_math_utilities
 
   ! C++ binding for tdma_solver_vec
   INTERFACE
-    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) BIND(C, NAME="tdma_solver_vec")
+    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & 
+                              BIND(C, NAME="tdma_solver_vec")
       IMPORT :: C_DOUBLE, C_INT
       REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*)
       INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols
-- 
GitLab


From 1e78f91c6218796a5e6b7bde6d67e7755e4debd7 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 12:06:42 +0100
Subject: [PATCH 18/50] fixed further style formatting issues

---
 src/support/mo_lib_loopindices.F90   | 18 +++++++++---------
 src/support/mo_math_utilities.F90    | 16 ++++++++--------
 test/fortran/test_math_utilities.F90 | 18 +++++++++---------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/support/mo_lib_loopindices.F90 b/src/support/mo_lib_loopindices.F90
index 3ac80dd..ce67af7 100644
--- a/src/support/mo_lib_loopindices.F90
+++ b/src/support/mo_lib_loopindices.F90
@@ -132,23 +132,23 @@ CONTAINS
   INTERFACE
     SUBROUTINE get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
                                  i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_c_lib")
-      IMPORT :: C_INT
-      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
-      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+      IMPORT :: c_int
+      INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(c_int) :: i_startidx_out, i_endidx_out
     END SUBROUTINE get_indices_c_lib
 
     SUBROUTINE get_indices_e_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
                                  i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_e_lib")
-      IMPORT :: C_INT
-      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
-      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+      IMPORT :: c_int
+      INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(c_int) :: i_startidx_out, i_endidx_out
     END SUBROUTINE get_indices_e_lib
 
     SUBROUTINE get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk, &
                                  i_startidx_out, i_endidx_out) BIND(C, NAME="get_indices_v_lib")
-      IMPORT :: C_INT
-      INTEGER(C_INT), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
-      INTEGER(C_INT) :: i_startidx_out, i_endidx_out
+      IMPORT :: c_int
+      INTEGER(c_int), VALUE :: i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk
+      INTEGER(c_int) :: i_startidx_out, i_endidx_out
     END SUBROUTINE get_indices_v_lib
   END INTERFACE
 
diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index 9bcb7a0..9c2897f 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -176,7 +176,7 @@ MODULE mo_math_utilities
 ! Preprocessor directive to conditionally include the tdma_solver_vec implementation
 #ifndef __USE_CPP_BINDINGS
 
-  CONTAINS
+CONTAINS
 
   SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, varout, opt_acc_queue)
     INTEGER, INTENT(IN) :: slev, elev
@@ -242,13 +242,13 @@ MODULE mo_math_utilities
 
   ! C++ binding for tdma_solver_vec
   INTERFACE
-    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) & 
-                              BIND(C, NAME="tdma_solver_vec")
-      IMPORT :: C_DOUBLE, C_INT
-      REAL(C_DOUBLE), INTENT(IN) :: a(*), b(*), c(*), d(*)
-      INTEGER(C_INT), VALUE :: slev, elev, startidx, endidx, nrows, ncols
-      REAL(C_DOUBLE), INTENT(OUT) :: varout(*)
-      INTEGER(C_INT), OPTIONAL :: opt_acc_queue
+    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
+      BIND(C, NAME="tdma_solver_vec")
+      IMPORT :: c_double, c_int
+      REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*)
+      INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols
+      REAL(c_double), INTENT(OUT) :: varout(*)
+      INTEGER(c_int), OPTIONAL :: opt_acc_queue
     END SUBROUTINE tdma_solver_vec
   END INTERFACE
 
diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.F90
index db8b824..07dcfb3 100644
--- a/test/fortran/test_math_utilities.F90
+++ b/test/fortran/test_math_utilities.F90
@@ -253,10 +253,10 @@ CONTAINS
 
     DO i = 1, n
       DO j = 1, n
-        a(i, j) = 1.0_wp*(i+j)
-        b(i, j) = 2.0_wp*(i+j)
-        c(i, j) = 1.0_wp*(i+j)
-        d(i, j) = 1.0_wp*(i+j)
+        a(i, j) = 1.0_wp*(i + j)
+        b(i, j) = 2.0_wp*(i + j)
+        c(i, j) = 1.0_wp*(i + j)
+        d(i, j) = 1.0_wp*(i + j)
       END DO
     END DO
 
@@ -274,7 +274,7 @@ CONTAINS
     elapsed_time = end_time - start_time
 
     ! Output timing result
-    write(*,*) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds"
+    WRITE (*, *) "Elapsed time for tdma_solver_vec: ", elapsed_time, " seconds"
 
     sum = 0.0_wp
     DO i = 1, n
@@ -288,13 +288,13 @@ CONTAINS
 
     x = 0.0_wp
 #ifndef __USE_CPP_BINDINGS
-    CALL tdma_solver_vec(a, b, c, d, 2, n-1, 2, n-1, x)
+    CALL tdma_solver_vec(a, b, c, d, 2, n - 1, 2, n - 1, x)
 #else
-    CALL tdma_solver_vec(a, b, c, d, 1, n-1, 1, n-1, n, n, x, -1)
+    CALL tdma_solver_vec(a, b, c, d, 1, n - 1, 1, n - 1, n, n, x, -1)
 #endif
     sum = 0.0_wp
-    DO i = 2, n-1
-      DO j = 2, n-1
+    DO i = 2, n - 1
+      DO j = 2, n - 1
         sum = sum + x(i, j)
       END DO
     END DO
-- 
GitLab


From 5106ddc1bfd6538a81772780006825af0731f097 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 3 Jan 2025 12:10:24 +0100
Subject: [PATCH 19/50] added licences to the new cpp files

---
 src/support/mo_lib_loopindices.cpp | 11 +++++++++++
 src/support/mo_math_utilities.cpp  | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp
index 9810427..e6d9d21 100644
--- a/src/support/mo_lib_loopindices.cpp
+++ b/src/support/mo_lib_loopindices.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <algorithm> // For std::max
 
 extern "C" {
diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index ff94b89..e171606 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <vector>
 #include <iostream>
 #include <chrono> // For timing
-- 
GitLab


From da4294b4f3abab096810470bb2407307e3049430 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 14:59:55 +0100
Subject: [PATCH 20/50] enabled compilation using Kokkos

---
 src/support/CMakeLists.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 7af714e..b4ceb37 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -57,6 +57,9 @@ if(IM_ENABLE_OPENACC)
   endif()
 endif()
 
+message(STATUS "iconmath-support enabling Kokkos")
+find_package(Kokkos REQUIRED)
+
 target_include_directories(
   iconmath-support
   PUBLIC
@@ -74,7 +77,7 @@ target_include_directories(
     # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
     $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>)
 
-target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support)
+target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos)
 
 install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets")
 
-- 
GitLab


From e70952a76fba137c4167e023c51be20610f913d1 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 15:00:45 +0100
Subject: [PATCH 21/50] converted the c++ code in mo_math_utilities to Kokkos

---
 src/support/mo_math_utilities.cpp | 128 ++++++++++++++----------------
 1 file changed, 61 insertions(+), 67 deletions(-)

diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index e171606..b3031a5 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -12,75 +12,69 @@
 #include <vector>
 #include <iostream>
 #include <chrono> // For timing
+#include <Kokkos_Core.hpp>
 
 extern "C" {
 
-void tdma_solver_vec(double *a, double *b, double *c, double *d,
-                     int slev, int elev, int startidx, int endidx,
-                     int nrows, int ncols, double *varout, int opt_acc_queue = -1) {
-
-    // Start timing
-    auto start_time = std::chrono::high_resolution_clock::now();
-
-    int acc_queue = (opt_acc_queue == -1) ? 1 : opt_acc_queue; // Use 1 as the default if opt_acc_queue is not provided
-
-    double* cp = new double[nrows * ncols];
-    double* dp = new double[nrows * ncols];
-
-    #define IDX(row, col) ((col) * nrows + (row)) // performs better than lambda function
-
-    // OpenACC Parallel Region
-    #pragma acc parallel default(present) create(cp[:nrows*ncols], dp[:nrows*ncols]) async(acc_queue)
-    {
-        // Initialize c-prime and d-prime
-        #pragma acc loop gang(static: 1) vector
-        for (int jc = startidx; jc < endidx; ++jc) {
-            cp[IDX(jc, slev)] = c[IDX(jc, slev)] / b[IDX(jc, slev)];
-            dp[IDX(jc, slev)] = d[IDX(jc, slev)] / b[IDX(jc, slev)];
-        }
-
-        // Solve for vectors c-prime and d-prime
-        #pragma acc loop seq
-        for (int i = slev + 1; i < elev; ++i) {
-            #pragma acc loop gang(static: 1) vector
-            for (int jc = startidx; jc < endidx; ++jc) {
-                double m = 1.0 / (b[IDX(jc, i)] - cp[IDX(jc, i - 1)] * a[IDX(jc, i)]);
-                cp[IDX(jc, i)] = c[IDX(jc, i)] * m;
-                dp[IDX(jc, i)] = (d[IDX(jc, i)] - dp[IDX(jc, i - 1)] * a[IDX(jc, i)]) * m;
-            }
-        }
-
-        // Initialize varout
-        #pragma acc loop gang(static: 1) vector
-        for (int jc = startidx; jc < endidx; ++jc) {
-            varout[IDX(jc, elev-1)] = dp[IDX(jc, elev-1)];
-        }
-
-        // Solve for varout from the vectors c-prime and d-prime
-        #pragma acc loop seq
-        for (int i = elev - 2; i >= slev; --i) {
-            #pragma acc loop gang(static: 1) vector
-            for (int jc = startidx; jc < endidx; ++jc) {
-                varout[IDX(jc, i)] = dp[IDX(jc, i)] - cp[IDX(jc, i)] * varout[IDX(jc, i + 1)];
-            }
-        }
-    }
-
-    printf("tdma_solver_vec: completed using C++\n");
-
-    // Wait for OpenACC asynchronous operations to complete if acc_queue is not optional
-    if (opt_acc_queue == -1) {
-        #pragma acc wait(acc_queue)
-    }
-
-    // Free memory at the end
-    delete[] cp;
-    delete[] dp;
-
-    // End timing
-    auto end_time = std::chrono::high_resolution_clock::now();
-    std::chrono::duration<double> elapsed_time = end_time - start_time;
-
-    std::cout << "Elapsed time for tdma_solver_vec (C++): " << elapsed_time.count() << " seconds" << std::endl;
+void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d,
+                              int slev, int elev, int startidx, int endidx,
+                              int nrows, int ncols, double* varout) {
+
+  // Start timing
+  auto start_time = std::chrono::high_resolution_clock::now();
+
+  // Allocate temporary arrays using Kokkos::View.
+  // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols].
+  // Kokkos::View automatically handles memory management.
+  Kokkos::View<double**> c_p("c_p", nrows, ncols);
+  Kokkos::View<double**> d_p("d_p", nrows, ncols);
+
+  // Wrap the input arrays in unmanaged views.
+  // We assume that the input arrays are laid out in column-major order as in the original code.
+  // Here we use LayoutLeft so that the first index (row) is contiguous.
+  typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D;
+  typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D;
+  UnmanagedConst2D a_view(a, nrows, ncols);
+  UnmanagedConst2D b_view(b, nrows, ncols);
+  UnmanagedConst2D c_view(c, nrows, ncols);
+  UnmanagedConst2D d_view(d, nrows, ncols);
+  Unmanaged2D varout_view(varout, nrows, ncols);
+
+  // Initialize c-prime and d-prime at the starting level (slev)
+  Kokkos::parallel_for("init_c_p_d_p", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
+    c_p(jc, slev) = c_view(jc, slev) / b_view(jc, slev);
+    d_p(jc, slev) = d_view(jc, slev) / b_view(jc, slev);
+  });
+  Kokkos::fence();
+
+  // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1.
+  for (int i = slev + 1; i < elev; ++i) {
+    Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
+      double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i));
+      c_p(jc, i) = c_view(jc, i) * m;
+      d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m;
+    });
+    Kokkos::fence();
+  }
+
+  // Initialize the output array at the last level (elev-1)
+  Kokkos::parallel_for("init_varout", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
+    varout_view(jc, elev-1) = d_p(jc, elev-1);
+  });
+  Kokkos::fence();
+
+  // Back substitution: update varout for columns from elev-2 down to slev.
+  for (int i = elev - 2; i >= slev; --i) {
+    Kokkos::parallel_for("back_substitution", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
+      varout_view(jc, i) = d_p(jc, i) - c_p(jc, i) * varout_view(jc, i + 1);
+    });
+    Kokkos::fence();
+  }
+
+  // End timing and print the elapsed time
+  auto end_time = std::chrono::high_resolution_clock::now();
+  std::chrono::duration<double> elapsed_time = end_time - start_time;
+  std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl;
 }
+
 }
-- 
GitLab


From 136db1183f9b3bf1ddb317ad2d26c448b7c02a30 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 17:16:59 +0100
Subject: [PATCH 22/50] build kokkos internally along with the package

---
 CMakeLists.txt             | 16 ++++++++++++++++
 src/support/CMakeLists.txt | 10 ++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8fb4acf..ab93b92 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,6 +110,22 @@ else()
   endif()
 endif()
 
+include(FetchContent)
+# configure kokkos 4.4 repository link
+FetchContent_Declare(kokkos
+        URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz
+        URL_HASH MD5=eafd0d42c9831858aa84fde78576644c
+)
+
+# disable build of C++23 mdspan experimental support for now        
+set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support")
+
+# by default, build the Kokkos serial backend for CPU
+set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend")
+set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations")
+
+FetchContent_MakeAvailable(kokkos)
+
 add_subdirectory(src)
 
 # Allow for 'make test' even if the tests are disabled:
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index b4ceb37..9f56017 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -57,9 +57,6 @@ if(IM_ENABLE_OPENACC)
   endif()
 endif()
 
-message(STATUS "iconmath-support enabling Kokkos")
-find_package(Kokkos REQUIRED)
-
 target_include_directories(
   iconmath-support
   PUBLIC
@@ -77,7 +74,12 @@ target_include_directories(
     # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
     $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>)
 
-target_link_libraries(iconmath-support PUBLIC fortran-support::fortran-support Kokkos::kokkos)
+target_link_libraries(iconmath-support
+    PUBLIC
+        fortran-support::fortran-support
+    PRIVATE
+        Kokkos::kokkos
+)
 
 install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets")
 
-- 
GitLab


From b865029415ea2421332985d4c27b69e6642ace8b Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 17:27:31 +0100
Subject: [PATCH 23/50] changed cmake format

---
 CMakeLists.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ab93b92..26137c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,10 +112,10 @@ endif()
 
 include(FetchContent)
 # configure kokkos 4.4 repository link
-FetchContent_Declare(kokkos
-        URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz
-        URL_HASH MD5=eafd0d42c9831858aa84fde78576644c
-)
+FetchContent_Declare(
+  kokkos
+  URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz
+  URL_HASH MD5=eafd0d42c9831858aa84fde78576644c)
 
 # disable build of C++23 mdspan experimental support for now        
 set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support")
-- 
GitLab


From d026fe4459e14ce5d7af6d4a2fc8873f3e8b92b9 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 18:01:11 +0100
Subject: [PATCH 24/50] fixed an error in cmake styling

---
 .cmake-format.py | 1 +
 CMakeLists.txt   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.cmake-format.py b/.cmake-format.py
index 31f47fa..1b19e4e 100644
--- a/.cmake-format.py
+++ b/.cmake-format.py
@@ -37,3 +37,4 @@ with section("lint"):
 	local_var_pattern = '[a-zA-Z][0-9a-zA-z_]+'
 	private_var_pattern = '[a-z][a-z0-9_]+'
 	public_var_pattern = '[A-Z][0-9a-zA-Z_]+'
+	global_var_pattern = '[A-Z][0-9a-zA-Z_]+'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26137c4..7cf92be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,7 +117,7 @@ FetchContent_Declare(
   URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz
   URL_HASH MD5=eafd0d42c9831858aa84fde78576644c)
 
-# disable build of C++23 mdspan experimental support for now        
+# disable build of C++23 mdspan experimental support for now
 set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support")
 
 # by default, build the Kokkos serial backend for CPU
-- 
GitLab


From fbae7837c92035d6cd952c0696aa2138a167406d Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Fri, 21 Feb 2025 18:04:04 +0100
Subject: [PATCH 25/50] updated the gitignore file

---
 .gitignore | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index 23535bf..8123bd2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,12 @@ cmake_install.cmake
 iconmath-config-version.cmake
 iconmath-config.cmake
 iconmath-targets.cmake
+KokkosConfig.cmake
+KokkosConfigVersion.cmake
+KokkosTargets.cmake
+KokkosConfigCommon.cmake
+Kokkos_Version_Info.cpp
+Kokkos_Version_Info.hpp
 
 # Build stage files:
 *.L
-- 
GitLab


From f01010898e3732b505372f10332e02b0ad631dca Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Sun, 23 Feb 2025 14:25:53 +0100
Subject: [PATCH 26/50] made the cpp function a templated one

---
 src/support/mo_math_utilities.F90             | 10 ++++--
 src/support/mo_math_utilities.cpp             | 31 +++++++++++--------
 src/support/mo_math_utilities.hpp             | 15 +++++++++
 ..._utilities.F90 => test_math_utilities.f90} |  0
 4 files changed, 40 insertions(+), 16 deletions(-)
 create mode 100644 src/support/mo_math_utilities.hpp
 rename test/fortran/{test_math_utilities.F90 => test_math_utilities.f90} (100%)

diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index 9c2897f..168ec26 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -79,7 +79,11 @@ MODULE mo_math_utilities
   PUBLIC :: line_intersect
   PUBLIC :: lintersect
   PUBLIC :: tdma_solver
+#ifndef __USE_CPP_BINDINGS
   PUBLIC :: tdma_solver_vec
+#else
+  PUBLIC :: tdma_solver_vec_double
+#endif
   PUBLIC :: check_orientation
 
   !  vertical coordinates routines
@@ -242,14 +246,14 @@ CONTAINS
 
   ! C++ binding for tdma_solver_vec
   INTERFACE
-    SUBROUTINE tdma_solver_vec(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
-      BIND(C, NAME="tdma_solver_vec")
+    SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
+      BIND(C, NAME="tdma_solver_vec_double")
       IMPORT :: c_double, c_int
       REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*)
       INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols
       REAL(c_double), INTENT(OUT) :: varout(*)
       INTEGER(c_int), OPTIONAL :: opt_acc_queue
-    END SUBROUTINE tdma_solver_vec
+    END SUBROUTINE tdma_solver_vec_double
   END INTERFACE
 
 CONTAINS
diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index b3031a5..6a60f2c 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -9,16 +9,12 @@
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
 
-#include <vector>
-#include <iostream>
-#include <chrono> // For timing
-#include <Kokkos_Core.hpp>
+#include "mo_math_utilities.hpp"
 
-extern "C" {
-
-void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, const double* d,
+template <typename T>
+void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d,
                               int slev, int elev, int startidx, int endidx,
-                              int nrows, int ncols, double* varout) {
+                              int nrows, int ncols, T* varout) {
 
   // Start timing
   auto start_time = std::chrono::high_resolution_clock::now();
@@ -26,14 +22,14 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c
   // Allocate temporary arrays using Kokkos::View.
   // The views c_p and d_p are allocated as 2D arrays with dimensions [nrows][ncols].
   // Kokkos::View automatically handles memory management.
-  Kokkos::View<double**> c_p("c_p", nrows, ncols);
-  Kokkos::View<double**> d_p("d_p", nrows, ncols);
+  Kokkos::View<T**> c_p("c_p", nrows, ncols);
+  Kokkos::View<T**> d_p("d_p", nrows, ncols);
 
   // Wrap the input arrays in unmanaged views.
   // We assume that the input arrays are laid out in column-major order as in the original code.
   // Here we use LayoutLeft so that the first index (row) is contiguous.
-  typedef Kokkos::View<const double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D;
-  typedef Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D;
+  typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConst2D;
+  typedef Kokkos::View<T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> Unmanaged2D;
   UnmanagedConst2D a_view(a, nrows, ncols);
   UnmanagedConst2D b_view(b, nrows, ncols);
   UnmanagedConst2D c_view(c, nrows, ncols);
@@ -50,7 +46,7 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c
   // Forward sweep: compute c-prime and d-prime for each column from slev+1 to elev-1.
   for (int i = slev + 1; i < elev; ++i) {
     Kokkos::parallel_for("forward_sweep", Kokkos::RangePolicy<>(startidx, endidx), KOKKOS_LAMBDA (const int jc) {
-      double m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i));
+      T m = 1.0 / (b_view(jc, i) - c_p(jc, i - 1) * a_view(jc, i));
       c_p(jc, i) = c_view(jc, i) * m;
       d_p(jc, i) = (d_view(jc, i) - d_p(jc, i - 1) * a_view(jc, i)) * m;
     });
@@ -71,10 +67,19 @@ void tdma_solver_vec_kokkos(const double* a, const double* b, const double* c, c
     Kokkos::fence();
   }
 
+  c_p = Kokkos::View<T**>();
+  d_p = Kokkos::View<T**>();
   // End timing and print the elapsed time
   auto end_time = std::chrono::high_resolution_clock::now();
   std::chrono::duration<double> elapsed_time = end_time - start_time;
   std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl;
 }
 
+extern "C" {
+
+  void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d,
+      int slev, int elev, int startidx, int endidx,
+      int nrows, int ncols, double* varout) {
+    tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout);
+  }
 }
diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp
new file mode 100644
index 0000000..20b1f44
--- /dev/null
+++ b/src/support/mo_math_utilities.hpp
@@ -0,0 +1,15 @@
+#include <vector>
+#include <iostream>
+#include <chrono> // For timing
+#include <Kokkos_Core.hpp>
+
+template <typename T>
+void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d,
+                              int slev, int elev, int startidx, int endidx,
+                              int nrows, int ncols, T* varout);
+
+extern "C" {
+  void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d,
+                              int slev, int elev, int startidx, int endidx,
+                              int nrows, int ncols, double* varout);
+}
diff --git a/test/fortran/test_math_utilities.F90 b/test/fortran/test_math_utilities.f90
similarity index 100%
rename from test/fortran/test_math_utilities.F90
rename to test/fortran/test_math_utilities.f90
-- 
GitLab


From e46960c7181b86066e142d950123446fe2f17270 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Sun, 23 Feb 2025 14:47:17 +0100
Subject: [PATCH 27/50] added unit-tests for c++ codes using googletest

---
 test/CMakeLists.txt         |  3 +-
 test/c/CMakeLists.txt       | 30 +++++++++++++++
 test/c/main.cpp             | 14 +++++++
 test/c/test_tdma_solver.cpp | 77 +++++++++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 test/c/CMakeLists.txt
 create mode 100644 test/c/main.cpp
 create mode 100644 test/c/test_tdma_solver.cpp

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c8fa8e2..2a5f5df 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,4 +9,5 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------
 
-add_subdirectory(fortran)
+# add_subdirectory(fortran)
+add_subdirectory(c)
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
new file mode 100644
index 0000000..be1af9e
--- /dev/null
+++ b/test/c/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Fetch GoogleTest via FetchContent
+include(FetchContent)
+FetchContent_Declare(
+  googletest
+  URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip
+)
+set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googletest)
+
+# Find Kokkos (or use your existing Kokkos installation)
+# find_package(Kokkos REQUIRED)
+
+set(SOURCES
+  main.cpp
+  test_tdma_solver.cpp
+)
+# Create the test executable from your test files, including main.cpp.
+add_executable(iconmath_test_c ${SOURCES})
+
+# Link the test executable with GoogleTest and Kokkos.
+target_link_libraries(iconmath_test_c
+  PUBLIC
+    iconmath-support
+  PRIVATE
+    gtest_main
+    Kokkos::kokkos
+)
+
+include(GoogleTest)
+gtest_discover_tests(iconmath_test_c)
diff --git a/test/c/main.cpp b/test/c/main.cpp
new file mode 100644
index 0000000..2df720d
--- /dev/null
+++ b/test/c/main.cpp
@@ -0,0 +1,14 @@
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+
+int main(int argc, char** argv) {
+  // Initialize Kokkos before any tests run.
+  Kokkos::initialize(argc, argv);
+  
+  ::testing::InitGoogleTest(&argc, argv);
+  int result = RUN_ALL_TESTS();
+  
+  // Finalize Kokkos after all tests have completed.
+  Kokkos::finalize();
+  return result;
+}
diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp
new file mode 100644
index 0000000..7c3c3a8
--- /dev/null
+++ b/test/c/test_tdma_solver.cpp
@@ -0,0 +1,77 @@
+#include <gtest/gtest.h>
+#include <vector>
+#include <algorithm>
+#include "mo_math_utilities.hpp"
+
+// Helper function to compute the 1D index for column-major storage.
+inline int idx(int i, int j, int nrows) {
+  return i + j * nrows;
+}
+
+// Test fixture for the TDMA solver tests.
+class TDMASolverTestFixture : public ::testing::Test {
+protected:
+  const int n = 10;             // Matrix dimension.
+  std::vector<double> a;        // Input matrix a.
+  std::vector<double> b;        // Input matrix b.
+  std::vector<double> c;        // Input matrix c.
+  std::vector<double> d;        // Input matrix d.
+  std::vector<double> x;        // Output matrix.
+
+  TDMASolverTestFixture()
+      : a(n * n), b(n * n), c(n * n), d(n * n), x(n * n, 0.0) {}
+
+  // SetUp is run before each test.
+  void SetUp() override {
+    // Fill arrays in column-major order.
+    for (int j = 0; j < n; j++) {
+      for (int i = 0; i < n; i++) {
+        double value = (i + 1) + (j + 1);
+        a[idx(i, j, n)] = 1.0 * value;
+        b[idx(i, j, n)] = 2.0 * value;
+        c[idx(i, j, n)] = 1.0 * value;
+        d[idx(i, j, n)] = 1.0 * value;
+      }
+    }
+    // Clear the output vector.
+    std::fill(x.begin(), x.end(), 0.0);
+  }
+};
+
+TEST_F(TDMASolverTestFixture, FullTest) {
+  // Call the solver over the full range:
+  tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+                         0, n, 0, n, n, n, x.data());
+
+  // Compute the sum of all elements in the output matrix.
+  double sum = 0.0;
+  for (int j = 0; j < n; j++) {
+    for (int i = 0; i < n; i++) {
+      sum += x[idx(i, j, n)];
+    }
+  }
+
+  // Expected reference sum
+  double sum_ref = 27.2727272727272769;
+  double tol = 1e-13;
+  EXPECT_NEAR(sum, sum_ref, tol);
+}
+
+TEST_F(TDMASolverTestFixture, PartialTest) {
+  // Call the solver for a partial region:
+  // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1.
+  tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+                         1, n - 1, 1, n - 1, n, n, x.data());
+
+  // Compute the sum over a region
+  double sum = 0.0;
+  for (int j = 1; j < n - 1; j++) {
+    for (int i = 1; i < n - 1; i++) {
+      sum += x[idx(i, j, n)];
+    }
+  }
+
+  double sum_ref = 17.7777777777777679;
+  double tol = 1e-13;
+  EXPECT_NEAR(sum, sum_ref, tol);
+}
-- 
GitLab


From 1b8c2abb9cca5fa5a7163f3d8037fada7ccd0108 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Sun, 23 Feb 2025 21:53:41 +0100
Subject: [PATCH 28/50] added c++ code for mo_lib_interpolation_vector

---
 src/interpolation/CMakeLists.txt              |  11 ++
 .../mo_lib_interpolation_vector.cpp           | 130 ++++++++++++++++++
 .../mo_lib_interpolation_vector.hpp           |  38 +++++
 src/support/mo_lib_loopindices.hpp            |  11 ++
 4 files changed, 190 insertions(+)
 create mode 100644 src/interpolation/mo_lib_interpolation_vector.cpp
 create mode 100644 src/interpolation/mo_lib_interpolation_vector.hpp
 create mode 100644 src/support/mo_lib_loopindices.hpp

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 73e582c..9455f9e 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -13,6 +13,7 @@ add_library(
   iconmath-interpolation
   mo_lib_interpolation_scalar.F90
   mo_lib_interpolation_vector.F90
+  mo_lib_interpolation_vector.cpp
   mo_lib_intp_rbf.F90)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
@@ -55,10 +56,20 @@ target_include_directories(
     $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
     # Path to internal include directory
     $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${PROJECT_SOURCE_DIR}/include>>
+    # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
+    # multiple compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
+  PRIVATE
+    # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
+    # compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>
 )
 
 target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support)
 target_link_libraries(iconmath-interpolation PUBLIC iconmath-support)
+target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos)
 
 install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets")
 
diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp
new file mode 100644
index 0000000..50772e7
--- /dev/null
+++ b/src/interpolation/mo_lib_interpolation_vector.cpp
@@ -0,0 +1,130 @@
+#include "mo_lib_loopindices.hpp"
+#include "mo_lib_interpolation_vector.hpp"
+
+// The templated C++ function using Kokkos.
+// Raw pointer arguments are wrapped into unmanaged Kokkos::Views.
+// Note: The dimensions below must match the Fortran arrays.
+//   - p_vn_in and p_vt_in:   dimensions [nproma, nlev, nblks_e]
+//   - cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3]
+//   - e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c]
+//   - p_u_out and p_v_out:   dimensions [nproma, nlev, nblks_c]
+template <typename T>
+void edges2cells_vector_lib(
+    const T* p_vn_in, const T* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const T* e_bln_c_u, const T* e_bln_c_v,
+    T* p_u_out, T* p_v_out,
+    // Additional integer parameters.
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    // Dimensions for the arrays.
+    int nlev, int nblks_e, int nblks_c)
+{
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+  typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+  typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+  UnmanagedConstT3D p_vn_in_view(p_vn_in, nproma, nlev, nblks_e);
+  UnmanagedConstT3D p_vt_in_view(p_vt_in, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma, nblks_c, 3);
+
+  UnmanagedConstT3D e_bln_c_u_view(e_bln_c_u, nproma, 6, nblks_c);
+  UnmanagedConstT3D e_bln_c_v_view(e_bln_c_v, nproma, 6, nblks_c);
+
+  UnmanagedT3D p_u_out_view(p_u_out, nproma, nlev, nblks_c);
+  UnmanagedT3D p_v_out_view(p_v_out, nproma, nlev, nblks_c);
+
+  // Loop over cell blocks as in the original Fortran code.
+  for (int jb = i_startblk; jb <= i_endblk; ++jb) {
+    // Call get_indices_c_lib to get inner loop indices for block jb.
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma,
+                      jb, i_startblk, i_endblk,
+                      i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+        {slev, i_startidx}, {elev + 1, i_endidx + 1});
+    Kokkos::parallel_for("edges2cells_inner", innerPolicy,
+      KOKKOS_LAMBDA(const int jk, const int jc) {
+        // Compute the bilinear interpolation for cell (jc, jk, jb).
+        p_u_out_view(jc, jk, jb) =
+          e_bln_c_u_view(jc, 0, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+          e_bln_c_u_view(jc, 1, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+          e_bln_c_u_view(jc, 2, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+          e_bln_c_u_view(jc, 3, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+          e_bln_c_u_view(jc, 4, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) +
+          e_bln_c_u_view(jc, 5, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1);
+
+        p_v_out_view(jc, jk, jb) =
+          e_bln_c_v_view(jc, 0, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+          e_bln_c_v_view(jc, 1, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 0) - 1, jk, cell_edge_blk_view(jc, jb, 0) - 1) +
+          e_bln_c_v_view(jc, 2, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+          e_bln_c_v_view(jc, 3, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 1) - 1, jk, cell_edge_blk_view(jc, jb, 1) - 1) +
+          e_bln_c_v_view(jc, 4, jb) *
+            p_vn_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1) +
+          e_bln_c_v_view(jc, 5, jb) *
+            p_vt_in_view(cell_edge_idx_view(jc, jb, 2) - 1, jk, cell_edge_blk_view(jc, jb, 2) - 1);
+      });
+    // Optionally fence after each block if required.
+    Kokkos::fence();
+  }
+}
+
+extern "C" void edges2cells_vector_lib_dp(
+    const double* p_vn_in, const double* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const double* e_bln_c_u, const double* e_bln_c_v,
+    double* p_u_out, double* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c)
+{
+  edges2cells_vector_lib<double>(p_vn_in, p_vt_in,
+                                 cell_edge_idx, cell_edge_blk,
+                                 e_bln_c_u, e_bln_c_v,
+                                 p_u_out, p_v_out,
+                                 i_startblk, i_endblk,
+                                 i_startidx_in, i_endidx_in,
+                                 slev, elev,
+                                 nproma,
+                                 nlev, nblks_e, nblks_c);
+}
+
+extern "C" void edges2cells_vector_lib_sp(
+    const float* p_vn_in, const float* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const float* e_bln_c_u, const float* e_bln_c_v,
+    float* p_u_out, float* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c)
+{
+  edges2cells_vector_lib<float>(p_vn_in, p_vt_in,
+                                cell_edge_idx, cell_edge_blk,
+                                e_bln_c_u, e_bln_c_v,
+                                p_u_out, p_v_out,
+                                i_startblk, i_endblk,
+                                i_startidx_in, i_endidx_in,
+                                slev, elev,
+                                nproma,
+                                nlev, nblks_e, nblks_c);
+}
diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp
new file mode 100644
index 0000000..a764ada
--- /dev/null
+++ b/src/interpolation/mo_lib_interpolation_vector.hpp
@@ -0,0 +1,38 @@
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template <typename T>
+void edges2cells_vector_lib(
+    const T* p_vn_in, const T* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const T* e_bln_c_u, const T* e_bln_c_v,
+    T* p_u_out, T* p_v_out,
+    // Additional integer parameters.
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    // Dimensions for the arrays.
+    int nlev, int nblks_e, int nblks_c);
+
+extern "C" void edges2cells_vector_lib_dp(
+    const double* p_vn_in, const double* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const double* e_bln_c_u, const double* e_bln_c_v,
+    double* p_u_out, double* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c);
+
+extern "C" void edges2cells_vector_lib_sp(
+    const float* p_vn_in, const float* p_vt_in,
+    const int* cell_edge_idx, const int* cell_edge_blk,
+    const float* e_bln_c_u, const float* e_bln_c_v,
+    float* p_u_out, float* p_v_out,
+    int i_startblk, int i_endblk,
+    int i_startidx_in, int i_endidx_in,
+    int slev, int elev,
+    int nproma,
+    int nlev, int nblks_e, int nblks_c);
diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp
new file mode 100644
index 0000000..d53aa38
--- /dev/null
+++ b/src/support/mo_lib_loopindices.hpp
@@ -0,0 +1,11 @@
+extern "C" {
+    // get_indices_c_lib function
+    void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out);
+
+    void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out);
+
+    void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                           int &i_startidx_out, int &i_endidx_out);
+}
-- 
GitLab


From e2411d5dd5375c955f179a39e54596bb8685e10a Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 07:20:00 +0100
Subject: [PATCH 29/50] added license to the new files

---
 src/interpolation/mo_lib_interpolation_vector.cpp | 11 +++++++++++
 src/interpolation/mo_lib_interpolation_vector.hpp | 11 +++++++++++
 src/support/mo_lib_loopindices.hpp                | 11 +++++++++++
 src/support/mo_math_utilities.hpp                 | 11 +++++++++++
 test/c/CMakeLists.txt                             | 13 +++++++++++++
 test/c/main.cpp                                   | 11 +++++++++++
 test/c/test_tdma_solver.cpp                       | 11 +++++++++++
 7 files changed, 79 insertions(+)

diff --git a/src/interpolation/mo_lib_interpolation_vector.cpp b/src/interpolation/mo_lib_interpolation_vector.cpp
index 50772e7..00a914a 100644
--- a/src/interpolation/mo_lib_interpolation_vector.cpp
+++ b/src/interpolation/mo_lib_interpolation_vector.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include "mo_lib_loopindices.hpp"
 #include "mo_lib_interpolation_vector.hpp"
 
diff --git a/src/interpolation/mo_lib_interpolation_vector.hpp b/src/interpolation/mo_lib_interpolation_vector.hpp
index a764ada..0d19b24 100644
--- a/src/interpolation/mo_lib_interpolation_vector.hpp
+++ b/src/interpolation/mo_lib_interpolation_vector.hpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <Kokkos_Core.hpp>
 #include <vector>
 
diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp
index d53aa38..03eb977 100644
--- a/src/support/mo_lib_loopindices.hpp
+++ b/src/support/mo_lib_loopindices.hpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 extern "C" {
     // get_indices_c_lib function
     void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp
index 20b1f44..4ee5dc9 100644
--- a/src/support/mo_math_utilities.hpp
+++ b/src/support/mo_math_utilities.hpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <vector>
 #include <iostream>
 #include <chrono> // For timing
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index be1af9e..52225a7 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -1,3 +1,14 @@
+# ICON
+#
+# ---------------------------------------------------------------
+# Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+# Contact information: icon-model.org
+#
+# See AUTHORS.TXT for a list of authors
+# See LICENSES/ for license information
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------
+
 # Fetch GoogleTest via FetchContent
 include(FetchContent)
 FetchContent_Declare(
@@ -13,6 +24,7 @@ FetchContent_MakeAvailable(googletest)
 set(SOURCES
   main.cpp
   test_tdma_solver.cpp
+  test_interpolation_vector.cpp
 )
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
@@ -21,6 +33,7 @@ add_executable(iconmath_test_c ${SOURCES})
 target_link_libraries(iconmath_test_c
   PUBLIC
     iconmath-support
+    iconmath-interpolation
   PRIVATE
     gtest_main
     Kokkos::kokkos
diff --git a/test/c/main.cpp b/test/c/main.cpp
index 2df720d..bd0fadc 100644
--- a/test/c/main.cpp
+++ b/test/c/main.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <Kokkos_Core.hpp>
 #include <gtest/gtest.h>
 
diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp
index 7c3c3a8..8f120ef 100644
--- a/test/c/test_tdma_solver.cpp
+++ b/test/c/test_tdma_solver.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <gtest/gtest.h>
 #include <vector>
 #include <algorithm>
-- 
GitLab


From dab58c8117fdd4db89a7cdbad96b7690556540ff Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 07:23:09 +0100
Subject: [PATCH 30/50] fixed a bug in cmake style

---
 test/c/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index 52225a7..95ca08b 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -15,7 +15,7 @@ FetchContent_Declare(
   googletest
   URL https://github.com/google/googletest/archive/refs/tags/release-1.12.1.zip
 )
-set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
 FetchContent_MakeAvailable(googletest)
 
 # Find Kokkos (or use your existing Kokkos installation)
-- 
GitLab


From 29625285aa26ac3b1fdae7861e6765e1f7843bb1 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 07:25:39 +0100
Subject: [PATCH 31/50] added test for interpolation_vector

---
 test/c/test_interpolation_vector.cpp | 123 +++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 test/c/test_interpolation_vector.cpp

diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp
new file mode 100644
index 0000000..dc70a63
--- /dev/null
+++ b/test/c/test_interpolation_vector.cpp
@@ -0,0 +1,123 @@
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <vector>
+#include "mo_lib_interpolation_vector.hpp"
+
+// Dimensions for the test (small, trivial test).
+// We assume Fortran ordering: column-major, but our C wrappers will wrap raw pointers into Kokkos::Views with LayoutLeft.
+constexpr int nproma = 2;
+constexpr int nlev    = 3;
+constexpr int nblks_e = 2;  // For the edge arrays (p_vn_in, p_vt_in)
+constexpr int nblks_c = 2;  // For the cell arrays and interpolation coefficients
+
+// For the get_indices_c_lib inputs.
+constexpr int i_startblk   = 0;
+constexpr int i_endblk     = 1;  // two blocks: indices 0 and 1
+constexpr int i_startidx_in = 0;
+constexpr int i_endidx_in   = nproma - 1; // 0 and 1
+constexpr int slev  = 0;
+constexpr int elev  = nlev - 1; // 0 .. 2
+
+// Helper to compute total number of elements for a 3D array stored in column-major order.
+template<typename T>
+size_t num_elements(int dim1, int dim2, int dim3) {
+  return static_cast<size_t>(dim1) * dim2 * dim3;
+}
+
+// Test for the double precision (dp) version.
+TEST(Edges2CellsTest, DPTest) {
+  // Allocate and fill input arrays.
+  std::vector<double> p_vn_in(num_elements<double>(nproma, nlev, nblks_e), 1.0);
+  std::vector<double> p_vt_in(num_elements<double>(nproma, nlev, nblks_e), 1.0);
+  // cell_edge_idx and cell_edge_blk: dimensions [nproma, nblks_c, 3]
+  std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1);
+  std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1);
+
+  // Here we set cell_edge_idx to 1, 2, 1 for every triple.
+  for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) {
+    cell_edge_idx[i]   = 1;
+    cell_edge_idx[i+1] = 2;
+    cell_edge_idx[i+2] = 1;
+  }
+  // Similarly, set cell_edge_blk to all ones (valid since nblks_e=2, so index 1 means block 0 after subtracting 1).
+  // e_bln_c_u and e_bln_c_v: dimensions [nproma, 6, nblks_c]
+  std::vector<double> e_bln_c_u(num_elements<double>(nproma, 6, nblks_c), 1.0);
+  std::vector<double> e_bln_c_v(num_elements<double>(nproma, 6, nblks_c), 1.0);
+  // Output arrays: dimensions [nproma, nlev, nblks_c]
+  std::vector<double> p_u_out(num_elements<double>(nproma, nlev, nblks_c), 0.0);
+  std::vector<double> p_v_out(num_elements<double>(nproma, nlev, nblks_c), 0.0);
+
+  std::vector<double> p_u_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0);
+  std::vector<double> p_v_ref(num_elements<double>(nproma, nlev, nblks_c), 6.0);
+
+  // Call the dp (double precision) version.
+  edges2cells_vector_lib_dp(
+    p_vn_in.data(), p_vt_in.data(),
+    cell_edge_idx.data(), cell_edge_blk.data(),
+    e_bln_c_u.data(), e_bln_c_v.data(),
+    p_u_out.data(), p_v_out.data(),
+    i_startblk, i_endblk,
+    i_startidx_in, i_endidx_in,
+    slev, elev,
+    nproma,
+    nlev, nblks_e, nblks_c);
+
+  // Check that for each computed cell in p_u_out and p_v_out, the value is 6.
+  // This is because for each cell, the kernel adds 6 terms of 1*1.
+  p_u_ref[0] = 0.0;
+  p_u_ref[8] = 0.0;
+  p_u_ref[10] = 0.0;
+  p_v_ref[0] = 0.0;
+  p_v_ref[8] = 0.0;
+  p_v_ref[10] = 0.0;
+  for (size_t idx = 0; idx < p_u_out.size(); ++idx) {
+    EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12);
+    EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12);
+  }
+}
+
+// Test for the single precision (sp) version.
+TEST(Edges2CellsTest, SPTest) {
+  // Allocate and fill input arrays.
+  std::vector<float> p_vn_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f);
+  std::vector<float> p_vt_in(num_elements<float>(nproma, nlev, nblks_e), 1.0f);
+  std::vector<int> cell_edge_idx(num_elements<int>(nproma, nblks_c, 3), 1);
+  std::vector<int> cell_edge_blk(num_elements<int>(nproma, nblks_c, 3), 1);
+  // Set cell_edge_idx values to 1, 2, 1.
+  for (int i = 0; i < num_elements<int>(nproma, nblks_c, 3); i += 3) {
+    cell_edge_idx[i]   = 1;
+    cell_edge_idx[i+1] = 2;
+    cell_edge_idx[i+2] = 1;
+  }
+  std::vector<float> e_bln_c_u(num_elements<float>(nproma, 6, nblks_c), 1.0f);
+  std::vector<float> e_bln_c_v(num_elements<float>(nproma, 6, nblks_c), 1.0f);
+  std::vector<float> p_u_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f);
+  std::vector<float> p_v_out(num_elements<float>(nproma, nlev, nblks_c), 0.0f);
+
+  std::vector<float> p_u_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f);
+  std::vector<float> p_v_ref(num_elements<float>(nproma, nlev, nblks_c), 6.0f);
+
+  // Call the sp (float precision) version.
+  edges2cells_vector_lib_sp(
+    p_vn_in.data(), p_vt_in.data(),
+    cell_edge_idx.data(), cell_edge_blk.data(),
+    e_bln_c_u.data(), e_bln_c_v.data(),
+    p_u_out.data(), p_v_out.data(),
+    i_startblk, i_endblk,
+    i_startidx_in, i_endidx_in,
+    slev, elev,
+    nproma,
+    nlev, nblks_e, nblks_c);
+
+  p_u_ref[0] = 0.0f;
+  p_u_ref[8] = 0.0f;
+  p_u_ref[10] = 0.0f;
+  p_v_ref[0] = 0.0f;
+  p_v_ref[8] = 0.0f;
+  p_v_ref[10] = 0.0f;
+  // Verify that every computed output equals 6.
+  for (size_t idx = 0; idx < p_u_out.size(); ++idx) {
+    EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f);
+    EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-5f);
+  }
+}
-- 
GitLab


From 17c9c507858ff8f99036b4be34b0ebd8a139a7ee Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 07:27:21 +0100
Subject: [PATCH 32/50] added licence to the new test file

---
 test/c/test_interpolation_vector.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp
index dc70a63..a33673c 100644
--- a/test/c/test_interpolation_vector.cpp
+++ b/test/c/test_interpolation_vector.cpp
@@ -1,3 +1,14 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
 #include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>
 #include <vector>
-- 
GitLab


From b9b268efd076d905596d78b0760cd364d4306419 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 09:25:16 +0100
Subject: [PATCH 33/50] modified CMakeLists.txt of horizontal to enable
 compilation of C++ code that uses Kokkos

---
 src/horizontal/CMakeLists.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 7515842..6cebed9 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -52,11 +52,21 @@ target_include_directories(
     # Path to the Fortran modules:
     $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:${Fortran_MODULE_DIRECTORY}>>
     $<INSTALL_INTERFACE:$<$<COMPILE_LANGUAGE:Fortran>:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>>
+    # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
+    # multiple compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
+  PRIVATE
+    # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
+    # compile languages
+    # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_BINARY_DIR}>>
 )
 
 target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support)
 target_link_libraries(iconmath-horizontal PUBLIC iconmath-support)
 target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation)
+target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos)
 
 install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets")
 
-- 
GitLab


From 01cbeda7dca297ee6fc738c7207b55e995d2e879 Mon Sep 17 00:00:00 2001
From: Georgiana Mania <mania@dkrz.de>
Date: Mon, 24 Feb 2025 12:32:00 +0000
Subject: [PATCH 34/50] configure compilation for kokkos + nvidia
 (icon-libraries/libiconmath!31)

Merged-by: Pradipta Samanta <samanta@dkrz.de>
Changelog: default
---
 CMakeLists.txt                   | 11 +++++++++++
 src/horizontal/CMakeLists.txt    |  3 ++-
 src/interpolation/CMakeLists.txt |  1 +
 src/support/CMakeLists.txt       |  1 +
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7cf92be..affedaa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ option(IM_USE_CPP_BINDINGS "Use C++ bindings" OFF)
 option(IM_ENABLE_DIM_SWAP "Enable dimension swap" OFF)
 option(IM_ENABLE_OPENACC "Enable OpenACC support" OFF)
 option(IM_ENABLE_OPENMP "Enable OpenMP support" OFF)
+set(IM_ENABLE_GPU OFF CACHE STRING "Enable Kokkos GPU support for arch. Valid values: OFF, nvidia-sm80")
 
 # GNUInstallDirs issues a warning if CMAKE_SIZEOF_VOID_P is not defined, which
 # is the case with NAG. One way to circumvent that is to enable C language for
@@ -124,6 +125,16 @@ set(Kokkos_ENABLE_IMPL_MDSPAN OFF CACHE BOOL "Experimental mdspan support")
 set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "Kokkos Serial backend")
 set(Kokkos_ARCH_NATIVE ON CACHE BOOL "Kokkos native architecture optimisations")
 
+if ("${IM_ENABLE_GPU}" STREQUAL "nvidia-sm80")
+  # NVIDIA A100
+  set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Kokkos CUDA backend")
+  set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "CUDA architecture: Ampere cc80")
+endif()
+
+if (${IM_ENABLE_OPENMP})
+  set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Kokkos OpenMP backend")
+endif()
+
 FetchContent_MakeAvailable(kokkos)
 
 add_subdirectory(src)
diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 6cebed9..d2abe7d 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -66,7 +66,8 @@ target_include_directories(
 target_link_libraries(iconmath-horizontal PUBLIC fortran-support::fortran-support)
 target_link_libraries(iconmath-horizontal PUBLIC iconmath-support)
 target_link_libraries(iconmath-horizontal PUBLIC iconmath-interpolation)
-target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos)
+target_link_libraries(iconmath-horizontal PRIVATE Kokkos::kokkos)
+set_target_properties(iconmath-horizontal PROPERTIES LINKER_LANGUAGE Fortran)
 
 install(TARGETS iconmath-horizontal EXPORT "${PROJECT_NAME}-targets")
 
diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 9455f9e..346aaaa 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -70,6 +70,7 @@ target_include_directories(
 target_link_libraries(iconmath-interpolation PUBLIC fortran-support::fortran-support)
 target_link_libraries(iconmath-interpolation PUBLIC iconmath-support)
 target_link_libraries(iconmath-interpolation PRIVATE Kokkos::kokkos)
+set_target_properties(iconmath-interpolation PROPERTIES LINKER_LANGUAGE Fortran)
 
 install(TARGETS iconmath-interpolation EXPORT "${PROJECT_NAME}-targets")
 
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index 9f56017..ed6a4d3 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -80,6 +80,7 @@ target_link_libraries(iconmath-support
     PRIVATE
         Kokkos::kokkos
 )
+set_target_properties(iconmath-support PROPERTIES LINKER_LANGUAGE Fortran)
 
 install(TARGETS iconmath-support EXPORT "${PROJECT_NAME}-targets")
 
-- 
GitLab


From c61fe00d6c0e99ce97afc9877625c3e6034fad01 Mon Sep 17 00:00:00 2001
From: Pradipta Samanta <samanta@dkrz.de>
Date: Mon, 24 Feb 2025 21:57:30 +0100
Subject: [PATCH 35/50] added openacc_fortran_options only for the compilation
 of Fortran codes

fixed a cmake style

added to two other components
---
 src/horizontal/CMakeLists.txt    | 4 +++-
 src/interpolation/CMakeLists.txt | 3 ++-
 src/support/CMakeLists.txt       | 4 +++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index d2abe7d..078a14d 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -39,7 +39,9 @@ if(IM_ENABLE_OPENACC)
   # If _OPENACC is defined, assume that the required compiler flags are already
   # provided, e.g. in CMAKE_Fortran_FLAGS:
   if(NOT HAS_OPENACC_MACRO)
-    target_compile_options(iconmath-horizontal PRIVATE ${OpenACC_Fortran_OPTIONS})
+    target_compile_options(iconmath-horizontal
+                           PRIVATE
+                           $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
     # This make sures that unit tests (FortUTF) compiles without the need of
     # passing OpenACC compile option.
     target_link_libraries(iconmath-horizontal PRIVATE OpenACC::OpenACC_Fortran)
diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index 346aaaa..f982f3b 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -41,7 +41,8 @@ if(IM_ENABLE_OPENACC)
   # provided, e.g. in CMAKE_Fortran_FLAGS:
   if(NOT HAS_OPENACC_MACRO)
     target_compile_options(iconmath-interpolation
-                           PRIVATE ${OpenACC_Fortran_OPTIONS})
+                           PRIVATE
+                           $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
     # This make sures that unit tests (FortUTF) compiles without the need of
     # passing OpenACC compile option.
     target_link_libraries(iconmath-interpolation PRIVATE OpenACC::OpenACC_Fortran)
diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index ed6a4d3..e78fc16 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -50,7 +50,9 @@ if(IM_ENABLE_OPENACC)
   # If _OPENACC is defined, assume that the required compiler flags are already
   # provided, e.g. in CMAKE_Fortran_FLAGS:
   if(NOT HAS_OPENACC_MACRO)
-    target_compile_options(iconmath-support PRIVATE ${OpenACC_Fortran_OPTIONS})
+    target_compile_options(iconmath-support
+                           PRIVATE
+                           $<$<COMPILE_LANGUAGE:Fortran>:${OpenACC_Fortran_OPTIONS}>)
     # This make sures that unit tests (FortUTF) compiles without the need of
     # passing OpenACC compile option.
     target_link_libraries(iconmath-support PRIVATE OpenACC::OpenACC_Fortran)
-- 
GitLab


From e4efb896b8f7e7f092bcd3664e400a7e57efb9cc Mon Sep 17 00:00:00 2001
From: Harshada Balasubramanian <harshada.balasubramanian@mpimet.mpg.de>
Date: Mon, 24 Feb 2025 21:30:54 +0000
Subject: [PATCH 36/50] Added a new argument to the functions of
 `mo_lib_loopindices.cpp` to fix a bug regarding startindex
 (icon-libraries/libiconmath!32)

This made the code to produce bit-identical results for both Fortran and C++

Co-authored-by: Pradipta Samanta <samanta@dkrz.de>
Merged-by: Pradipta Samanta <samanta@dkrz.de>
Changelog: bigfix
---
 src/support/CMakeLists.txt           |   3 +-
 src/support/mo_lib_loopindices.cpp   | 100 +++++++++++++++++----------
 src/support/mo_lib_loopindices.hpp   |  20 +++---
 src/support/mo_math_utilities.F90    |   8 +--
 src/support/mo_math_utilities.cpp    |  16 +++--
 src/support/mo_math_utilities.hpp    |   8 +--
 src/support/support_bindings.cpp     |  50 ++++++++++++++
 src/support/support_bindings.h       |  33 +++++++++
 test/c/test_interpolation_vector.cpp |  12 ----
 test/c/test_tdma_solver.cpp          |   4 +-
 10 files changed, 176 insertions(+), 78 deletions(-)
 create mode 100644 src/support/support_bindings.cpp
 create mode 100644 src/support/support_bindings.h

diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt
index e78fc16..44f60aa 100644
--- a/src/support/CMakeLists.txt
+++ b/src/support/CMakeLists.txt
@@ -19,7 +19,8 @@ add_library(
   mo_math_types.f90
   mo_math_utilities.cpp
   mo_math_utilities.F90
-  mo_random_number_generators.F90)
+  mo_random_number_generators.F90
+  support_bindings.cpp)
 
 add_library(${PROJECT_NAME}::support ALIAS iconmath-support)
 
diff --git a/src/support/mo_lib_loopindices.cpp b/src/support/mo_lib_loopindices.cpp
index e6d9d21..30c82bd 100644
--- a/src/support/mo_lib_loopindices.cpp
+++ b/src/support/mo_lib_loopindices.cpp
@@ -11,47 +11,75 @@
 
 #include <algorithm> // For std::max
 
-extern "C" {
-    // get_indices_c_lib function
-    void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out) {
-        if (i_blk == i_startblk) {
-            i_startidx_out = std::max(1, i_startidx_in);
-            i_endidx_out = nproma;
-            if (i_blk == i_endblk) {
-                i_endidx_out = i_endidx_in;
-            }
-        } else if (i_blk == i_endblk) {
-            i_startidx_out = 1;
+// get_indices_c_lib function
+void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+                        const int i_blk, const int i_startblk, const int i_endblk,
+                        int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
+    
+    //Since code is ported incrementally from Fortran to C++, depending on where the function is called from
+    //(either fortran or c++), the first index should be either 0 or 1.
+    int first_index;
+    if (called_from_cpp)
+        first_index = 0;
+    else
+        first_index = 1;                   
+    
+    if (i_blk == i_startblk) {
+        i_startidx_out = std::max(first_index, i_startidx_in);
+        i_endidx_out = nproma;
+        if (i_blk == i_endblk) {
             i_endidx_out = i_endidx_in;
-        } else {
-            i_startidx_out = 1;
-            i_endidx_out = nproma;
         }
+    } else if (i_blk == i_endblk) {
+        i_startidx_out = first_index;
+        i_endidx_out = i_endidx_in;
+    } else {
+        i_startidx_out = first_index;
+        i_endidx_out = nproma;
     }
+}
 
-    // get_indices_e_lib function
-    void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out) {
-        i_startidx_out = (i_blk != i_startblk) ? 1 : std::max(1, i_startidx_in);
-        i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in;
-    }
+// get_indices_e_lib function
+void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+                        const int i_blk, const int i_startblk, const int i_endblk,
+                        int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
+    
+    //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, 
+    //the first index should be either 0 or 1.
+    int first_index;
+    if (called_from_cpp)
+        first_index = 0;
+    else
+        first_index = 1;
+
+    i_startidx_out = (i_blk != i_startblk) ? first_index : std::max(first_index, i_startidx_in);
+    i_endidx_out = (i_blk != i_endblk) ? nproma : i_endidx_in;
+}
+
+// get_indices_v_lib function
+void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+                        const int i_blk, const int i_startblk, const int i_endblk,
+                        int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true) {
+    
+    //Since code is ported incrementally from Fortran to C++, depending on where the function is called from, 
+    //the first index should be either 0 or 1.
+    int first_index;
+    if (called_from_cpp)
+        first_index = 0;
+    else
+        first_index = 1;
 
-    // get_indices_v_lib function
-    void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out) {
-        if (i_blk == i_startblk) {
-            i_startidx_out = i_startidx_in;
-            i_endidx_out = nproma;
-            if (i_blk == i_endblk) {
-                i_endidx_out = i_endidx_in;
-            }
-        } else if (i_blk == i_endblk) {
-            i_startidx_out = 1;
+    if (i_blk == i_startblk) {
+        i_startidx_out = i_startidx_in;
+        i_endidx_out = nproma;
+        if (i_blk == i_endblk) {
             i_endidx_out = i_endidx_in;
-        } else {
-            i_startidx_out = 1;
-            i_endidx_out = nproma;
         }
+    } else if (i_blk == i_endblk) {
+        i_startidx_out = first_index;
+        i_endidx_out = i_endidx_in;
+    } else {
+        i_startidx_out = first_index;
+        i_endidx_out = nproma;
     }
-}
+}
\ No newline at end of file
diff --git a/src/support/mo_lib_loopindices.hpp b/src/support/mo_lib_loopindices.hpp
index 03eb977..5136c6a 100644
--- a/src/support/mo_lib_loopindices.hpp
+++ b/src/support/mo_lib_loopindices.hpp
@@ -8,15 +8,17 @@
 // See LICENSES/ for license information
 // SPDX-License-Identifier: BSD-3-Clause
 // ---------------------------------------------------------------
+#pragma once
 
-extern "C" {
-    // get_indices_c_lib function
-    void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out);
+void get_indices_c_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+    const int i_blk, const int i_startblk, const int i_endblk,
+    int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
 
-    void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out);
+void get_indices_e_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+                        const int i_blk, const int i_startblk, const int i_endblk,
+                        int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
 
-    void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
-                           int &i_startidx_out, int &i_endidx_out);
-}
+void get_indices_v_lib(const int i_startidx_in, const int i_endidx_in, const int nproma, 
+                            const int i_blk, const int i_startblk, const int i_endblk,
+                            int &i_startidx_out, int &i_endidx_out, const bool called_from_cpp=true);
+    
\ No newline at end of file
diff --git a/src/support/mo_math_utilities.F90 b/src/support/mo_math_utilities.F90
index 168ec26..d1cf120 100644
--- a/src/support/mo_math_utilities.F90
+++ b/src/support/mo_math_utilities.F90
@@ -82,7 +82,7 @@ MODULE mo_math_utilities
 #ifndef __USE_CPP_BINDINGS
   PUBLIC :: tdma_solver_vec
 #else
-  PUBLIC :: tdma_solver_vec_double
+  PUBLIC :: tdma_solver_vec_dp
 #endif
   PUBLIC :: check_orientation
 
@@ -246,14 +246,14 @@ CONTAINS
 
   ! C++ binding for tdma_solver_vec
   INTERFACE
-    SUBROUTINE tdma_solver_vec_double(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
-      BIND(C, NAME="tdma_solver_vec_double")
+    SUBROUTINE tdma_solver_vec_dp(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout, opt_acc_queue) &
+      BIND(C, NAME="tdma_solver_vec_dp")
       IMPORT :: c_double, c_int
       REAL(c_double), INTENT(IN) :: a(*), b(*), c(*), d(*)
       INTEGER(c_int), VALUE :: slev, elev, startidx, endidx, nrows, ncols
       REAL(c_double), INTENT(OUT) :: varout(*)
       INTEGER(c_int), OPTIONAL :: opt_acc_queue
-    END SUBROUTINE tdma_solver_vec_double
+    END SUBROUTINE tdma_solver_vec_dp
   END INTERFACE
 
 CONTAINS
diff --git a/src/support/mo_math_utilities.cpp b/src/support/mo_math_utilities.cpp
index 6a60f2c..5859b9d 100644
--- a/src/support/mo_math_utilities.cpp
+++ b/src/support/mo_math_utilities.cpp
@@ -75,11 +75,13 @@ void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d,
   std::cout << "Elapsed time for tdma_solver_vec (Kokkos): " << elapsed_time.count() << " seconds" << std::endl;
 }
 
-extern "C" {
+template
+void tdma_solver_vec<double>(const double* a, const double* b, const double* c, const double* d,
+  int slev, int elev, int startidx, int endidx,
+  int nrows, int ncols, double* varout);
+
+template
+void tdma_solver_vec<float>(const float* a, const float* b, const float* c, const float* d,
+  int slev, int elev, int startidx, int endidx,
+  int nrows, int ncols, float* varout);
 
-  void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d,
-      int slev, int elev, int startidx, int endidx,
-      int nrows, int ncols, double* varout) {
-    tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout);
-  }
-}
diff --git a/src/support/mo_math_utilities.hpp b/src/support/mo_math_utilities.hpp
index 4ee5dc9..a3f3ba1 100644
--- a/src/support/mo_math_utilities.hpp
+++ b/src/support/mo_math_utilities.hpp
@@ -17,10 +17,4 @@
 template <typename T>
 void tdma_solver_vec(const T* a, const T* b, const T* c, const T* d,
                               int slev, int elev, int startidx, int endidx,
-                              int nrows, int ncols, T* varout);
-
-extern "C" {
-  void tdma_solver_vec_double(const double* a, const double* b, const double* c, const double* d,
-                              int slev, int elev, int startidx, int endidx,
-                              int nrows, int ncols, double* varout);
-}
+                              int nrows, int ncols, T* varout);
\ No newline at end of file
diff --git a/src/support/support_bindings.cpp b/src/support/support_bindings.cpp
new file mode 100644
index 0000000..664fc1e
--- /dev/null
+++ b/src/support/support_bindings.cpp
@@ -0,0 +1,50 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "support_bindings.h"
+#include "mo_lib_loopindices.hpp"
+#include "mo_math_utilities.hpp"
+
+
+// mo_loop_indices.F90
+// C wrappers for C++ functionality
+void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+    int &i_startidx_out, int &i_endidx_out){
+        get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk,
+                            i_endblk, i_startidx_out, i_endidx_out, false);
+}
+void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+    int &i_startidx_out, int &i_endidx_out){
+        get_indices_e_lib(i_startidx_in, i_endidx_in, nproma,i_blk, i_startblk, i_endblk,
+                            i_startidx_out, i_endidx_out, false);
+}
+
+void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+    int &i_startidx_out, int &i_endidx_out){
+        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, i_blk, i_startblk, i_endblk,
+            i_startidx_out,i_endidx_out, false);
+}
+
+void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d,
+    int slev, int elev, int startidx, int endidx,
+    int nrows, int ncols, double* varout){
+    
+    tdma_solver_vec<double>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout);
+
+}
+
+void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const float* d,
+      int slev, int elev, int startidx, int endidx,
+      int nrows, int ncols, float* varout){
+
+    tdma_solver_vec<float>(a, b, c, d, slev, elev, startidx, endidx, nrows, ncols, varout);
+
+}
diff --git a/src/support/support_bindings.h b/src/support/support_bindings.h
new file mode 100644
index 0000000..df452e4
--- /dev/null
+++ b/src/support/support_bindings.h
@@ -0,0 +1,33 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+extern "C" {
+    // mo_loop_indices.F90
+    void get_indices_c_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                            int &i_startidx_out, int &i_endidx_out);
+
+    void get_indices_e_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                            int &i_startidx_out, int &i_endidx_out);
+
+    void get_indices_v_lib(int i_startidx_in, int i_endidx_in, int nproma, int i_blk, int i_startblk, int i_endblk,
+                            int &i_startidx_out, int &i_endidx_out);
+
+    //mo_math_utilities.F90
+    void tdma_solver_vec_dp(const double* a, const double* b, const double* c, const double* d,
+        int slev, int elev, int startidx, int endidx,
+        int nrows, int ncols, double* varout);
+
+    void tdma_solver_vec_sp(const float* a, const float* b, const float* c, const float* d,
+          int slev, int elev, int startidx, int endidx,
+          int nrows, int ncols, float* varout);
+
+}
diff --git a/test/c/test_interpolation_vector.cpp b/test/c/test_interpolation_vector.cpp
index a33673c..0eb5a8d 100644
--- a/test/c/test_interpolation_vector.cpp
+++ b/test/c/test_interpolation_vector.cpp
@@ -75,12 +75,6 @@ TEST(Edges2CellsTest, DPTest) {
 
   // Check that for each computed cell in p_u_out and p_v_out, the value is 6.
   // This is because for each cell, the kernel adds 6 terms of 1*1.
-  p_u_ref[0] = 0.0;
-  p_u_ref[8] = 0.0;
-  p_u_ref[10] = 0.0;
-  p_v_ref[0] = 0.0;
-  p_v_ref[8] = 0.0;
-  p_v_ref[10] = 0.0;
   for (size_t idx = 0; idx < p_u_out.size(); ++idx) {
     EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-12);
     EXPECT_NEAR(p_v_out[idx], p_v_ref[idx], 1e-12);
@@ -120,12 +114,6 @@ TEST(Edges2CellsTest, SPTest) {
     nproma,
     nlev, nblks_e, nblks_c);
 
-  p_u_ref[0] = 0.0f;
-  p_u_ref[8] = 0.0f;
-  p_u_ref[10] = 0.0f;
-  p_v_ref[0] = 0.0f;
-  p_v_ref[8] = 0.0f;
-  p_v_ref[10] = 0.0f;
   // Verify that every computed output equals 6.
   for (size_t idx = 0; idx < p_u_out.size(); ++idx) {
     EXPECT_NEAR(p_u_out[idx], p_u_ref[idx], 1e-5f);
diff --git a/test/c/test_tdma_solver.cpp b/test/c/test_tdma_solver.cpp
index 8f120ef..4e09ff3 100644
--- a/test/c/test_tdma_solver.cpp
+++ b/test/c/test_tdma_solver.cpp
@@ -51,7 +51,7 @@ protected:
 
 TEST_F(TDMASolverTestFixture, FullTest) {
   // Call the solver over the full range:
-  tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+  tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(),
                          0, n, 0, n, n, n, x.data());
 
   // Compute the sum of all elements in the output matrix.
@@ -71,7 +71,7 @@ TEST_F(TDMASolverTestFixture, FullTest) {
 TEST_F(TDMASolverTestFixture, PartialTest) {
   // Call the solver for a partial region:
   // For C++: slev = 1, elev = n-1, startidx = 1, endidx = n-1.
-  tdma_solver_vec_double(a.data(), b.data(), c.data(), d.data(),
+  tdma_solver_vec<double>(a.data(), b.data(), c.data(), d.data(),
                          1, n - 1, 1, n - 1, n, n, x.data());
 
   // Compute the sum over a region
-- 
GitLab


From 7620931e152dbb3a161df7212484654a52845b0e Mon Sep 17 00:00:00 2001
From: Dylan Kierans <kierans@dkrz.de>
Date: Tue, 25 Feb 2025 21:46:48 +0000
Subject: [PATCH 37/50] Draft: cpp version of
 mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib (icon-libraries/libiconmath!34)

## What is the new feature

cpp version of mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib

## How is it implemented

Kept separate file from other `mo_lib_intp_rbf` routines to avoid merge conflicts. Will be resolved by Ali and Dylan later.

Co-authored-by: Pradipta Samanta <samanta@dkrz.de>
Merged-by: Pradipta Samanta <samanta@dkrz.de>
Changelog: feature
---
 src/interpolation/CMakeLists.txt              |   5 +-
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.cpp | 197 ++++++++++++++++++
 ...b_intp_rbf-rbf_vec_interpol_vertex_lib.hpp |  32 +++
 ...f-rbf_vec_interpol_vertex_lib_bindings.cpp | 134 ++++++++++++
 ...rbf-rbf_vec_interpol_vertex_lib_bindings.h |  54 +++++
 test/c/CMakeLists.txt                         |   1 +
 test/c/test_intp_rbf.cpp                      | 126 +++++++++++
 7 files changed, 548 insertions(+), 1 deletion(-)
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
 create mode 100644 src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
 create mode 100644 test/c/test_intp_rbf.cpp

diff --git a/src/interpolation/CMakeLists.txt b/src/interpolation/CMakeLists.txt
index f982f3b..37c3ad0 100644
--- a/src/interpolation/CMakeLists.txt
+++ b/src/interpolation/CMakeLists.txt
@@ -14,7 +14,10 @@ add_library(
   mo_lib_interpolation_scalar.F90
   mo_lib_interpolation_vector.F90
   mo_lib_interpolation_vector.cpp
-  mo_lib_intp_rbf.F90)
+  mo_lib_intp_rbf.F90
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
+  mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
+)
 
 add_library(${PROJECT_NAME}::interpolation ALIAS iconmath-interpolation)
 
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
new file mode 100644
index 0000000..c9b776e
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.cpp
@@ -0,0 +1,197 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+/// Contains the only mo_lib_intp_rbf::rbf_vec_interpol_vertex_lib()
+///
+/// Separate to avoid conflicts with Ali working on rest of mo_lib_intp_rbf
+
+#include <type_traits>
+#include <Kokkos_Core.hpp>
+#include "mo_lib_loopindices.hpp"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+
+constexpr int rbf_vec_dim_v = 6;
+
+//-------------------------------------------------------------------------
+//
+//
+//>
+/// Performs vector RBF reconstruction at triangle vertices.
+///
+/// Theory described in Narcowich and Ward (Math Comp. 1994) and
+/// Bonaventura and Baudisch (Mox Report n. 75).
+/// It takes edge based variables as input and combines them
+/// into three dimensional cartesian vectors at each vertex.
+///
+/// Two templated variables in order to support mixed precision.
+/// Intended that type_traits::is_floating_point(T,S)==TRUE
+/// precision(T) >= precision(S)
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    // Dimensions for the arrays.
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    /*
+#ifdef DIM_ENABLE_GPU
+    if (lacc){ using MemSpace = Kokkos::CudaSpace;
+    } else { using MemSpace = Kokkos::HostSpace; }
+#else
+    using MemSpace = Kokkos::HostSpace;
+#endif
+
+    */
+
+    // Wrap raw pointers in unmanaged Kokkos Views.
+    typedef Kokkos::View<const T***,    Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<const T****,   Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D;
+    typedef Kokkos::View<const int***,  Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+    typedef Kokkos::View<S***,          Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedS3D;
+
+
+
+    // input components of velocity or horizontal vorticity vectors at edge midpoints
+    // dim: (nproma,nlev,nblks_e)
+    UnmanagedConstT3D p_e_in_view(p_e_in, nproma, nlev, nblks_e);
+
+    // index array defining the stencil of surrounding edges for vector rbf interpolation at each triangle vertex
+    // (rbf_vec_dim_v,nproma,nblks_v)
+    UnmanagedConstInt3D iidx_view(rbf_vec_idx_v, rbf_vec_dim_v, nproma, nblks_v);
+    UnmanagedConstInt3D iblk_view(rbf_vec_blk_v, rbf_vec_dim_v, nproma, nblks_v);
+
+    // coefficients are working precision array containing the coefficients used for vector rbf interpolation
+    // at each tringle vertex (input is normal component),
+    // dim: (rbf_vec_dim_v,2,nproma,nblks_v)
+    UnmanagedConstT4D ptr_coeff_view(rbf_vec_coeff_v, rbf_vec_dim_v, 2, nproma, nblks_v);
+
+    // reconstructed x-component (u) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_u_out_view(p_u_out, nproma, nlev, nblks_v);
+    // reconstructed y-component (v) of velocity vector,
+    // dim: (nproma,nlev,nblks_v)
+    UnmanagedS3D p_v_out_view(p_v_out, nproma, nlev, nblks_v);
+
+    // Local vars
+    //int jv, jk, jb; // integer over vertices, levels, and blocks,
+    int jb; // integer over vertices, levels, and blocks,
+    int i_startidx; // start index
+    int i_endidx;   // end index
+
+    for (jb=i_startblk; jb <= i_endblk; ++jb){
+
+        get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk,
+                          i_startidx, i_endidx);
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy(
+            {slev, i_startidx}, {elev + 1, i_endidx + 1});
+
+        Kokkos::parallel_for("rbf_vec_interpol_vertex_lib", innerPolicy,
+            KOKKOS_LAMBDA(const int jk, const int jv) {
+
+                // NOTE: Static indexes reduced by 1 from Fortran version
+                p_u_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 0, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 0, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 0, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 0, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 0, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 0, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+                p_v_out_view(jv, jk, jb) =
+                    ptr_coeff_view(0, 1, jv, jb)*p_e_in_view(iidx_view(0, jv, jb), jk, iblk_view(0, jv, jb)) +
+                    ptr_coeff_view(1, 1, jv, jb)*p_e_in_view(iidx_view(1, jv, jb), jk, iblk_view(1, jv, jb)) +
+                    ptr_coeff_view(2, 1, jv, jb)*p_e_in_view(iidx_view(2, jv, jb), jk, iblk_view(2, jv, jb)) +
+                    ptr_coeff_view(3, 1, jv, jb)*p_e_in_view(iidx_view(3, jv, jb), jk, iblk_view(3, jv, jb)) +
+                    ptr_coeff_view(4, 1, jv, jb)*p_e_in_view(iidx_view(4, jv, jb), jk, iblk_view(4, jv, jb)) +
+                    ptr_coeff_view(5, 1, jv, jb)*p_e_in_view(iidx_view(5, jv, jb), jk, iblk_view(5, jv, jb));
+            }
+        );
+    }
+}
+
+// Explicit instantiation - double precision
+template
+void rbf_vec_interpol_vertex_lib<double, double>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - single precision
+template
+void rbf_vec_interpol_vertex_lib<float, float>(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+// Explicit instantiation - mixed precision
+template
+void rbf_vec_interpol_vertex_lib<double, float>(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
new file mode 100644
index 0000000..c0b6f05
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+template <typename T, typename S>
+void rbf_vec_interpol_vertex_lib(
+    const T* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const T* rbf_vec_coeff_v,
+    S* p_u_out,
+    S* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,             // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_c
+    );
\ No newline at end of file
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
new file mode 100644
index 0000000..06dc467
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.cpp
@@ -0,0 +1,134 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h"
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, double>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+}
+
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<float, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
+void rbf_vec_interpol_vertex_lib_mixprec(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    )
+{
+    rbf_vec_interpol_vertex_lib<double, float>(
+        p_e_in,
+        rbf_vec_idx_v,
+        rbf_vec_blk_v,
+        rbf_vec_coeff_v,
+        p_u_out,
+        p_v_out,
+        i_startblk,       // start_block needed for get_indices_c_lib
+        i_endblk,         // end_block needed for get_indices_c_lib
+        i_startidx_in,    // start_index needed for get_indices_c_lib
+        i_endidx_in,      // end_index needed for get_indices_c_lib
+        slev,             // vertical start level
+        elev,             // vertical end level
+        nproma,           // inner loop length/vector length
+        lacc,                  // if true, use Cuda mem-/exec-spaces
+        acc_async,              // [deprecated] use async acc
+        nlev, nblks_e, nblks_v
+        );
+
+}
+
diff --git a/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
new file mode 100644
index 0000000..4356f88
--- /dev/null
+++ b/src/interpolation/mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib_bindings.h
@@ -0,0 +1,54 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+extern "C" {
+
+void rbf_vec_interpol_vertex_lib_dp(
+    const double* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const double* rbf_vec_coeff_v,
+    double* p_u_out,
+    double* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+void rbf_vec_interpol_vertex_lib_sp(
+    const float* p_e_in,
+    const int* rbf_vec_idx_v,
+    const int* rbf_vec_blk_v,
+    const float* rbf_vec_coeff_v,
+    float* p_u_out,
+    float* p_v_out,
+    const int i_startblk,       // start_block needed for get_indices_c_lib
+    const int i_endblk,         // end_block needed for get_indices_c_lib
+    const int i_startidx_in,    // start_index needed for get_indices_c_lib
+    const int i_endidx_in,      // end_index needed for get_indices_c_lib
+    const int slev,             // vertical start level
+    const int elev,             // vertical end level
+    const int nproma,           // inner loop length/vector length
+    const bool lacc,                  // if true, use Cuda mem-/exec-spaces
+    const bool acc_async,              // [deprecated] use async acc
+    const int nlev, const int nblks_e, const int nblks_v
+    );
+
+}
\ No newline at end of file
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index 95ca08b..13c5dfe 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -25,6 +25,7 @@ set(SOURCES
   main.cpp
   test_tdma_solver.cpp
   test_interpolation_vector.cpp
+  test_intp_rbf.cpp
 )
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
diff --git a/test/c/test_intp_rbf.cpp b/test/c/test_intp_rbf.cpp
new file mode 100644
index 0000000..0aa4f9b
--- /dev/null
+++ b/test/c/test_intp_rbf.cpp
@@ -0,0 +1,126 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <gtest/gtest.h>
+#include <Kokkos_Core.hpp>
+#include <vector>
+#include "mo_lib_intp_rbf-rbf_vec_interpol_vertex_lib.hpp"
+
+// Free-function helpers for 3D and 4D array sizes (assumed column-major)
+template<typename T>
+size_t num_elements_3d(int d1, int d2, int d3) {
+  return static_cast<size_t>(d1) * d2 * d3;
+}
+
+template<typename T>
+size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+  return static_cast<size_t>(d1) * d2 * d3 * d4;
+}
+
+// Define a helper struct that holds the two types.
+template<typename InT, typename OutT>
+struct MixedPrecision {
+  using in_type  = InT;
+  using out_type = OutT;
+};
+
+// Define the list of type pairs we want to test.
+typedef ::testing::Types< MixedPrecision<double, double>,
+                          MixedPrecision<double,  float>,
+                          MixedPrecision<float,   float>  > MixedTypes;
+
+// Define a typed test fixture.
+template <typename TypePair>
+class RbfVecInterpolVertexMixedTestFixture : public ::testing::Test {
+public:
+  using InType  = typename TypePair::in_type;
+  using OutType = typename TypePair::out_type;
+
+  // Constant dimensions.
+  static constexpr int nproma     = 3;  // inner loop length
+  static constexpr int nlev        = 4;  // number of vertical levels
+  static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
+  static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
+  static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+
+  // Parameter values.
+  int i_startblk    = 0;
+  int i_endblk      = 1;      // Test blocks [0, 1]
+  int i_startidx_in = 0;
+  int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
+  int slev          = 0;
+  int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
+  bool lacc         = false;  // Not using ACC-specific behavior.
+  bool acc_async    = false;  // No asynchronous execution.
+
+  // Arrays stored in std::vector.
+  std::vector<InType>  p_e_in;           // Dimensions: (nproma, nlev, nblks_e)
+  std::vector<int>     rbf_vec_idx_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<int>     rbf_vec_blk_v;    // Dimensions: (rbf_vec_dim, nproma, nblks_v)
+  std::vector<InType>  rbf_vec_coeff_v;    // Dimensions: (rbf_vec_dim, 2, nproma, nblks_v)
+  std::vector<OutType> p_u_out;           // Dimensions: (nproma, nlev, nblks_v)
+  std::vector<OutType> p_v_out;           // Dimensions: (nproma, nlev, nblks_v)
+
+  RbfVecInterpolVertexMixedTestFixture() {
+    // Allocate and initialize inputs.
+    p_e_in.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+    rbf_vec_idx_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
+    rbf_vec_blk_v.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
+    rbf_vec_coeff_v.resize(num_elements_4d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+
+    // Allocate output arrays and initialize to zero.
+    p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+    p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+  }
+};
+
+TYPED_TEST_SUITE(RbfVecInterpolVertexMixedTestFixture, MixedTypes);
+
+TYPED_TEST(RbfVecInterpolVertexMixedTestFixture, BasicTest) {
+  using InType  = typename TestFixture::InType;
+  using OutType = typename TestFixture::OutType;
+
+  // Call the function with mixed precision.
+  rbf_vec_interpol_vertex_lib<InType, OutType>(
+    this->p_e_in.data(),
+    this->rbf_vec_idx_v.data(),
+    this->rbf_vec_blk_v.data(),
+    this->rbf_vec_coeff_v.data(),
+    this->p_u_out.data(),
+    this->p_v_out.data(),
+    this->i_startblk,
+    this->i_endblk,
+    this->i_startidx_in,
+    this->i_endidx_in,
+    this->slev,
+    this->elev,
+    this->nproma,
+    this->lacc,
+    this->acc_async,
+    this->nlev,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_e,
+    RbfVecInterpolVertexMixedTestFixture< TypeParam >::nblks_v);
+
+  // Check the outputs only for blocks in the range [i_startblk, i_endblk].
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = 0; level < this->nlev; ++level) {
+      for (int i = 0; i < this->nproma; ++i) {
+        // Compute the linear index for a 3D array in column-major order:
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        // Since every contribution is 1 and there are 6 stencil points, expect 6.
+        EXPECT_NEAR(this->p_u_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+        EXPECT_NEAR(this->p_v_out[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
-- 
GitLab


From 84f5c0904765c3f573e847519064fde4386f7274 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 16:03:59 +0100
Subject: [PATCH 38/50] ported mo_lib_laplace

---
 src/horizontal/CMakeLists.txt              |   4 +-
 src/horizontal/mo_lib_laplace.cpp          | 104 +++++++++++++++++++++
 src/horizontal/mo_lib_laplace.hpp          |  24 +++++
 src/horizontal/mo_lib_laplace_bindings.cpp |  50 ++++++++++
 src/horizontal/mo_lib_laplace_bindings.h   |  32 +++++++
 5 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 src/horizontal/mo_lib_laplace.cpp
 create mode 100644 src/horizontal/mo_lib_laplace.hpp
 create mode 100644 src/horizontal/mo_lib_laplace_bindings.cpp
 create mode 100644 src/horizontal/mo_lib_laplace_bindings.h

diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 078a14d..44f9e44 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -13,7 +13,9 @@ add_library(
   iconmath-horizontal
   mo_lib_divrot.F90
   mo_lib_laplace.F90
-  mo_lib_gradients.F90)
+  mo_lib_gradients.F90
+  mo_lib_laplace_bindings.cpp
+  mo_lib_laplace.cpp)
 
 add_library(${PROJECT_NAME}::horizontal ALIAS iconmath-horizontal)
 
diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
new file mode 100644
index 0000000..48c4479
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -0,0 +1,104 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include "mo_lib_laplace.hpp"
+//#include "mo_lib_gradients.hpp"
+#include <Kokkos_Core.hpp>
+#include <iostream>
+
+template<typename T>
+void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+
+    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c); 
+    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3);
+    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
+    UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2);
+    UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2);
+    UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e);
+    UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3);
+    UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3);
+    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c);
+    UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c); 
+    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c);
+
+
+    switch (cell_type){
+
+        case 3: 
+             for (int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                int i_startidx, i_endidx;
+                get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+
+                Kokkos::parallel_for("rbf_interpol_c2grad",
+                                     Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}), 
+                                     KOKKOS_LAMBDA(const int jk, const int jc){
+
+                                        nabla2_psi_c_view(jc, jk, jb) =  
+                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
+                                            geofac_n2s_view(jc, 2, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                            geofac_n2s_view(jc, 3, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
+                                            geofac_n2s_view(jc, 4, jb); 
+                                     });
+             }
+             break; 
+
+
+        case 6: 
+/* TODO
+             grad_fd_norm_lib(psi_c, edge_cell_idx, edge_cell_blk, 
+                              inv_dual_edge_length, z_grad_fd_norm_e,&
+                              i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e, 
+                              slev, elev, nproma); 
+
+             div_lib(z_grad_fd_norm_e, cell_edge_idx, cell_edge_blk, 
+                     geofac_div, nabla2_psi_c, i_startblk, i_endblk, 
+                     i_startidx_in, i_endidx_in, slev, elev, nproma); 
+*/
+        default:
+             std::cout << "Unknown value for cell_type\n";
+                  
+    }//switch
+}//void
+
+template
+void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+template
+void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
new file mode 100644
index 0000000..2a6663c
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -0,0 +1,24 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+#include "mo_lib_loopindices.hpp"
+#include <Kokkos_Core.hpp>
+#include <vector>
+
+template<typename T>
+void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                       const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, 
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
new file mode 100644
index 0000000..43a19b3
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -0,0 +1,50 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+
+#include  "mo_lib_laplace_bindings.h"
+#include  "mo_lib_laplace.hpp"
+
+
+void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
+                       cell_edge_idx, cell_edge_blk,
+                       geofac_n2s, geofac_div, nabla2_psi_c,
+                       i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e,
+                       nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc); 
+}
+
+
+void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
+                       cell_edge_idx, cell_edge_blk,
+                       geofac_n2s, geofac_div, nabla2_psi_c,
+                       i_startblk,  i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk_e,  i_endblk_e,  i_startidx_e,  i_endidx_e,
+                       nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
+}
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
new file mode 100644
index 0000000..96bdc64
--- /dev/null
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -0,0 +1,32 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+#pragma once
+
+
+
+extern "C"{
+
+void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+}
-- 
GitLab


From 3a150bdafae39401173346a89020a56767920124 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante0.lvt.dkrz.de>
Date: Tue, 25 Feb 2025 16:28:59 +0100
Subject: [PATCH 39/50] Fixed index for nabla2_scalar_lib

---
 src/horizontal/mo_lib_laplace.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 48c4479..203b3b7 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -56,13 +56,13 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                                      KOKKOS_LAMBDA(const int jk, const int jc){
 
                                         nabla2_psi_c_view(jc, jk, jb) =  
-                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 1, jb) +
+                                            psi_c_view(jc, jk, jb)*geofac_n2s_view(jc, 0, jb) +
+                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                            geofac_n2s_view(jc, 1, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
                                             geofac_n2s_view(jc, 2, jb) +
                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                            geofac_n2s_view(jc, 3, jb) +
-                                            psi_c_view(cell_neighbor_idx_view(jc, jb, 3), jk, cell_neighbor_blk_view(jc, jb, 3)) * 
-                                            geofac_n2s_view(jc, 4, jb); 
+                                            geofac_n2s_view(jc, 3, jb); 
                                      });
              }
              break; 
-- 
GitLab


From 79ce81cea6b785c2b8c4185cd8a3414fb5c81fbd Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 10:04:06 +0100
Subject: [PATCH 40/50] WIP: MR seperation.

---
 src/horizontal/mo_lib_laplace.cpp | 118 ++++++++++++++++++++++++++++++
 src/horizontal/mo_lib_laplace.hpp |   7 ++
 2 files changed, 125 insertions(+)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 203b3b7..57ad292 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -11,8 +11,10 @@
 
 #include "mo_lib_laplace.hpp"
 //#include "mo_lib_gradients.hpp"
+//#inlcude "mo_fortran_tools.hpp"
 #include <Kokkos_Core.hpp>
 #include <iostream>
+#include <utility>
 
 template<typename T>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
@@ -102,3 +104,119 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+
+//--------------------------------nabla2_scalar_avg_lib-------------------------------------
+
+
+template<typename T>
+void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
+
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c);
+    UnmanagedConstInt3D cell_neighbor_idx_view(cell_neighbor_idx, nproma,nblks_c,3); 
+    UnmanagedConstInt3D cell_neighbor_blk_view(cell_neighbor_blk, nproma,nblks_c,3); 
+    UnmanagedConstT3D geofac_n2s_view(geofac_n2s, nproma,cell_type+1,nblks_c); 
+    UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); 
+    UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); 
+
+    int aux_c
+    UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local
+
+
+
+    switch (cell_type){
+
+        case 3: 
+
+            if(slev == elev){
+
+                jk = slev; 
+                i_startblk = i_startblk_in[1]; 
+                i_endblk = i_endblk_in[1]; 
+
+                for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                   int i_startidx, i_endidx;
+                   get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                   Kokkos::parallel_for("aux_c", 
+                                        Kokkos::RangePolicy<int>(i_startidx, i_endidx), 
+                                        KOKKOS_LAMBDA(const int jc) {
+
+                                           aux_c_view(jc, jk, jb) =  
+                                           psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                           geofac_n2s_view(jc, 1, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                           geofac_n2s_view(jc, 2, jb) +
+                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                           geofac_n2s_view(jc, 3, jb); 
+
+                                        });
+                }
+
+                   if (l_limited_area || patch_id > 1){
+
+                      i_startblk = i_startblk_in[2]; 
+                      i_endblk = i_endblk_in[2]; 
+
+                      /*TODO
+                      gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), 
+                                Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
+                     */
+                   }
+
+                      i_startblk = i_startblk_in[3]; 
+                      i_endblk = i_endblk_in[3]; 
+
+                      for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                         int i_startidx, i_endidx;
+                         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                         Kokkos::parallel_for("DivGrad",
+                                              Kokkos::RangePolicy<int>(i_startidx, i_endidx),
+                                              KOKKOS_LAMBDA(const int jc) {
+
+                                                 nabla2_psi_c_view(jc, jk, jb) =  
+                                                 aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
+                                                 avg_coeff_view(jc, 1, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
+                                                 avg_coeff_view(jc, 2, jb) +
+                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
+                                                 avg_coeff_view(jc, 3, jb);
+
+                                              });
+                      }
+            }//if
+            break; 
+
+        default:
+        std::cout << "Default case.\n"; 
+
+}//switch
+}//void
+
+
+template
+void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
+
+template
+void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 2a6663c..0a67f85 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -22,3 +22,10 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+template<typename T>
+void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+                      const T* geofac_n2s, const T* avg_coeff,  T* nabla2_psi_c, 
+                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, 
+                      int nblks_c, int cell_type, int patch_id, 
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
+
-- 
GitLab


From 595f8e4f9c136a3dadcf6f319ed1db33b9680822 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante2.lvt.dkrz.de>
Date: Wed, 26 Feb 2025 15:02:02 +0100
Subject: [PATCH 41/50] Completed mo_lib_laplace

---
 src/horizontal/mo_lib_laplace.cpp          | 145 ++++++++++++++-------
 src/horizontal/mo_lib_laplace.hpp          |   2 +-
 src/horizontal/mo_lib_laplace_bindings.cpp |   4 +-
 src/horizontal/mo_lib_laplace_bindings.h   |   4 +-
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 57ad292..3ed2102 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -15,6 +15,7 @@
 #include <Kokkos_Core.hpp>
 #include <iostream>
 #include <utility>
+#include <memory>
 
 template<typename T>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
@@ -112,7 +113,7 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx,
 template<typename T>
 void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
                       const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
 
@@ -127,10 +128,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
     UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma,nlev,nblks_c); 
     UnmanagedT3D nabla2_psi_c_view(nabla2_psi_c, nproma,nlev,nblks_c); 
 
-    int aux_c
-    UnmanagedT3D aux_c_view(aux_c, nproma, nlev, nblks_c); //local
-
-
+    Kokkos::View<int***, Kokkos::LayoutLeft> aux_c_view("aux_c_view", nproma, nlev, nblks_c);
 
     switch (cell_type){
 
@@ -138,71 +136,122 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
 
             if(slev == elev){
 
-                jk = slev; 
-                i_startblk = i_startblk_in[1]; 
-                i_endblk = i_endblk_in[1]; 
+                int jk = slev;
+                int i_startblk = i_startblk_in[1];
+                int i_endblk = i_endblk_in[1];
 
                 for(int jb = i_startblk; jb < i_endblk; ++jb) {
 
                    int i_startidx, i_endidx;
-                   get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+                   get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
 
                    Kokkos::parallel_for("aux_c", 
                                         Kokkos::RangePolicy<int>(i_startidx, i_endidx), 
                                         KOKKOS_LAMBDA(const int jc) {
 
                                            aux_c_view(jc, jk, jb) =  
-                                           psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
-                                           geofac_n2s_view(jc, 1, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
-                                           geofac_n2s_view(jc, 2, jb) +
-                                           psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                           geofac_n2s_view(jc, 3, jb); 
-
+                                             psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
+                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) *
+                                             geofac_n2s_view(jc, 1, jb) +
+                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                             geofac_n2s_view(jc, 2, jb) +
+                                             psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) *
+                                             geofac_n2s_view(jc, 3, jb);
                                         });
                 }
 
-                   if (l_limited_area || patch_id > 1){
+                if (l_limited_area || patch_id > 1){
 
-                      i_startblk = i_startblk_in[2]; 
-                      i_endblk = i_endblk_in[2]; 
+                   i_startblk = i_startblk_in[2];
+                   i_endblk = i_endblk_in[2];
 
-                      /*TODO
-                      gradient( Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), 
-                                Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
-                     */
+                   /*TODO
+                   copy(Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)),
+                             Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
+                   */
+                }
+
+                   i_startblk = i_startblk_in[3];
+                   i_endblk = i_endblk_in[3];
+
+                   for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                      int i_startidx, i_endidx;
+                      get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                      Kokkos::parallel_for("DivGrad",
+                                           Kokkos::RangePolicy<int>(i_startidx, i_endidx),
+                                           KOKKOS_LAMBDA(const int jc) {
+
+                                              nabla2_psi_c_view(jc, jk, jb) =
+                                                  aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                                                  aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) *
+                                                  avg_coeff_view(jc, 1, jb) +
+                                                  aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                                  avg_coeff_view(jc, 2, jb) +
+                                                  aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) *
+                                                  avg_coeff_view(jc, 3, jb);
+
+                                           });
                    }
+            }else{
 
-                      i_startblk = i_startblk_in[3]; 
-                      i_endblk = i_endblk_in[3]; 
+                int i_startblk = i_startblk_in[1];
+                int i_endblk = i_endblk_in[1];
 
-                      for(int jb = i_startblk; jb < i_endblk; ++jb) {
+                 for(int jb = i_startblk; jb < i_endblk; ++jb) {
 
-                         int i_startidx, i_endidx;
-                         get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+                   int i_startidx, i_endidx;
+                   get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                   Kokkos::parallel_for("rbf_interpol_c2grad",
+                                        Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                                        KOKKOS_LAMBDA(const int jk, const int jc){
+                                           aux_c_view(jc, jk, jb) =
+                                                psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
+                                                psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) *
+                                                geofac_n2s_view(jc, 1, jb) +
+                                                psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                                geofac_n2s_view(jc, 2, jb) +
+                                                psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) *
+                                                geofac_n2s_view(jc, 3, jb);
+                                        });
+                 }
 
-                         Kokkos::parallel_for("DivGrad",
-                                              Kokkos::RangePolicy<int>(i_startidx, i_endidx),
-                                              KOKKOS_LAMBDA(const int jc) {
 
-                                                 nabla2_psi_c_view(jc, jk, jb) =  
-                                                 aux_c_view(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) * 
-                                                 avg_coeff_view(jc, 1, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) * 
-                                                 avg_coeff_view(jc, 2, jb) +
-                                                 aux_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) * 
-                                                 avg_coeff_view(jc, 3, jb);
+                  if (l_limited_area || patch_id > 1){
 
-                                              });
-                      }
-            }//if
-            break; 
+                     i_startblk = i_startblk_in[2];
+                     i_endblk = i_endblk_in[2];
 
-        default:
-        std::cout << "Default case.\n"; 
+                     /*
+                      copy(Kokkos::subview(aux_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)),
+                           Kokkos::subview(nabla2_psi_c_view, Kokkos::ALL(), jk, std::make_pair(i_startblk, i_endblk + 1)), lzacc);
+                     */
+                  }
 
+                  i_startblk = i_startblk_in[3];
+                  i_endblk = i_endblk_in[3];
+
+                  for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+                   int i_startidx, i_endidx;
+                   get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+                   Kokkos::parallel_for("rbf_interpol_c2grad",
+                                        Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                                        KOKKOS_LAMBDA(const int jk, const int jc){
+                                           nabla2_psi_c_view(jc, jk, jb) =
+                                              psi_c_view(jc, jk, jb) * geofac_n2s_view(jc, 0, jb) +
+                                              psi_c_view(cell_neighbor_idx_view(jc, jb, 0), jk, cell_neighbor_blk_view(jc, jb, 0)) *
+                                              geofac_n2s_view(jc, 1, jb) +
+                                              psi_c_view(cell_neighbor_idx_view(jc, jb, 1), jk, cell_neighbor_blk_view(jc, jb, 1)) *
+                                              geofac_n2s_view(jc, 2, jb) +
+                                              psi_c_view(cell_neighbor_idx_view(jc, jb, 2), jk, cell_neighbor_blk_view(jc, jb, 2)) *
+                                              geofac_n2s_view(jc, 3, jb);
+                                        });
+                 }
+            }//if
 }//switch
 }//void
 
@@ -210,13 +259,13 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
 template
 void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
                       const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
 
 template
 void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                       const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 0a67f85..3aa905f 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -25,7 +25,7 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
 template<typename T>
 void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
                       const T* geofac_n2s, const T* avg_coeff,  T* nabla2_psi_c, 
-                      int i_startblk_in, int i_endblk_in, int i_startidx_in, int i_endidx_in, 
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], 
                       int nblks_c, int cell_type, int patch_id, 
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
 
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
index 43a19b3..73e5aa7 100644
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -36,7 +36,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
 
@@ -44,7 +44,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
                        cell_edge_idx, cell_edge_blk,
                        geofac_n2s, geofac_div, nabla2_psi_c,
-                       i_startblk,  i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk[3],  i_endblk[3], i_startidx_in[3], i_endidx_in[3],
                        i_startblk_e,  i_endblk_e,  i_startidx_e,  i_endidx_e,
                        nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
 }
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
index 96bdc64..c0fe237 100644
--- a/src/horizontal/mo_lib_laplace_bindings.h
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -18,7 +18,7 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con
                        const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 
@@ -26,7 +26,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 }
-- 
GitLab


From 1fba81333e3038e280c4067dd1fe5ec81d9cb94e Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante6.lvt.dkrz.de>
Date: Thu, 27 Feb 2025 08:27:38 +0100
Subject: [PATCH 42/50] Ready for MR

---
 src/horizontal/mo_lib_laplace.cpp          | 102 ++++++++++++++++++++-
 src/horizontal/mo_lib_laplace.hpp          |  11 +++
 src/horizontal/mo_lib_laplace_bindings.cpp |  74 ++++++++++++++-
 src/horizontal/mo_lib_laplace_bindings.h   |  36 +++++++-
 4 files changed, 217 insertions(+), 6 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 3ed2102..361d752 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -12,6 +12,7 @@
 #include "mo_lib_laplace.hpp"
 //#include "mo_lib_gradients.hpp"
 //#inlcude "mo_fortran_tools.hpp"
+//#inlcude "mo_lib_divrot"
 #include <Kokkos_Core.hpp>
 #include <iostream>
 #include <utility>
@@ -204,7 +205,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
                    int i_startidx, i_endidx;
                    get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
 
-                   Kokkos::parallel_for("rbf_interpol_c2grad",
+                   Kokkos::parallel_for("aux_c_view",
                                         Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
                                         KOKKOS_LAMBDA(const int jk, const int jc){
                                            aux_c_view(jc, jk, jb) =
@@ -238,7 +239,7 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
                    int i_startidx, i_endidx;
                    get_indices_c_lib(i_startidx_in[3], i_endidx_in[3], nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
 
-                   Kokkos::parallel_for("rbf_interpol_c2grad",
+                   Kokkos::parallel_for("nabla2_psi_c_view",
                                         Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
                                         KOKKOS_LAMBDA(const int jk, const int jc){
                                            nabla2_psi_c_view(jc, jk, jb) =
@@ -269,3 +270,100 @@ void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_i
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
+
+
+//-----------------------------------------nabla2_vec_atmos_lib---------------------------------------
+
+template<typename T>
+void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation,
+                          const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div,
+                          const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
+
+    typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
+    typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
+
+    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
+
+    UnmanagedConstT3D vec_e_view(vec_e, nproma,nlev,nblks_e);
+
+    UnmanagedConstInt3D edge_cell_idx_view(edge_cell_idx, nproma,nblks_e,2);
+    UnmanagedConstInt3D edge_cell_blk_view(edge_cell_blk, nproma,nblks_e,2);
+    UnmanagedConstInt3D edge_vertex_idx_view(edge_vertex_idx, nproma,nblks_e,4);
+    UnmanagedConstInt3D edge_vertex_blk_view(edge_vertex_blk, nproma,nblks_e,4);
+
+    UnmanagedConstInt3D cell_edge_idx_view(cell_edge_idx, nproma,nblks_c,3);
+    UnmanagedConstInt3D cell_edge_blk_view(cell_edge_blk, nproma,nblks_c,3);
+
+    UnmanagedConstInt3D vert_edge_idx_view(vert_edge_idx, nproma,nblks_v,6);
+    UnmanagedConstInt3D vert_edge_blk_view(vert_edge_blk, nproma,nblks_v,6);
+
+    UnmanagedConstT2D tangent_orientation_view(tangent_orientation, nproma,nblks_e);
+    UnmanagedConstT2D inv_primal_edge_length_view(inv_primal_edge_length, nproma,nblks_e);
+    UnmanagedConstT2D inv_dual_edge_length_view(inv_dual_edge_length, nproma,nblks_e);
+
+    UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma,9-cell_type,nblks_v);
+    UnmanagedConstT3D geofac_div_view(geofac_div, nproma,cell_type,nblks_c);
+
+    UnmanagedT3D nabla2_vec_e_view(nabla2_vec_e, nproma,nlev,nblks_e);
+
+    Kokkos::View<int***, Kokkos::LayoutLeft> z_div_c_view("z_div_c", nproma, nlev, nblks_c); //Local
+    Kokkos::View<int***, Kokkos::LayoutLeft> z_rot_v_view("z_rot_v", nproma, nlev, nblks_v); //Local
+
+
+    /*TODO
+    div_lib(vec_e, cell_edge_idx, cell_edge_blk, geofac_div, z_div_c, i_startblk_c,
+            i_endblk_c, i_startidx_c, i_endidx_c, slev, elev, nproma);
+
+    rot_vertex_atmos_lib(vec_e, vert_edge_idx, vert_edge_blk, geofac_rot, z_rot_v,
+                         i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v,
+                         slev, elev, nproma);
+    */
+
+
+     for(int jb = i_startblk; jb < i_endblk; ++jb) {
+
+        int i_startidx, i_endidx;
+        get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx);
+
+        Kokkos::parallel_for("nabla2_psi_c_view",
+                            Kokkos::MDRangePolicy<Kokkos::Rank<2>>({slev, i_startidx}, {elev, i_endidx}),
+                            KOKKOS_LAMBDA(const int jk, const int je){
+
+                              nabla2_vec_e_view(je, jk, jb) =
+                                  tangent_orientation_view(je, jb) *
+                                  (z_rot_v_view(edge_vertex_idx_view(je, jb, 2), jk, edge_vertex_blk_view(je, jb, 2)) - z_rot_v_view(edge_vertex_idx_view(je, jb, 1), jk, edge_vertex_blk_view(je, jb, 1))) *
+                                  inv_primal_edge_length_view(je, jb) +
+                                  (z_div_c_view(cell_edge_idx_view(je, jb, 2), jk, cell_edge_blk_view(je, jb, 2)) - z_div_c_view(cell_edge_idx_view(je, jb, 1), jk, cell_edge_blk_view(je, jb, 1))) *
+                                  inv_dual_edge_length_view(je, jb);
+
+                            });
+     }
+}
+
+
+template
+void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
+                          const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
+                          const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
+
+
+template
+void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
+                          const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div,
+                          const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 3aa905f..5e89713 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -29,3 +29,14 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
                       int nblks_c, int cell_type, int patch_id, 
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
 
+
+
+template<typename T>
+void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation,
+                          const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div,
+                          const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
index 73e5aa7..4e22793 100644
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -36,7 +36,7 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
 
@@ -44,7 +44,77 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
                        cell_edge_idx, cell_edge_blk,
                        geofac_n2s, geofac_div, nabla2_psi_c,
-                       i_startblk[3],  i_endblk[3], i_startidx_in[3], i_endidx_in[3],
+                       i_startblk_e,  i_endblk_e, i_startidx_e, i_endidx_e,
                        i_startblk_e,  i_endblk_e,  i_startidx_e,  i_endidx_e,
                        nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
 }
+
+void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
+
+
+    nabla2_scalar_avg_lib<double>(psi_c, cell_neighbor_idx,cell_neighbor_blk,
+                                    geofac_n2s, avg_coeff, nabla2_psi_c,
+                                    i_startblk_in,  i_endblk_in,  i_startidx_in,  i_endidx_in,
+                                    nblks_c,  cell_type,  patch_id,
+                                    nlev,  slev,  elev,  nproma, l_limited_area, lacc);
+
+}
+
+void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
+
+    nabla2_scalar_avg_lib<float>(psi_c, cell_neighbor_idx,  cell_neighbor_blk,
+                                   geofac_n2s, avg_coeff, nabla2_psi_c,
+                                   i_startblk_in,  i_endblk_in,  i_startidx_in,  i_endidx_in,
+                                   nblks_c,  cell_type,  patch_id,
+                                   nlev,  slev,  elev,  nproma, l_limited_area, lacc);
+}
+
+
+void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
+                          const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
+                          const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
+
+    nabla2_vec_atmos_lib<double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
+                           edge_vertex_blk, cell_edge_idx, cell_edge_blk,
+                           vert_edge_idx, vert_edge_blk, tangent_orientation,
+                           inv_primal_edge_length, inv_dual_edge_length, geofac_div,
+                           geofac_rot,nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c,
+                           i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v,
+                           i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c,
+                           nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc);
+}
+
+
+void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
+                          const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div,
+                          const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
+
+    nabla2_vec_atmos_lib<float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
+                                 edge_vertex_blk, cell_edge_idx, cell_edge_blk,
+                                 vert_edge_idx, vert_edge_blk, tangent_orientation,
+                                 inv_primal_edge_length, inv_dual_edge_length, geofac_div,
+                                 geofac_rot, nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c,
+                                 i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v,
+                                 i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c,
+                                 nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc);
+
+
+}
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
index c0fe237..7b22527 100644
--- a/src/horizontal/mo_lib_laplace_bindings.h
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -18,7 +18,7 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con
                        const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
-                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 
@@ -26,7 +26,39 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
-                       int i_startblk[3], int i_endblk[3], int i_startidx_in[3], int i_endidx_in[3],
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
+
+void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
+
+void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
+                          const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
+                          const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
+
+
+void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
+                          const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div,
+                          const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
+
 }
-- 
GitLab


From 8ae37ff24e8c5c5c7da38a566a372f665c97eba8 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Thu, 27 Feb 2025 08:29:57 +0100
Subject: [PATCH 43/50] Fixed typo

---
 src/horizontal/mo_lib_laplace.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 361d752..32c6e00 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -11,8 +11,8 @@
 
 #include "mo_lib_laplace.hpp"
 //#include "mo_lib_gradients.hpp"
-//#inlcude "mo_fortran_tools.hpp"
-//#inlcude "mo_lib_divrot"
+//#include "mo_fortran_tools.hpp"
+//#include "mo_lib_divrot"
 #include <Kokkos_Core.hpp>
 #include <iostream>
 #include <utility>
-- 
GitLab


From 37875ce9f31a48cb2de40313009192f4d102f732 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante4.lvt.dkrz.de>
Date: Thu, 27 Feb 2025 08:50:46 +0100
Subject: [PATCH 44/50] Fix

---
 src/horizontal/mo_lib_laplace.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 32c6e00..8dac99a 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -253,7 +253,11 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
                                         });
                  }
             }//if
-}//switch
+            break; 
+
+        default: 
+            std::cerr << "Unknown value for cell_type: " << cell_type << '\n';
+    }//switch
 }//void
 
 
-- 
GitLab


From a671c52944ef93369c5b48f496f2637272ba2945 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante2.lvt.dkrz.de>
Date: Tue, 8 Apr 2025 16:25:17 +0200
Subject: [PATCH 45/50] Added the test for laplace

---
 test/c/test_mo_lib_laplace.cpp | 171 +++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 test/c/test_mo_lib_laplace.cpp

diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp
new file mode 100644
index 0000000..741f430
--- /dev/null
+++ b/test/c/test_mo_lib_laplace.cpp
@@ -0,0 +1,171 @@
+ // ICON
+ //
+ // ---------------------------------------------------------------
+ // Copyright (C) 2004-2024, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+ // Contact information: icon-model.org
+ //
+ // See AUTHORS.TXT for a list of authors
+ // See LICENSES/ for license information
+ // SPDX-License-Identifier: BSD-3-Clause
+ // ---------------------------------------------------------------
+ 
+ #include <gtest/gtest.h>
+ #include <Kokkos_Core.hpp>
+ #include <vector>
+ #include "mo_lib_laplace.cpp" 
+ 
+ // Free-function helpers for 3D and 4D array sizes (assumed column-major)
+ template<typename T>
+ size_t num_elements_3d(int d1, int d2, int d3) {
+   return static_cast<size_t>(d1) * d2 * d3;
+ }
+ 
+ template<typename T>
+ size_t num_elements_4d(int d1, int d2, int d3, int d4) {
+   return static_cast<size_t>(d1) * d2 * d3 * d4;
+ }
+ 
+ // Define a helper struct that holds the two types.
+ template<typename InT, typename OutT>
+ struct MixedPrecision {
+   using in_type  = InT;
+   using out_type = OutT;
+ };
+
+
+ // Define the list of type pairs we want to test.
+ typedef ::testing::Types< MixedPrecision<double, double>,
+                           MixedPrecision<double,  float>,
+                           MixedPrecision<float,   float>  > MixedTypes;
+ 
+
+class Horizonral_dimension
+{
+
+public: 
+
+    static constexpr int nproma     = 3;  // inner loop length
+    static constexpr int nlev        = 4;  // number of vertical levels
+    static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
+    static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
+    static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
+    static constexpr int nblks_c     = 2;
+
+    // Parameter values.
+    int i_startblk    = 0;
+    int i_endblk      = 1;      // Test blocks [0, 1]
+    int i_startidx_in = 0;
+    int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
+    int i_startblk_e  = 0;
+    int i_endblk_e    = 0;
+    int slev          = 0;
+    int i_startidx_e  = 0;
+    int i_endidx_e    = nproma - 1;
+    int nlev          = 0;
+    int slev          = 0;
+    int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
+    int cell_type     = 0;
+    bool lacc         = false;  // Not using ACC-specific behavior.
+};
+
+
+
+
+ // Define a typed test fixture.
+ template <typename TypePair>
+ class MoLibLaplaceMixedTestFixture : public ::testing::Test {
+ public:
+
+   //Arrays for nabla2_scalar_lib 
+   std::vector<T> psi_c; //Dimension (nproma,nlev,nblks_c) 
+   std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) 
+   std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3)
+   std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2)
+   std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2)
+   std::vector<T> inv_dual_edge_length; //Dimension (nproma,nblks_e)
+   std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3)
+   std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3)
+   std::vector<T> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c)
+   std::vector<T> geofac_div; //Dimension (nproma,cell_type,nblks_c)
+   std::vector<T> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c)
+
+   MoLibLaplaceMixedTestFixture() {
+
+
+       //Arrays for nabla2_scalar_lib
+       psi_c.resize(num_elements_3d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); 
+       inv_dual_edge_length.resize(num_elements_2d<T>(nproma,nblks_e), static_cast<T>(1)); 
+       geofac_n2s.resize(num_elements_2d<T> (nproma,cell_type+1,nblks_c), static_cast<T>(1)); 
+       geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); 
+       nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); 
+       cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3)); 
+       cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_e,3)); 
+       edge_cell_idx.resize(num_elements_3d<int> (nproma,nblks_e,2)); 
+       edge_cell_blk.resize(num_elements_3d<int>(nproma,nlev,nblks_c)); 
+       cell_edge_idx.resize(num_elements_2d<int>(nproma,nblks_c,3)); 
+       cell_edge_blk.resize(num_elements_2d<int>(nproma,nblks_c,3)); 
+       geofac_n2s.resize(num_elements_2d<T>(nproma,cell_type+1,nblks_c), static_cast<T>(1)); 
+       geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); 
+       nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(0)); 
+
+     // Allocate and initialize inputs.
+     psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
+
+     cell_neighbor_idx.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
+     cell_neighbor_blk.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
+     geofac_n2s.resize(num_elements_3d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
+ 
+     // Allocate output arrays and initialize to zero.
+     p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+     p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+   }
+ };
+
+typedef ::testing::Types<float, double> SingleType;
+
+TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib);
+TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) {
+  using InType  = typename TestFixture::InType;
+  using OutType = typename TestFixture::OutType;
+
+  
+  nabla2_scalar_lib<InType, OutType>(
+    this->psi_c.data(), 
+    this->cell_neighbor_idx.data(),
+    this->cell_neighbor_blk.data(),
+    this->edge_cell_idx.data(),
+    this->edge_cell_blk.data(),
+    this->inv_dual_edge_length.data(),
+    this->cell_edge_idx.data(),
+    this->cell_edge_blk.data(),
+    this->geofac_n2s.data(),
+    this->geofac_div.data(),
+    this->nabla2_psi_c.data(), 
+    this->i_startblk,
+    this->i_endblk,
+    this->i_startidx_in,
+    this->i_endidx_in,
+    this->i_startblk_e,
+    this->i_endblk_e, 
+    this->i_startidx_e, 
+    this->i_endidx_e, 
+    this->nlev, 
+    this->slev, 
+    this->elev, 
+    this->nproma, 
+    this->nblks_e, 
+    this->nblks_c, 
+    this->cell_type, 
+    this->lacc)
+
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = this->slev ; level < this->elev; ++level) {
+      for (int i = this->i_startidx ; i < this->i_endidx; ++i) {
+
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
-- 
GitLab


From 2a5aa431f7ea2f1cbd4ee0edb19c4262e469c857 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante2.lvt.dkrz.de>
Date: Wed, 9 Apr 2025 16:24:28 +0200
Subject: [PATCH 46/50] WIP:test for laplace

---
 src/horizontal/mo_lib_laplace.cpp |  23 +++--
 src/horizontal/mo_lib_laplace.hpp |   4 +-
 test/c/CMakeLists.txt             |   2 +
 test/c/test_mo_lib_laplace.cpp    | 135 +++++++++++++-----------------
 4 files changed, 78 insertions(+), 86 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 8dac99a..3c52f20 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -18,18 +18,18 @@
 #include <utility>
 #include <memory>
 
-template<typename T>
+template<typename T, typename S>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                        const int* edge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
-                       const T* geofac_n2s, const T* geofac_div, T* nabla2_psi_c,
+                       const T* geofac_n2s, const T* geofac_div, S* nabla2_psi_c,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
 
     typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
     typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
 
 
@@ -90,7 +90,7 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
 }//void
 
 template
-void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+void nabla2_scalar_lib<double, double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                        const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
@@ -98,8 +98,20 @@ void nabla2_scalar_lib<double>(const double* psi_c, const int* cell_neighbor_idx
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 
+
 template
-void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+void nabla2_scalar_lib<double, float>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
+
+
+
+template
+void nabla2_scalar_lib<float, float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
                        const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
@@ -107,7 +119,6 @@ void nabla2_scalar_lib<float>(const float* psi_c, const int* cell_neighbor_idx,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 
-
 //--------------------------------nabla2_scalar_avg_lib-------------------------------------
 
 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 5e89713..e132247 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -14,11 +14,11 @@
 #include <Kokkos_Core.hpp>
 #include <vector>
 
-template<typename T>
+template<typename T, typename S>
 void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
                        const int* dge_cell_idx, const int* edge_cell_blk, const T* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
-                       const T* geofac_n2s, const T* geofac_div, const T* nabla2_psi_c, 
+                       const T* geofac_n2s, const T* geofac_div, S* nabla2_psi_c, 
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index 13c5dfe..2180e1a 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -26,6 +26,7 @@ set(SOURCES
   test_tdma_solver.cpp
   test_interpolation_vector.cpp
   test_intp_rbf.cpp
+  test_mo_lib_laplace.cpp
 )
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
@@ -35,6 +36,7 @@ target_link_libraries(iconmath_test_c
   PUBLIC
     iconmath-support
     iconmath-interpolation
+    iconmath-horizontal
   PRIVATE
     gtest_main
     Kokkos::kokkos
diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp
index 741f430..f5e1995 100644
--- a/test/c/test_mo_lib_laplace.cpp
+++ b/test/c/test_mo_lib_laplace.cpp
@@ -12,19 +12,22 @@
  #include <gtest/gtest.h>
  #include <Kokkos_Core.hpp>
  #include <vector>
- #include "mo_lib_laplace.cpp" 
+ #include "mo_lib_laplace.hpp" 
  
- // Free-function helpers for 3D and 4D array sizes (assumed column-major)
- template<typename T>
+template<typename T>
  size_t num_elements_3d(int d1, int d2, int d3) {
    return static_cast<size_t>(d1) * d2 * d3;
  }
- 
+
+template<typename T>
+ size_t num_elements_2d(int d1, int d2) {
+   return static_cast<size_t>(d1) * d2;
+ }
+
  template<typename T>
  size_t num_elements_4d(int d1, int d2, int d3, int d4) {
    return static_cast<size_t>(d1) * d2 * d3 * d4;
- }
- 
+}
  // Define a helper struct that holds the two types.
  template<typename InT, typename OutT>
  struct MixedPrecision {
@@ -37,97 +40,73 @@
  typedef ::testing::Types< MixedPrecision<double, double>,
                            MixedPrecision<double,  float>,
                            MixedPrecision<float,   float>  > MixedTypes;
- 
-
-class Horizonral_dimension
-{
-
-public: 
-
-    static constexpr int nproma     = 3;  // inner loop length
-    static constexpr int nlev        = 4;  // number of vertical levels
-    static constexpr int nblks_e     = 2;  // number of edge blocks (for p_e_in)
-    static constexpr int nblks_v     = 2;  // number of vertex blocks (for rbf arrays and outputs)
-    static constexpr int rbf_vec_dim = 6;  // fixed dimension for rbf vector (stencil points)
-    static constexpr int nblks_c     = 2;
-
-    // Parameter values.
-    int i_startblk    = 0;
-    int i_endblk      = 1;      // Test blocks [0, 1]
-    int i_startidx_in = 0;
-    int i_endidx_in   = nproma - 1; // Full range: 0 .. nproma-1
-    int i_startblk_e  = 0;
-    int i_endblk_e    = 0;
-    int slev          = 0;
-    int i_startidx_e  = 0;
-    int i_endidx_e    = nproma - 1;
-    int nlev          = 0;
-    int slev          = 0;
-    int elev          = nlev - 1;   // Full vertical range (0 .. nlev-1)
-    int cell_type     = 0;
-    bool lacc         = false;  // Not using ACC-specific behavior.
-};
-
-
 
 
  // Define a typed test fixture.
  template <typename TypePair>
  class MoLibLaplaceMixedTestFixture : public ::testing::Test {
- public:
+  public:
+    using InType  = typename TypePair::in_type;
+    using OutType = typename TypePair::out_type;
+
+    static constexpr int nproma         = 3;  // inner loop length
+    static constexpr int nblks_e        = 2;  // number of edge blocks (for p_e_in)
+    static constexpr int nblks_v        = 2;  // number of vertex blocks (for rbf arrays and outputs)
+    static constexpr int rbf_vec_dim    = 6;  // fixed dimension for rbf vector (stencil points)
+    static constexpr int nblks_c        = 2;
+
+    int  i_startblk    = 0;
+    int i_endblk       = 1;
+    int i_startidx_in  = 0;
+    int i_endidx_in    = nproma-1 ;
+    int i_startblk_e   = 0 ; 
+    int i_endblk_e     = 0 ;
+    int i_startidx_e   = 0; 
+    int i_endidx_e     = 0 ; 
+    int nlev           = 4;  
+    int slev           = 0 ; 
+    int elev           = nlev - 1;
+    int cell_type      = 0 ; 
+    const bool lacc    = false ; 
+
+
 
    //Arrays for nabla2_scalar_lib 
-   std::vector<T> psi_c; //Dimension (nproma,nlev,nblks_c) 
+   std::vector<InType> psi_c; //Dimension (nproma,nlev,nblks_c) 
    std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) 
    std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3)
    std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2)
    std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2)
-   std::vector<T> inv_dual_edge_length; //Dimension (nproma,nblks_e)
+   std::vector<InType> inv_dual_edge_length; //Dimension (nproma,nblks_e)
    std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3)
    std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3)
-   std::vector<T> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c)
-   std::vector<T> geofac_div; //Dimension (nproma,cell_type,nblks_c)
-   std::vector<T> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c)
+   std::vector<InType> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c)
+   std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c)
+   std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c)
 
    MoLibLaplaceMixedTestFixture() {
 
-
        //Arrays for nabla2_scalar_lib
-       psi_c.resize(num_elements_3d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); 
-       inv_dual_edge_length.resize(num_elements_2d<T>(nproma,nblks_e), static_cast<T>(1)); 
-       geofac_n2s.resize(num_elements_2d<T> (nproma,cell_type+1,nblks_c), static_cast<T>(1)); 
-       geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); 
-       nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(1)); 
-       cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3)); 
-       cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_e,3)); 
-       edge_cell_idx.resize(num_elements_3d<int> (nproma,nblks_e,2)); 
-       edge_cell_blk.resize(num_elements_3d<int>(nproma,nlev,nblks_c)); 
-       cell_edge_idx.resize(num_elements_2d<int>(nproma,nblks_c,3)); 
-       cell_edge_blk.resize(num_elements_2d<int>(nproma,nblks_c,3)); 
-       geofac_n2s.resize(num_elements_2d<T>(nproma,cell_type+1,nblks_c), static_cast<T>(1)); 
-       geofac_div.resize(num_elements_2d<T>(nproma,cell_type,nblks_c), static_cast<T>(1)); 
-       nabla2_psi_c.resize(num_elements_2d<T>(nproma,nlev,nblks_c), static_cast<T>(0)); 
-
-     // Allocate and initialize inputs.
-     psi_c.resize(num_elements_3d<InType>(nproma, nlev, nblks_e), static_cast<InType>(1));
-
-     cell_neighbor_idx.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 1);
-     cell_neighbor_blk.resize(num_elements_3d<int>(rbf_vec_dim, nproma, nblks_v), 0);
-     geofac_n2s.resize(num_elements_3d<InType>(rbf_vec_dim, 2, nproma, nblks_v), static_cast<InType>(1));
- 
-     // Allocate output arrays and initialize to zero.
-     p_u_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
-     p_v_out.resize(num_elements_3d<OutType>(nproma, nlev, nblks_v), static_cast<OutType>(0));
+       psi_c.resize(num_elements_3d<InType>(nproma,nlev,nblks_c), static_cast<InType>(1));
+       inv_dual_edge_length.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); 
+       geofac_n2s.resize(num_elements_3d<InType>(nproma,cell_type+1,nblks_c), static_cast<InType>(1)); 
+       geofac_div.resize(num_elements_3d<InType>(nproma,cell_type,nblks_c), static_cast<InType>(1)); 
+       nabla2_psi_c.resize(num_elements_3d<OutType>(nproma,nlev,nblks_c), static_cast<OutType>(0)); 
+       cell_neighbor_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); 
+       cell_neighbor_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); 
+       edge_cell_idx.resize(num_elements_3d<int>(nproma,nblks_e,2), 1); 
+       edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); 
+       cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); 
+       cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); 
    }
  };
 
-typedef ::testing::Types<float, double> SingleType;
 
-TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib);
-TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) {
+TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, MixedTypes);
+
+TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) {
   using InType  = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
-
   
   nabla2_scalar_lib<InType, OutType>(
     this->psi_c.data(), 
@@ -145,7 +124,7 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) {
     this->i_endblk,
     this->i_startidx_in,
     this->i_endidx_in,
-    this->i_startblk_e,
+    this->i_startblk_e, 
     this->i_endblk_e, 
     this->i_startidx_e, 
     this->i_endidx_e, 
@@ -154,13 +133,13 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) {
     this->elev, 
     this->nproma, 
     this->nblks_e, 
-    this->nblks_c, 
+    this->nblks_c,
     this->cell_type, 
-    this->lacc)
+    this->lacc);
 
   for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
     for (int level = this->slev ; level < this->elev; ++level) {
-      for (int i = this->i_startidx ; i < this->i_endidx; ++i) {
+      for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) {
 
         size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
         EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
-- 
GitLab


From 38db433dba4597cbac291ffca01e2bfdfb49687b Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante3.lvt.dkrz.de>
Date: Wed, 9 Apr 2025 22:28:51 +0200
Subject: [PATCH 47/50] Fixed linking error

---
 src/horizontal/mo_lib_laplace_bindings.cpp | 27 ++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
index 4e22793..984c50a 100644
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -17,12 +17,12 @@
 void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                        const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
-                       const double* geofac_n2s, const double* geofac_div, const double* nabla2_psi_c,
+                       const double* geofac_n2s, const double* geofac_div, double* nabla2_psi_c,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
 
-    nabla2_scalar_lib<double>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+    nabla2_scalar_lib<double, double>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
                        edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
                        cell_edge_idx, cell_edge_blk,
                        geofac_n2s, geofac_div, nabla2_psi_c,
@@ -35,12 +35,12 @@ void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, con
 void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                        const int* edge_cell_idx, const int* edge_cell_blk, const float* inv_dual_edge_length,
                        const int* cell_edge_idx, const int* cell_edge_blk,
-                       const float* geofac_n2s, const float* geofac_div, const float* nabla2_psi_c,
+                       const float* geofac_n2s, const float* geofac_div, float* nabla2_psi_c,
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
 
-    nabla2_scalar_lib<float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+    nabla2_scalar_lib<float,float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
                        edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
                        cell_edge_idx, cell_edge_blk,
                        geofac_n2s, geofac_div, nabla2_psi_c,
@@ -49,6 +49,25 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        nlev,  slev,  elev,  nproma,  nblks_e,  nblks_c,  cell_type, lacc);
 }
 
+
+
+void nabla2_scalar_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                       const int* edge_cell_idx, const int* edge_cell_blk, const double* inv_dual_edge_length,
+                       const int* cell_edge_idx, const int* cell_edge_blk,
+                       const double* geofac_n2s, const double* geofac_div, float* nabla2_psi_c,
+                       int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+                       int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e,
+                       int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc){
+
+    nabla2_scalar_lib<double, float>(psi_c, cell_neighbor_idx, cell_neighbor_blk,
+                       edge_cell_idx, edge_cell_blk, inv_dual_edge_length,
+                       cell_edge_idx, cell_edge_blk,
+                       geofac_n2s, geofac_div, nabla2_psi_c,
+                       i_startblk, i_endblk, i_startidx_in, i_endidx_in,
+                       i_startblk_e, i_endblk_e, i_startidx_e, i_endidx_e,
+                       nlev, slev, elev, nproma, nblks_e, nblks_c, cell_type, lacc);
+}
+
 void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                       const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
-- 
GitLab


From ecc93abc47a7c4225393562a4aba1b41b4f95068 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante3.lvt.dkrz.de>
Date: Wed, 9 Apr 2025 22:32:09 +0200
Subject: [PATCH 48/50] Fixed typo

---
 src/horizontal/mo_lib_divrot.F90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/horizontal/mo_lib_divrot.F90 b/src/horizontal/mo_lib_divrot.F90
index a2f2ad9..c1360f6 100644
--- a/src/horizontal/mo_lib_divrot.F90
+++ b/src/horizontal/mo_lib_divrot.F90
@@ -561,7 +561,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -800,7 +800,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_q_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1008,7 +1008,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_lib(p_cc, lsq_idx_c, lsq_blk_c, &
@@ -1295,7 +1295,7 @@ CONTAINS
 !!
 !! !LITERATURE
 !! Ollivier-Gooch et al (2002): A High-Order-Accurate Unstructured Mesh
-!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Comput. Phys.,
+!! Finite-Volume Scheme for the Advection-Diffusion Equation, J. Compute. Phys.,
 !! 181, 729-752
 !!
   SUBROUTINE recon_lsq_cell_c_svd_lib(p_cc, lsq_idx_c, lsq_blk_c, &
-- 
GitLab


From 59e788ffbca278a46d0c1161b1aa608de556dbe5 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante3.lvt.dkrz.de>
Date: Thu, 10 Apr 2025 11:02:38 +0200
Subject: [PATCH 49/50] Basic structure test for nabla2_scalar_lib
 nabla2_scalar_avg_lib

---
 src/horizontal/mo_lib_laplace.cpp          | 19 +++--
 src/horizontal/mo_lib_laplace.hpp          |  6 +-
 src/horizontal/mo_lib_laplace_bindings.cpp | 22 ++++-
 src/horizontal/mo_lib_laplace_bindings.h   |  4 +-
 test/c/test_mo_lib_laplace.cpp             | 97 ++++++++++++++++------
 5 files changed, 109 insertions(+), 39 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 3c52f20..1aca254 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -122,15 +122,15 @@ void nabla2_scalar_lib<float, float>(const float* psi_c, const int* cell_neighbo
 //--------------------------------nabla2_scalar_avg_lib-------------------------------------
 
 
-template<typename T>
+template<typename T, typename S>
 void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                      const T* geofac_n2s, const T* avg_coeff, T* nabla2_psi_c,
+                      const T* geofac_n2s, const T* avg_coeff, S* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
 
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
     typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
 
     UnmanagedConstT3D psi_c_view(psi_c, nproma,nlev,nblks_c);
@@ -273,19 +273,26 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
 
 
 template
-void nabla2_scalar_avg_lib<double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
+void nabla2_scalar_avg_lib<double, double>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
                       const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
 
 template
-void nabla2_scalar_avg_lib<float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+void nabla2_scalar_avg_lib<double, float>(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const double* geofac_n2s, const double* avg_coeff, float* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc); 
 
+template
+void nabla2_scalar_avg_lib<float, float>(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
+
 
 //-----------------------------------------nabla2_vec_atmos_lib---------------------------------------
 
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index e132247..0fde697 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -22,9 +22,11 @@ void nabla2_scalar_lib(const T* psi_c, const int* cell_neighbor_idx, const int*
                        int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, 
                        int i_startblk_e, int i_endblk_e, int i_startidx_e, int i_endidx_e, 
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
-template<typename T>
+
+
+template<typename T, typename S>
 void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk, 
-                      const T* geofac_n2s, const T* avg_coeff,  T* nabla2_psi_c, 
+                      const T* geofac_n2s, const T* avg_coeff,  S* nabla2_psi_c, 
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3], 
                       int nblks_c, int cell_type, int patch_id, 
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
index 984c50a..a4e556b 100644
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -75,7 +75,7 @@ void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
 
 
-    nabla2_scalar_avg_lib<double>(psi_c, cell_neighbor_idx,cell_neighbor_blk,
+    nabla2_scalar_avg_lib<double, double>(psi_c, cell_neighbor_idx,cell_neighbor_blk,
                                     geofac_n2s, avg_coeff, nabla2_psi_c,
                                     i_startblk_in,  i_endblk_in,  i_startidx_in,  i_endidx_in,
                                     nblks_c,  cell_type,  patch_id,
@@ -83,13 +83,27 @@ void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx,
 
 }
 
+void nabla2_scalar_avg_lib_sp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
+                      const double* geofac_n2s, const double* avg_coeff, float* nabla2_psi_c,
+                      int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
+                      int nblks_c, int cell_type, int patch_id,
+                      int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
+
+    nabla2_scalar_avg_lib<double, float>(psi_c, cell_neighbor_idx,  cell_neighbor_blk,
+                                   geofac_n2s, avg_coeff, nabla2_psi_c,
+                                   i_startblk_in,  i_endblk_in,  i_startidx_in,  i_endidx_in,
+                                   nblks_c,  cell_type,  patch_id,
+                                   nlev,  slev,  elev,  nproma, l_limited_area, lacc);
+}
+
+
 void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
                       const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc){
 
-    nabla2_scalar_avg_lib<float>(psi_c, cell_neighbor_idx,  cell_neighbor_blk,
+    nabla2_scalar_avg_lib<float, float>(psi_c, cell_neighbor_idx,  cell_neighbor_blk,
                                    geofac_n2s, avg_coeff, nabla2_psi_c,
                                    i_startblk_in,  i_endblk_in,  i_startidx_in,  i_endidx_in,
                                    nblks_c,  cell_type,  patch_id,
@@ -97,6 +111,10 @@ void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx,
 }
 
 
+
+
+
+
 void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
index 7b22527..49f3c4b 100644
--- a/src/horizontal/mo_lib_laplace_bindings.h
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -31,13 +31,13 @@ void nabla2_scalar_lib_sp(const float* psi_c, const int* cell_neighbor_idx, cons
                        int nlev, int slev, int elev, int nproma, int nblks_e, int nblks_c, int cell_type, bool lacc);
 
 void nabla2_scalar_avg_lib_dp(const double* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                      const double* geofac_n2s, const double* avg_coeff, double* nabla2_psi_c,
+                      const double* geofac_n2s, const double* avg_coeff, const double* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
 
 void nabla2_scalar_avg_lib_sp(const float* psi_c, const int* cell_neighbor_idx, const int* cell_neighbor_blk,
-                      const float* geofac_n2s, const float* avg_coeff, float* nabla2_psi_c,
+                      const float* geofac_n2s, const float* avg_coeff, const float* nabla2_psi_c,
                       int i_startblk_in[3], int i_endblk_in[3], int i_startidx_in[3], int i_endidx_in[3],
                       int nblks_c, int cell_type, int patch_id,
                       int nlev, int slev, int elev, int nproma, bool l_limited_area, bool lacc);
diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp
index f5e1995..0e25792 100644
--- a/test/c/test_mo_lib_laplace.cpp
+++ b/test/c/test_mo_lib_laplace.cpp
@@ -63,6 +63,16 @@ template<typename T>
     int i_endblk_e     = 0 ;
     int i_startidx_e   = 0; 
     int i_endidx_e     = 0 ; 
+
+    //nabla2_scalar_avg_lib_sp specifics
+    std::vector<int> i_startblk_in{}; 
+    std::vector<int> i_endblk_in{};
+    std::vector<int> i_startidx_array{};
+    std::vector<int> i_endidx_array{}; 
+    int patch_id       = 0;
+    const bool l_limited_area = false; 
+    //
+
     int nlev           = 4;  
     int slev           = 0 ; 
     int elev           = nlev - 1;
@@ -84,6 +94,9 @@ template<typename T>
    std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c)
    std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c)
 
+   //Arrays for nabla2_scalar_avg_lib_sp
+   std::vector<InType> avg_coeff; 
+
    MoLibLaplaceMixedTestFixture() {
 
        //Arrays for nabla2_scalar_lib
@@ -98,43 +111,33 @@ template<typename T>
        edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); 
        cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); 
        cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); 
+
+       //Allocating nabla2_scalar_avg_lib_sp specifics
+       i_startblk_in.resize(3, i_startblk); 
+       i_endblk_in.resize(3, i_endblk); 
+       i_startidx_array.resize(3, i_startidx_in); 
+       i_endidx_array.resize(3, i_endidx_in);
    }
  };
 
 
 TYPED_TEST_SUITE(MoLibLaplaceMixedTestFixture, MixedTypes);
 
-TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) {
+TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_lib) {
   using InType  = typename TestFixture::InType;
   using OutType = typename TestFixture::OutType;
   
   nabla2_scalar_lib<InType, OutType>(
-    this->psi_c.data(), 
-    this->cell_neighbor_idx.data(),
-    this->cell_neighbor_blk.data(),
-    this->edge_cell_idx.data(),
-    this->edge_cell_blk.data(),
-    this->inv_dual_edge_length.data(),
-    this->cell_edge_idx.data(),
-    this->cell_edge_blk.data(),
-    this->geofac_n2s.data(),
-    this->geofac_div.data(),
-    this->nabla2_psi_c.data(), 
-    this->i_startblk,
-    this->i_endblk,
-    this->i_startidx_in,
-    this->i_endidx_in,
-    this->i_startblk_e, 
-    this->i_endblk_e, 
-    this->i_startidx_e, 
-    this->i_endidx_e, 
-    this->nlev, 
-    this->slev, 
-    this->elev, 
-    this->nproma, 
-    this->nblks_e, 
-    this->nblks_c,
-    this->cell_type, 
+    this->psi_c.data(), this->cell_neighbor_idx.data(),
+    this->cell_neighbor_blk.data(), this->edge_cell_idx.data(),
+    this->edge_cell_blk.data(), this->inv_dual_edge_length.data(),
+    this->cell_edge_idx.data(),this->cell_edge_blk.data(),
+    this->geofac_n2s.data(), this->geofac_div.data(),
+    this->nabla2_psi_c.data(), this->i_startblk,
+    this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+    this->i_startblk_e, this->i_endblk_e,  this->i_startidx_e, 
+    this->i_endidx_e,  this->nlev, this->slev, this->elev, 
+    this->nproma, this->nblks_e, this->nblks_c, this->cell_type, 
     this->lacc);
 
   for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
@@ -148,3 +151,43 @@ TYPED_TEST(MoLibLaplaceMixedTestFixture, BasicTest) {
     }
   }
 }
+
+
+TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_scalar_avg_lib) {
+using InType  = typename TestFixture::InType;
+using OutType = typename TestFixture::OutType;
+
+
+nabla2_scalar_avg_lib<InType, OutType>(
+    this->psi_c.data(), this->cell_neighbor_idx.data(),
+    this->cell_neighbor_blk.data(), this->geofac_n2s.data(),
+    this->avg_coeff.data(), this->nabla2_psi_c.data(),
+    this->i_startblk_in.data(), this->i_endblk_in.data(),
+    this->i_startidx_array.data(), this->i_endidx_array.data(),
+    this->nblks_c, this->cell_type, this->patch_id,
+    this->nlev, this->slev, this->elev, this->nproma,
+    this->l_limited_area, this->lacc);
+
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = this->slev ; level < this->elev; ++level) {
+      for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) {
+  
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        EXPECT_NEAR(this->nabla2_psi_c[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
+
+
+
+
+
+
+
+
+
+
+
+
-- 
GitLab


From 2de0e2bd68dab4324072ae5954302145788e6a78 Mon Sep 17 00:00:00 2001
From: Ali Sedighi <k202194@levante3.lvt.dkrz.de>
Date: Thu, 10 Apr 2025 13:32:23 +0200
Subject: [PATCH 50/50] Added test for rot_vertex_atmos_lib

---
 src/horizontal/mo_lib_laplace.cpp          | 21 +++--
 src/horizontal/mo_lib_laplace.hpp          |  4 +-
 src/horizontal/mo_lib_laplace_bindings.cpp | 28 ++++++-
 src/horizontal/mo_lib_laplace_bindings.h   |  4 +-
 test/c/test_mo_lib_laplace.cpp             | 95 ++++++++++++++++++----
 5 files changed, 121 insertions(+), 31 deletions(-)

diff --git a/src/horizontal/mo_lib_laplace.cpp b/src/horizontal/mo_lib_laplace.cpp
index 1aca254..c5c3ce3 100644
--- a/src/horizontal/mo_lib_laplace.cpp
+++ b/src/horizontal/mo_lib_laplace.cpp
@@ -296,12 +296,12 @@ void nabla2_scalar_avg_lib<float, float>(const float* psi_c, const int* cell_nei
 
 //-----------------------------------------nabla2_vec_atmos_lib---------------------------------------
 
-template<typename T>
+template<typename T, typename S>
 void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation,
                           const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div,
-                          const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          const T* geofac_rot, S* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
                           int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
@@ -309,7 +309,7 @@ void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* e
     typedef Kokkos::View<const T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D;
     typedef Kokkos::View<const int***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D;
 
-    typedef Kokkos::View<T***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
+    typedef Kokkos::View<S***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D;
     typedef Kokkos::View<const T**, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT2D;
 
     UnmanagedConstT3D vec_e_view(vec_e, nproma,nlev,nblks_e);
@@ -370,7 +370,7 @@ void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* e
 
 
 template
-void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+void nabla2_vec_atmos_lib<double, double>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
                           const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
@@ -381,7 +381,17 @@ void nabla2_vec_atmos_lib<double>(const double* vec_e, const int* edge_cell_idx,
 
 
 template
-void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+void nabla2_vec_atmos_lib<double, float>(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
+                          const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
+                          const double* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
+
+ template
+void nabla2_vec_atmos_lib<float, float>(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
                           const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div,
@@ -389,3 +399,4 @@ void nabla2_vec_atmos_lib<float>(const float* vec_e, const int* edge_cell_idx, c
                           int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
+
diff --git a/src/horizontal/mo_lib_laplace.hpp b/src/horizontal/mo_lib_laplace.hpp
index 0fde697..ac0906d 100644
--- a/src/horizontal/mo_lib_laplace.hpp
+++ b/src/horizontal/mo_lib_laplace.hpp
@@ -33,12 +33,12 @@ void nabla2_scalar_avg_lib(const T* psi_c, const int* cell_neighbor_idx, const i
 
 
 
-template<typename T>
+template<typename T, typename S>
 void nabla2_vec_atmos_lib(const T* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const T* tangent_orientation,
                           const T* inv_primal_edge_length, const T* inv_dual_edge_length, const T* geofac_div,
-                          const T* geofac_rot, T* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          const T* geofac_rot, S* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
                           int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
diff --git a/src/horizontal/mo_lib_laplace_bindings.cpp b/src/horizontal/mo_lib_laplace_bindings.cpp
index a4e556b..e4dfe7a 100644
--- a/src/horizontal/mo_lib_laplace_bindings.cpp
+++ b/src/horizontal/mo_lib_laplace_bindings.cpp
@@ -124,7 +124,7 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
 
-    nabla2_vec_atmos_lib<double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
+    nabla2_vec_atmos_lib<double, double>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
                            edge_vertex_blk, cell_edge_idx, cell_edge_blk,
                            vert_edge_idx, vert_edge_blk, tangent_orientation,
                            inv_primal_edge_length, inv_dual_edge_length, geofac_div,
@@ -135,6 +135,27 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons
 }
 
 
+
+
+void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
+                          const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
+                          const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
+                          const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
+                          const double* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
+                          int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
+                          int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
+
+    nabla2_vec_atmos_lib<double, float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
+                           edge_vertex_blk, cell_edge_idx, cell_edge_blk,
+                           vert_edge_idx, vert_edge_blk, tangent_orientation,
+                           inv_primal_edge_length, inv_dual_edge_length, geofac_div,
+                           geofac_rot,nabla2_vec_e, i_startblk_c, i_endblk_c, i_startidx_c,
+                           i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v,
+                           i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c,
+                           nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc);
+}
+
 void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const int* edge_cell_blk, const int* edge_vertex_idx,
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
@@ -144,7 +165,7 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc){
 
-    nabla2_vec_atmos_lib<float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
+    nabla2_vec_atmos_lib<float,float>(vec_e, edge_cell_idx, edge_cell_blk, edge_vertex_idx,
                                  edge_vertex_blk, cell_edge_idx, cell_edge_blk,
                                  vert_edge_idx, vert_edge_blk, tangent_orientation,
                                  inv_primal_edge_length, inv_dual_edge_length, geofac_div,
@@ -152,6 +173,5 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const
                                  i_endidx_c, i_startblk_v, i_endblk_v, i_startidx_v, i_endidx_v,
                                  i_startblk, i_endblk, i_startidx_in, i_endidx_in, nlev, nblks_c,
                                  nblks_v, nblks_e, slev, elev, nproma, cell_type, lacc);
-
-
 }
+
diff --git a/src/horizontal/mo_lib_laplace_bindings.h b/src/horizontal/mo_lib_laplace_bindings.h
index 49f3c4b..9329a52 100644
--- a/src/horizontal/mo_lib_laplace_bindings.h
+++ b/src/horizontal/mo_lib_laplace_bindings.h
@@ -46,7 +46,7 @@ void nabla2_vec_atmos_lib_dp(const double* vec_e, const int* edge_cell_idx, cons
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const double* tangent_orientation,
                           const double* inv_primal_edge_length, const double* inv_dual_edge_length, const double* geofac_div,
-                          const double* geofac_rot, double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          const double* geofac_rot, const double* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
                           int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
@@ -56,7 +56,7 @@ void nabla2_vec_atmos_lib_sp(const float* vec_e, const int* edge_cell_idx, const
                           const int* edge_vertex_blk, const int* cell_edge_idx, const int* cell_edge_blk,
                           const int* vert_edge_idx, const int* vert_edge_blk, const float* tangent_orientation,
                           const float* inv_primal_edge_length, const float* inv_dual_edge_length, const float* geofac_div,
-                          const float* geofac_rot, float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
+                          const float* geofac_rot, const float* nabla2_vec_e, int i_startblk_c, int i_endblk_c, int i_startidx_c,
                           int i_endidx_c, int i_startblk_v, int i_endblk_v, int i_startidx_v, int i_endidx_v,
                           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int nlev, int nblks_c,
                           int nblks_v, int nblks_e, int slev, int elev, int nproma, int cell_type, bool lacc);
diff --git a/test/c/test_mo_lib_laplace.cpp b/test/c/test_mo_lib_laplace.cpp
index 0e25792..ad482b6 100644
--- a/test/c/test_mo_lib_laplace.cpp
+++ b/test/c/test_mo_lib_laplace.cpp
@@ -59,11 +59,21 @@ template<typename T>
     int i_endblk       = 1;
     int i_startidx_in  = 0;
     int i_endidx_in    = nproma-1 ;
+
     int i_startblk_e   = 0 ; 
     int i_endblk_e     = 0 ;
     int i_startidx_e   = 0; 
     int i_endidx_e     = 0 ; 
 
+    int i_startblk_c = 0; 
+    int i_endblk_c = 0 ; 
+    int i_startidx_c = 0; 
+    int i_endidx_c = 0;
+    int i_startblk_v = 0; 
+    int i_endblk_v = 0; 
+    int i_startidx_v =0;
+    int i_endidx_v = 0; 
+
     //nabla2_scalar_avg_lib_sp specifics
     std::vector<int> i_startblk_in{}; 
     std::vector<int> i_endblk_in{};
@@ -71,7 +81,9 @@ template<typename T>
     std::vector<int> i_endidx_array{}; 
     int patch_id       = 0;
     const bool l_limited_area = false; 
-    //
+    
+
+
 
     int nlev           = 4;  
     int slev           = 0 ; 
@@ -82,20 +94,35 @@ template<typename T>
 
 
    //Arrays for nabla2_scalar_lib 
-   std::vector<InType> psi_c; //Dimension (nproma,nlev,nblks_c) 
-   std::vector<int> cell_neighbor_idx; //Dimenaion (nproma,nblks_c,3) 
-   std::vector<int> cell_neighbor_blk; //Dimension (nproma,nblks_c,3)
-   std::vector<int> edge_cell_idx; //Dimension (nproma,nblks_e,2)
-   std::vector<int> edge_cell_blk; //Dimension (nproma,nblks_e,2)
-   std::vector<InType> inv_dual_edge_length; //Dimension (nproma,nblks_e)
-   std::vector<int> cell_edge_idx; //Dimension (nproma,nblks_c,3)
-   std::vector<int> cell_edge_blk; //Dimension (nproma,nblks_c,3)
-   std::vector<InType> geofac_n2s; // Dimension (nproma,cell_type+1,nblks_c)
-   std::vector<InType> geofac_div; //Dimension (nproma,cell_type,nblks_c)
-   std::vector<OutType> nabla2_psi_c; //Dimension (nproma,nlev,nblks_c)
+   std::vector<InType> psi_c{}; //Dimension (nproma,nlev,nblks_c) 
+   std::vector<int> cell_neighbor_idx{}; //Dimenaion (nproma,nblks_c,3) 
+   std::vector<int> cell_neighbor_blk{}; //Dimension (nproma,nblks_c,3)
+   std::vector<int> edge_cell_idx{}; //Dimension (nproma,nblks_e,2)
+   std::vector<int> edge_cell_blk{}; //Dimension (nproma,nblks_e,2)
+   std::vector<InType> inv_dual_edge_length{}; //Dimension (nproma,nblks_e)
+   std::vector<int> cell_edge_idx{}; //Dimension (nproma,nblks_c,3)
+   std::vector<int> cell_edge_blk{}; //Dimension (nproma,nblks_c,3)
+   std::vector<InType> geofac_n2s{}; // Dimension (nproma,cell_type+1,nblks_c)
+   std::vector<InType> geofac_div{}; //Dimension (nproma,cell_type,nblks_c)
+   std::vector<OutType> nabla2_psi_c{}; //Dimension (nproma,nlev,nblks_c)
 
    //Arrays for nabla2_scalar_avg_lib_sp
-   std::vector<InType> avg_coeff; 
+   std::vector<InType> avg_coeff{}; 
+
+   //Arrays for nabla2_vec_atmos_lib
+   std::vector<InType> vec_e{}; //Dimension (nproma,nlev,nblks_e)
+   std::vector<int> edge_vertex_idx{}; //Dimension (nproma,nblks_e,4)
+   std::vector<int> edge_vertex_blk{}; //Dimension (nproma,nblks_e,4)
+   std::vector<int> vert_edge_idx{}; //Dimension (nproma,nblks_v,6)
+   std::vector<int> vert_edge_blk{}; //Dimension (nproma,nblks_v,6)
+   std::vector<InType> tangent_orientation{}; //Dimension (nproma,nblks_e)
+   std::vector<InType> inv_primal_edge_length{};//Dimension (nproma,nblks_e)
+
+   std::vector<InType> geofac_rot{}; //Dimension (nproma,9-cell_type,nblks_v)
+   std::vector<OutType> nabla2_vec_e{}; //Dimension (nproma,nlev,nblks_e)
+
+
+
 
    MoLibLaplaceMixedTestFixture() {
 
@@ -111,6 +138,17 @@ template<typename T>
        edge_cell_blk.resize(num_elements_3d<int>(nproma,nblks_e,2), 0); 
        cell_edge_idx.resize(num_elements_3d<int>(nproma,nblks_c,3), 1); 
        cell_edge_blk.resize(num_elements_3d<int>(nproma,nblks_c,3), 0); 
+       
+
+       vec_e.resize(num_elements_3d<InType>(nproma,nlev,nblks_e), static_cast<InType>(1)); 
+       tangent_orientation.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); 
+       inv_primal_edge_length.resize(num_elements_2d<InType>(nproma,nblks_e), static_cast<InType>(1)); 
+       geofac_rot.resize(num_elements_3d<InType>(nproma,9-cell_type,nblks_v), static_cast<InType>(1)); 
+       nabla2_vec_e.resize(num_elements_3d<OutType>(nproma,nlev,nblks_e), static_cast<InType>(0));
+       edge_vertex_idx.resize(num_elements_3d<InType>(nproma,nblks_e,4), 1); 
+       edge_vertex_blk.resize(num_elements_3d<InType>(nproma,nblks_e,4), 0); 
+       vert_edge_idx.resize(num_elements_3d<InType>(nproma,nblks_v,6), 1); 
+       vert_edge_blk.resize(num_elements_3d<InType>(nproma,nblks_v,6), 0); 
 
        //Allocating nabla2_scalar_avg_lib_sp specifics
        i_startblk_in.resize(3, i_startblk); 
@@ -181,13 +219,34 @@ nabla2_scalar_avg_lib<InType, OutType>(
 }
 
 
+TYPED_TEST(MoLibLaplaceMixedTestFixture, nabla2_vec_atmos_lib) {
+using InType  = typename TestFixture::InType;
+using OutType = typename TestFixture::OutType;
 
 
+nabla2_vec_atmos_lib<InType, OutType>(
+    this->vec_e.data(), this->edge_cell_idx.data(),
+    this->edge_cell_blk.data(), this->edge_vertex_idx.data(),
+    this->edge_vertex_blk.data(), this->cell_edge_idx.data(),
+    this->cell_edge_blk.data(), this->vert_edge_idx.data(),
+    this->vert_edge_blk.data(), this->tangent_orientation.data(),
+    this->inv_primal_edge_length.data(), this->inv_dual_edge_length.data(), 
+    this->geofac_div.data(), this->geofac_rot.data(), this->nabla2_vec_e.data(), 
+    this->i_startblk_c, this->i_endblk_c, this->i_startidx_c, this->i_endidx_c,
+    this->i_startblk_v, this->i_endblk_v, this->i_startidx_v, this->i_endidx_v,
+    this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+    this->nlev, this->nblks_c, this->nblks_v, this-> nblks_e, this->slev, this->elev, 
+    this->nproma, this->cell_type, this->lacc);
 
+  for (int block = this->i_startblk; block <= this->i_endblk; ++block) {
+    for (int level = this->slev ; level < this->elev; ++level) {
+      for (int i = this->i_startidx_in ; i < this->i_endidx_in; ++i) {
 
-
-
-
-
-
+        size_t idx = i + level * this->nproma + block * this->nproma * this->nlev;
+        EXPECT_NEAR(this->nabla2_vec_e[idx], static_cast<OutType>(6), static_cast<OutType>(1e-5))
+            << "Failure at block " << block << ", level " << level << ", index " << i;
+      }
+    }
+  }
+}
 
-- 
GitLab