diff --git a/_typos.toml b/_typos.toml
index 4fe4968b64e1df08f66e6891fecf0a67a60c9bc4..8de4a86304bbea42529095bd68ed5726e1a028bf 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -1,6 +1,7 @@
 [default]
 extend-ignore-re = [
 	".*_pn",
+	"f4dout_*",
 ]
 extend-ignore-words-re = [
   "Comput",
@@ -10,6 +11,7 @@ extend-ignore-words-re = [
 Wirth = "Wirth" # author name
 nin = "nin" # number of inputs
 Pilar = "Pilar" # author name
+Comput = "Comput" # abbreviation for Computational
 
 [default.extend-identifiers]
 f4dout = "f4dout" # file name
diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt
index 078a14df177906b11f6d3914ac815eb4b28ed162..f3b75c052ee368835163bc770682b1805de41def 100644
--- a/src/horizontal/CMakeLists.txt
+++ b/src/horizontal/CMakeLists.txt
@@ -11,6 +11,7 @@
 
 add_library(
   iconmath-horizontal
+  mo_lib_divrot.cpp
   mo_lib_divrot.F90
   mo_lib_laplace.F90
   mo_lib_gradients.F90)
@@ -57,7 +58,7 @@ target_include_directories(
     # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for
     # multiple compile languages
     # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html
-    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>>
+    $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>>
   PRIVATE
     # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple
     # compile languages
diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d086e8ba4d7910986117e912fa32cdc4f4426ac5
--- /dev/null
+++ b/src/horizontal/mo_lib_divrot.cpp
@@ -0,0 +1,1359 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <iostream>
+#include <vector>
+
+#include <horizontal/mo_lib_divrot.hpp>
+#include <support/mo_lib_loopindices.hpp>
+
+template <typename T>
+void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx,
+                      const int *cell_neighbor_blk, const T *lsq_qtmat_c,
+                      const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c,
+                      const T *lsq_moments, T *p_coeff, int i_startblk,
+                      int i_endblk, int i_startidx_in, int i_endidx_in,
+                      int slev, int elev, int nproma, bool l_consv, bool lacc,
+                      bool acc_async, int nblks_c, int nlev, int lsq_dim_unk,
+                      int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T *> z_d("z_d", lsq_dim_c);
+  Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk);
+
+  UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
+                                     lsq_dim_c, nblks_c);
+  UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk,
+                                          nblks_c);
+  UnmanagedConstT3D lsq_rmat_utri_c_view(
+      lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
+      nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_l_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                   p_cc_view(jc, jk, jb);
+          z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                   p_cc_view(jc, jk, jb);
+          z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                   p_cc_view(jc, jk, jb);
+          // matrix multiplication Q^T d (partitioned into 2 dot products)
+          z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) +
+                            lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) +
+                            lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2);
+          z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) +
+                            lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) +
+                            lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2);
+
+          p_coeff_view(2, jc, jk, jb) =
+              lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1);
+          p_coeff_view(1, jc, jk, jb) =
+              lsq_rmat_rdiag_c_view(jc, 0, jb) *
+              (z_qt_times_d(0) -
+               lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb));
+          p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb);
+        });
+    if (l_consv) {
+      Kokkos::parallel_for(
+          "recon_lsq_cell_l_consv", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            p_coeff_view(0, jc, jk, jb) =
+                p_coeff_view(0, jc, jk, jb) -
+                p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
+                p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1);
+          });
+    }
+  }
+
+  if (!acc_async)
+    Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L);
+
+template <typename T>
+void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx,
+                          const int *cell_neighbor_blk, const T *lsq_pseudoinv,
+                          const T *lsq_moments, T *p_coeff, int i_startblk,
+                          int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, bool l_consv,
+                          bool lacc, bool acc_async, int nblks_c, int nlev,
+                          int lsq_dim_unk, int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T *> z_b("z_b", lsq_dim_c);
+
+  UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
+                                       lsq_dim_c, nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_l_svd_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                   p_cc_view(jc, jk, jb);
+
+          p_coeff_view(2, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2);
+          p_coeff_view(1, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2);
+          p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb);
+        });
+    if (l_consv) {
+      Kokkos::parallel_for(
+          "recon_lsq_cell_l_svd_consv", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            p_coeff_view(0, jc, jk, jb) =
+                p_coeff_view(0, jc, jk, jb) -
+                p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
+                p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1);
+          });
+    }
+  }
+
+  if (!acc_async)
+    Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD);
+
+template <typename T>
+void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
+                      const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c,
+                      const T *lsq_rmat_utri_c, const T *lsq_moments,
+                      T *p_coeff, int i_startblk, int i_endblk,
+                      int i_startidx_in, int i_endidx_in, int slev, int elev,
+                      int nproma, int patch_id, bool l_limited_area, bool lacc,
+                      int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev);
+  Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk);
+
+  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
+                                     lsq_dim_c, nblks_c);
+  UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c);
+  UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma,
+                              (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
+                              nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  if (patch_id > 0 || l_limited_area) {
+    Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
+        {0, i_startidx_in, slev, i_startblk},
+        {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_init", initPolicy,
+        KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
+          p_coeff_view(ji, jc, jk, jb) = 0;
+        });
+  }
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_step1", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
+                           p_cc_view(jc, jk, jb);
+        });
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_step2", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
+
+          p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4);
+          p_coeff_view(4, jc, jk, jb) =
+              ptr_rrdiag(jc, 3, jb) *
+              (z_qt_times_d(3) -
+               ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb));
+          p_coeff_view(3, jc, jk, jb) =
+              ptr_rrdiag(jc, 2, jb) *
+              (z_qt_times_d(2) -
+               ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) -
+               ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb));
+          p_coeff_view(2, jc, jk, jb) =
+              ptr_rrdiag(jc, 1, jb) *
+              (z_qt_times_d(1) -
+               ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) -
+               ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) -
+               ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb));
+          p_coeff_view(1, jc, jk, jb) =
+              ptr_rrdiag(jc, 0, jb) *
+              (z_qt_times_d(0) -
+               ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) -
+               ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) -
+               ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) -
+               ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb));
+          p_coeff_view(0, jc, jk, jb) =
+              p_cc_view(jc, jk, jb) -
+              p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
+              p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
+              p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
+              p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
+              p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4);
+        });
+  }
+
+  Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q);
+
+template <typename T>
+void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c,
+                          const int *lsq_blk_c, const T *lsq_pseudoinv,
+                          const T *lsq_moments, T *p_coeff, int i_startblk,
+                          int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int patch_id,
+                          bool l_limited_area, bool lacc, int nblks_c, int nlev,
+                          int lsq_dim_unk, int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev);
+
+  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
+                                       lsq_dim_c, nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  if (patch_id > 0 || l_limited_area) {
+    Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
+        {0, i_startidx_in, slev, i_startblk},
+        {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_svd_init", initPolicy,
+        KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
+          p_coeff_view(ji, jc, jk, jb) = 0;
+        });
+  }
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_svd_step1", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
+                           p_cc_view(jc, jk, jb);
+          z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
+                           p_cc_view(jc, jk, jb);
+        });
+    Kokkos::parallel_for(
+        "recon_lsq_cell_q_svd_step2", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          p_coeff_view(5, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) +
+              lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk);
+          p_coeff_view(4, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) +
+              lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk);
+          p_coeff_view(3, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) +
+              lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk);
+          p_coeff_view(2, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) +
+              lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk);
+          p_coeff_view(1, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) +
+              lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk);
+          p_coeff_view(0, jc, jk, jb) =
+              p_cc_view(jc, jk, jb) -
+              p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
+              p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
+              p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
+              p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
+              p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4);
+        });
+  }
+
+  Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD);
+
+template <typename T>
+void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,
+                      const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c,
+                      const T *lsq_rmat_utri_c, const T *lsq_moments,
+                      T *p_coeff, int i_startblk, int i_endblk,
+                      int i_startidx_in, int i_endidx_in, int slev, int elev,
+                      int nproma, int patch_id, bool l_limited_area, bool lacc,
+                      int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev);
+  Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9);
+
+  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk,
+                                     lsq_dim_c, nblks_c);
+  UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c);
+  UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma,
+                              (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2,
+                              nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  if (patch_id > 0 || l_limited_area) {
+    Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy(
+        {0, i_startidx_in, slev, i_startblk},
+        {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_c_init", initPolicy,
+        KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) {
+          p_coeff_view(ji, jc, jk, jb) = 0;
+        });
+  }
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_c_step1", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
+                           p_cc_view(jc, jk, jb);
+          z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
+                           p_cc_view(jc, jk, jb);
+        });
+    Kokkos::parallel_for(
+        "recon_lsq_cell_c_step2", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk);
+          z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) +
+                            lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk);
+
+          p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8);
+          p_coeff_view(8, jc, jk, jb) =
+              ptr_rrdiag(jc, 7, jb) *
+              (z_qt_times_d(7) -
+               ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb));
+          p_coeff_view(7, jc, jk, jb) =
+              ptr_rrdiag(jc, 6, jb) *
+              (z_qt_times_d(6) -
+               (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(6, jc, jk, jb) =
+              ptr_rrdiag(jc, 5, jb) *
+              (z_qt_times_d(5) -
+               (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(5, jc, jk, jb) =
+              ptr_rrdiag(jc, 4, jb) *
+              (z_qt_times_d(4) -
+               (ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) +
+                ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(4, jc, jk, jb) =
+              ptr_rrdiag(jc, 3, jb) *
+              (z_qt_times_d(3) -
+               (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) +
+                ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) +
+                ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(3, jc, jk, jb) =
+              ptr_rrdiag(jc, 2, jb) *
+              (z_qt_times_d(2) -
+               (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) +
+                ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) +
+                ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) +
+                ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(2, jc, jk, jb) =
+              ptr_rrdiag(jc, 1, jb) *
+              (z_qt_times_d(1) -
+               (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) +
+                ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) +
+                ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) +
+                ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) +
+                ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(1, jc, jk, jb) =
+              ptr_rrdiag(jc, 0, jb) *
+              (z_qt_times_d(0) -
+               (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) +
+                ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) +
+                ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) +
+                ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) +
+                ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) +
+                ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) +
+                ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) +
+                ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb)));
+          p_coeff_view(0, jc, jk, jb) =
+              p_cc_view(jc, jk, jb) -
+              (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) +
+               p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) +
+               p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) +
+               p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) +
+               p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) +
+               p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) +
+               p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) +
+               p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) +
+               p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8));
+        });
+  }
+
+  Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C);
+
+template <typename T>
+void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c,
+                          const int *lsq_blk_c, const T *lsq_pseudoinv,
+                          const T *lsq_moments, T *p_coeff, int i_startblk,
+                          int i_endblk, int i_startidx_in, int i_endidx_in,
+                          int slev, int elev, int nproma, int patch_id,
+                          bool l_limited_area,
+                          bool lacc, int nblks_c, int nlev, int lsq_dim_unk,
+                          int lsq_dim_c) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  Kokkos::View<T *> z_b("z_b", 9);
+
+  UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c);
+  UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c);
+
+  UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c);
+  UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c);
+
+  UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk,
+                                       lsq_dim_c, nblks_c);
+  UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk);
+
+  if (patch_id > 0 || l_limited_area) {
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                        i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy(
+          {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1});
+      Kokkos::parallel_for(
+          "recon_lsq_cell_c_svd_init", initPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc, const int ji) {
+            p_coeff_view(ji, jc, jk, jb) = 0;
+          });
+    }
+  }
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "recon_lsq_cell_c_svd_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) -
+                   p_cc_view(jc, jk, jb);
+          z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) -
+                   p_cc_view(jc, jk, jb);
+
+          p_coeff_view(9, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8);
+          p_coeff_view(8, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8);
+          p_coeff_view(7, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8);
+          p_coeff_view(6, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8);
+          p_coeff_view(5, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8);
+          p_coeff_view(4, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8);
+          p_coeff_view(3, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8);
+          p_coeff_view(2, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8);
+          p_coeff_view(1, jc, jk, jb) =
+              lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) +
+              lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) +
+              lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) +
+              lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) +
+              lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) +
+              lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) +
+              lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) +
+              lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) +
+              lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8);
+          p_coeff_view(0, jc, jk, jb) =
+              p_cc_view(jc, jk, jb) -
+              p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) -
+              p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) -
+              p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) -
+              p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) -
+              p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) -
+              p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) -
+              p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) -
+              p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) -
+              p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8);
+        });
+  }
+
+  Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD);
+
+template <typename T>
+void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk,
+           const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk,
+           int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,
+           bool lacc, int nlev, int nblks_c, int nblks_e) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
+
+  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
+  UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) {
+          div_vec_c_view(jc, jk, jb) =
+              vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
+                  geofac_div_view(jc, 0, jb) +
+              vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
+                  geofac_div_view(jc, 1, jb) +
+              vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
+                  geofac_div_view(jc, 2, jb);
+        });
+  }
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D);
+
+template <typename T>
+void div3d_2field(const T *vec_e, const int *cell_edge_idx,
+                  const int *cell_edge_blk, const T *geofac_div, T *div_vec_c,
+                  const T *in2, T *out2, int i_startblk, int i_endblk,
+                  int i_startidx_in, int i_endidx_in, int slev, int elev,
+                  int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
+
+  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
+  UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
+
+  UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e);
+  UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "div3d_2field_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jc) {
+          div_vec_c_view(jc, jk, jb) =
+              vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
+                  geofac_div_view(jc, 0, jb) +
+              vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
+                  geofac_div_view(jc, 1, jb) +
+              vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
+                  geofac_div_view(jc, 2, jb);
+
+          out2_view(jc, jk, jb) =
+              in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) *
+                  geofac_div_view(jc, 0, jb) +
+              in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) *
+                  geofac_div_view(jc, 1, jb) +
+              in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) *
+                  geofac_div_view(jc, 2, jb);
+        });
+  }
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD);
+
+template <typename T>
+void div4d(const int *cell_edge_idx, const int *cell_edge_blk,
+           const T *geofac_div, const T *f4din, T *f4dout, int dim4d,
+           int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,
+           const int *slev, const int *elev, int nproma, bool lacc, int nlev,
+           int nblks_c, int nblks_e) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<const T ****, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstT4D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT4D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3);
+
+  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c);
+
+  UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d);
+  UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    for (int ji = 0; ji < dim4d; ++ji) {
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx},
+                                                         {elev[ji], i_endidx});
+      Kokkos::parallel_for(
+          "div4d_inner", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            f4dout_view(jc, jk, jb, ji) =
+                f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) *
+                    geofac_div_view(jc, 0, jb) +
+                f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) *
+                    geofac_div_view(jc, 1, jb) +
+                f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) *
+                    geofac_div_view(jc, 2, jb);
+          });
+    }
+  }
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D);
+
+template <typename T>
+void div_avg(const T *vec_e, const int *cell_neighbor_idx,
+             const int *cell_neighbor_blk, const int *cell_edge_idx,
+             const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff,
+             T *div_vec_c, const T *opt_in2, T *opt_out2,
+             const int *i_startblk_in, const int *i_endblk_in,
+             const int *i_startidx_in, const int *i_endidx_in, int slev,
+             int elev, int nproma, int patch_id, bool l_limited_area,
+             bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3);
+  UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3);
+  UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3);
+
+  UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e);
+  UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c);
+
+  UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c);
+
+  UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e);
+  UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c);
+
+  Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c);
+  Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c);
+
+  int i_startblk = i_startblk_in[0];
+  int i_endblk = i_endblk_in[0];
+
+  if (l2fields) {
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
+                        i_startblk, i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                         {elev, i_endidx});
+      Kokkos::parallel_for(
+          "div_avg_step1", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            aux_c(jc, jk, jb) =
+                vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
+                    geofac_div_view(jc, 0, jb) +
+                vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
+                    geofac_div_view(jc, 1, jb) +
+                vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
+                    geofac_div_view(jc, 2, jb);
+            aux_c2(jc, jk, jb) =
+                opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
+                    geofac_div_view(jc, 0, jb) +
+                opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
+                    geofac_div_view(jc, 1, jb) +
+                opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
+                    geofac_div_view(jc, 2, jb);
+          });
+    }
+  } else {
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
+                        i_startblk, i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                         {elev, i_endidx});
+      Kokkos::parallel_for(
+          "div_avg_step2", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            aux_c(jc, jk, jb) =
+                vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) *
+                    geofac_div_view(jc, 0, jb) +
+                vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) *
+                    geofac_div_view(jc, 1, jb) +
+                vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) *
+                    geofac_div_view(jc, 2, jb);
+          });
+    }
+  }
+
+  if (patch_id > 0 || l_limited_area) {
+    i_startblk = i_startblk_in[1];
+    i_endblk = i_endblk_in[1];
+
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
+                        i_startblk, i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                         {elev, i_endidx});
+      Kokkos::parallel_for(
+          "div_avg_step3", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb);
+          });
+    }
+
+    if (l2fields) {
+      for (int jb = i_startblk; jb < i_endblk; ++jb) {
+        int i_startidx, i_endidx;
+        get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
+                          i_startblk, i_endblk, i_startidx, i_endidx);
+
+        Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                           {elev, i_endidx});
+        Kokkos::parallel_for(
+            "div_avg_step4", innerPolicy,
+            KOKKOS_LAMBDA(const int jk, const int jc) {
+              opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb);
+            });
+      }
+    }
+  }
+
+  i_startblk = i_startblk_in[2];
+  i_endblk = i_endblk_in[2];
+
+  if (l2fields) {
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
+                        i_startblk, i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                         {elev, i_endidx});
+      Kokkos::parallel_for(
+          "div_avg_step5", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            div_vec_c_view(jc, jk, jb) =
+                aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
+                    avg_coeff_view(jc, 1, jb) +
+                aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
+                    avg_coeff_view(jc, 2, jb) +
+                aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
+                    avg_coeff_view(jc, 3, jb);
+            opt_out2_view(jc, jk, jb) =
+                aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
+                    avg_coeff_view(jc, 1, jb) +
+                aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
+                    avg_coeff_view(jc, 2, jb) +
+                aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
+                    avg_coeff_view(jc, 3, jb);
+          });
+    }
+  } else {
+    for (int jb = i_startblk; jb < i_endblk; ++jb) {
+      int i_startidx, i_endidx;
+      get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
+                        i_startblk, i_endblk, i_startidx, i_endidx);
+
+      Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                         {elev, i_endidx});
+      Kokkos::parallel_for(
+          "div_avg_step6", innerPolicy,
+          KOKKOS_LAMBDA(const int jk, const int jc) {
+            div_vec_c_view(jc, jk, jb) =
+                aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) +
+                aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) *
+                    avg_coeff_view(jc, 1, jb) +
+                aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) *
+                    avg_coeff_view(jc, 2, jb) +
+                aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) *
+                    avg_coeff_view(jc, 3, jb);
+          });
+    }
+  }
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG);
+
+template <typename T>
+void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx,
+                      const int *vert_edge_blk, const T *geofac_rot, T *rot_vec,
+                      int i_startblk, int i_endblk, int i_startidx_in,
+                      int i_endidx_in, int slev, int elev, int nproma,
+                      bool lacc, int nlev, int nblks_e, int nblks_v) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6);
+  UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6);
+
+  UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v);
+
+  UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "rot_vertex_atmos_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jv) {
+          rot_vec_view(jv, jk, jb) =
+              vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) *
+                  geofac_rot_view(jv, 0, jb) +
+              vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) *
+                  geofac_rot_view(jv, 1, jb) +
+              vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) *
+                  geofac_rot_view(jv, 2, jb) +
+              vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) *
+                  geofac_rot_view(jv, 3, jb) +
+              vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) *
+                  geofac_rot_view(jv, 4, jb) +
+              vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) *
+                  geofac_rot_view(jv, 5, jb);
+        });
+  }
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS);
+
+template <typename T>
+void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx,
+                   const int *vert_edge_blk, const T *geofac_rot, T *rot_vec,
+                   int i_startblk, int i_endblk, int i_startidx_in,
+                   int i_endidx_in, int slev, int elev, int nproma, bool lacc,
+                   bool acc_async, int nlev, int nblks_e, int nblks_v) {
+  // Wrap raw pointers in unmanaged Kokkos Views.
+  typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedConstT3D;
+  typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged>
+      UnmanagedT3D;
+  typedef Kokkos::View<const int ***, Kokkos::LayoutLeft,
+                       Kokkos::MemoryUnmanaged>
+      UnmanagedConstInt3D;
+
+  UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e);
+
+  UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6);
+  UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6);
+
+  UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v);
+
+  UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v);
+
+  for (int jb = i_startblk; jb < i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk,
+                      i_endblk, i_startidx, i_endidx);
+
+    Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx},
+                                                       {elev, i_endidx});
+    Kokkos::parallel_for(
+        "rot_vertex_atmos_inner", innerPolicy,
+        KOKKOS_LAMBDA(const int jk, const int jv) {
+          rot_vec_view(jv, jk, jb) =
+              vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) *
+                  geofac_rot_view(jv, 0, jb) +
+              vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) *
+                  geofac_rot_view(jv, 1, jb) +
+              vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) *
+                  geofac_rot_view(jv, 2, jb) +
+              vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) *
+                  geofac_rot_view(jv, 3, jb) +
+              vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) *
+                  geofac_rot_view(jv, 4, jb) +
+              vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) *
+                  geofac_rot_view(jv, 5, jb);
+        });
+  }
+
+  if (!acc_async)
+    Kokkos::fence();
+}
+
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI);
diff --git a/src/horizontal/mo_lib_divrot.hpp b/src/horizontal/mo_lib_divrot.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b8e97430192affa7d1820e0df36c61f1be3d98ed
--- /dev/null
+++ b/src/horizontal/mo_lib_divrot.hpp
@@ -0,0 +1,130 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+#include <Kokkos_Core.hpp>
+#include <types.hpp>
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type)                               \
+  void recon_lsq_cell_l(                                                       \
+      const _type *p_cc, const int *cell_neighbor_idx,                         \
+      const int *cell_neighbor_blk, const _type *lsq_qtmat_c,                  \
+      const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c,             \
+      const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk,  \
+      int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,      \
+      bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev,          \
+      int lsq_dim_unk, int lsq_dim_c)
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type)                           \
+  void recon_lsq_cell_l_svd(                                                   \
+      const _type *p_cc, const int *cell_neighbor_idx,                         \
+      const int *cell_neighbor_blk, const _type *lsq_pseudoinv,                \
+      const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk,  \
+      int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,      \
+      bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev,          \
+      int lsq_dim_unk, int lsq_dim_c)
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type)                               \
+  void recon_lsq_cell_q(                                                       \
+      const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,           \
+      const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c,                 \
+      const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff,  \
+      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,        \
+      int slev, int elev, int nproma, int patch_id, bool l_limited_area,       \
+      bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type)                           \
+  void recon_lsq_cell_q_svd(                                                   \
+      const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,           \
+      const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff,    \
+      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,        \
+      int slev, int elev, int nproma, int patch_id, bool l_limited_area,       \
+      bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type)                               \
+  void recon_lsq_cell_c(                                                       \
+      const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,           \
+      const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c,                 \
+      const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff,  \
+      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,        \
+      int slev, int elev, int nproma, int patch_id, bool l_limited_area,       \
+      bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c)
+
+#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type)                           \
+  void recon_lsq_cell_c_svd(                                                   \
+      const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c,           \
+      const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff,    \
+      int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in,        \
+      int slev, int elev, int nproma, int patch_id,                            \
+      bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk,  \
+      int lsq_dim_c)
+
+#define ICONMATH_DECLARE_DIV3D(_type)                                          \
+  void div3d(const _type *vec_e, const int *cell_edge_idx,                     \
+             const int *cell_edge_blk, const _type *geofac_div,                \
+             _type *div_vec_c, int i_startblk, int i_endblk,                   \
+             int i_startidx_in, int i_endidx_in, int slev, int elev,           \
+             int nproma, bool lacc, int nlev, int nblks_c, int nblks_e)
+
+#define ICONMATH_DECLARE_DIV3D_2FIELD(_type)                                   \
+  void div3d_2field(const _type *vec_e, const int *cell_edge_idx,              \
+                    const int *cell_edge_blk, const _type *geofac_div,         \
+                    _type *div_vec_c, const _type *in2, _type *out2,           \
+                    int i_startblk, int i_endblk, int i_startidx_in,           \
+                    int i_endidx_in, int slev, int elev, int nproma,           \
+                    bool lacc, int nlev, int nblks_c, int nblks_e)
+
+#define ICONMATH_DECLARE_DIV4D(_type)                                          \
+  void div4d(const int *cell_edge_idx, const int *cell_edge_blk,               \
+             const _type *geofac_div, const _type *f4din, _type *f4dout,       \
+             int dim4d, int i_startblk, int i_endblk, int i_startidx_in,       \
+             int i_endidx_in, const int *slev, const int *elev, int nproma,    \
+             bool lacc, int nlev, int nblks_c, int nblks_e)
+
+#define ICONMATH_DECLARE_DIV_AVG(_type)                                        \
+  void div_avg(const _type *vec_e, const int *cell_neighbor_idx,               \
+               const int *cell_neighbor_blk, const int *cell_edge_idx,         \
+               const int *cell_edge_blk, const _type *geofac_div,              \
+               const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \
+               _type *opt_out2, const int *i_startblk_in,                      \
+               const int *i_endblk_in, const int *i_startidx_in,               \
+               const int *i_endidx_in, int slev, int elev, int nproma,         \
+               int patch_id, bool l_limited_area, bool l2fields, bool lacc,    \
+               int nlev, int nblks_c, int nblks_e)
+
+#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type)                               \
+  void rot_vertex_atmos(                                                       \
+      const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk,  \
+      const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk,   \
+      int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,      \
+      bool lacc, int nlev, int nblks_e, int nblks_v)
+
+#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type)                                  \
+  void rot_vertex_ri(                                                          \
+      const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk,  \
+      const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk,   \
+      int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma,      \
+      bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v)
+
+// Declare as templates
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T);
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T);
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T);
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T);
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T);
+template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T);
+template <typename T> ICONMATH_DECLARE_DIV3D(T);
+template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T);
+template <typename T> ICONMATH_DECLARE_DIV4D(T);
+template <typename T> ICONMATH_DECLARE_DIV_AVG(T);
+template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T);
+template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T);
diff --git a/src/types.hpp b/src/types.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7192e18faed86d8534411653cb040fac09720310
--- /dev/null
+++ b/src/types.hpp
@@ -0,0 +1,16 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+#define ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro)                       \
+  template _macro(float);                                                      \
+  template _macro(double)
diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt
index c9320cb7eec1402eb107dee72f0b68d7ba3a9908..98a21b277ca3ae22a8fafcc5e7640d7d62bfc4fc 100644
--- a/test/c/CMakeLists.txt
+++ b/test/c/CMakeLists.txt
@@ -27,6 +27,9 @@ endif()
 
 set(SOURCES
   main.cpp
+  test_horizontal_div.cpp
+  test_horizontal_recon.cpp
+  test_horizontal_rot.cpp
   test_tdma_solver.cpp
   test_interpolation_vector.cpp
   test_intp_rbf.cpp
@@ -35,11 +38,14 @@ set(SOURCES
 # Create the test executable from your test files, including main.cpp.
 add_executable(iconmath_test_c ${SOURCES})
 
+target_include_directories(iconmath_test_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+
 # Link the test executable with GoogleTest and Kokkos.
 target_link_libraries(iconmath_test_c
   PUBLIC
     iconmath-support
     iconmath-interpolation
+    iconmath-horizontal
   PRIVATE
     gtest_main
     Kokkos::kokkos
diff --git a/test/c/dim_helper.hpp b/test/c/dim_helper.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..165d5d9d3052bb034c5795797942cf090bd669db
--- /dev/null
+++ b/test/c/dim_helper.hpp
@@ -0,0 +1,88 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#pragma once
+
+// Template function for computing array size.
+// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by
+//    dim_combine(2, 3, 4, 5).
+// Which will automatically instantiate
+//    dim_combine<int, int, int, int>(2, 3, 4, 5).
+// The function then call dim_combine recursively
+//    dim_combine<int, int, int, int>(2, 3, 4, 5) {
+//      return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5);
+//    }
+//    dim_combine<int, int, int>(3, 4, 5) {
+//      return static_cast<size_t>(3) * dim_combine<int, int>(4, 5);
+//    }
+//    dim_combine<int, int>(4, 5) {
+//      return static_cast<size_t>(4) * dim_combine<int>(5);
+//    }
+// Where the last dim_combine is specialized as
+//    dim_combine<int>(5) {
+//      return static_cast<size_t>(5);
+//    }
+// Which gives
+//    dim_combine<int, int, int, int>(2, 3, 4, 5) =
+//      static_cast<size_t>(2) * static_cast<size_t>(3) *
+//      static_cast<size_t>(4) * static_cast<size_t>(5)
+/// Template helpers for combining multiple dimension array sizes.
+/// The base function of dimension combine. Should not be used.
+template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; }
+/// Template specialization of only one dimension, returns the dimension itself.
+template <typename T> size_t dim_combine(T dim) {
+  return static_cast<size_t>(dim);
+}
+/// Template specialization of picking out the first dimension. The combined
+/// dimension is the first dimension times the combined dimension of the rest.
+template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) {
+  return static_cast<size_t>(dim) * dim_combine(dims...);
+}
+
+// Template function for LayoutLeft ID access in compile time.
+// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its
+// corresponding vector id (LayoutLeft) by
+//    at<2, 3, 4, 5>(id1, id2, id3, id4).
+// The at_impl then adds the id from beginning to the end and pass the id prefix
+// to the next recursive at_impl function. In this example,
+//    at<2, 3, 4, 5>(id1, id2, id3, id4) {
+//      return id1 + at_impl<3, 4, 5>(2, id2, id3, id4);
+//    }
+//    at_impl<3, 4, 5>(2, id2, id3, id4) {
+//      return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4);
+//    }
+//    at_impl<4, 5>(2 * 3, id3, id4) {
+//      return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4);
+//    }
+//    at_impl<5>(2 * 3 * 4, id4) {
+//      return id4 * 2 * 3 * 4;
+//    }
+// Which gives
+//    at<2, 3, 4, 5>(id1, id2, id3, id4) = id1         + id2 * 2         +
+//                                         id3 * 2 * 3 + id4 * 2 * 3 * 4
+/// Helper type converting integer numbers to int
+template <class T, auto> using always_t = T;
+/// Base function of at_impl. Should not be used.
+template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; }
+/// Template specialization of the last ID
+template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; }
+/// Template specialization of at_impl, accumulate the return value using the
+/// first id and pass the prefix to the next recursive at_impl function.
+template <int FirstDim, int... Dims>
+int at_impl(int prefix, int id, always_t<int, Dims>... ids) {
+  return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...);
+}
+/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming
+/// LayoutLeft. Use this function instead of at_impl.
+template <int FirstDim, int... Dims>
+int at(int id, always_t<int, Dims>... ids) {
+  return id + at_impl<Dims...>(FirstDim, ids...);
+}
diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..596d19e708d6c6e466fa2f23c579cb51f8689e19
--- /dev/null
+++ b/test/c/test_horizontal_div.cpp
@@ -0,0 +1,1070 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+#include <dim_helper.hpp>
+#include <horizontal/mo_lib_divrot.hpp>
+#include <support/mo_lib_loopindices.hpp>
+
+/// Test class for the horizontal divergence tests. Templated for the ValueType
+template <typename ValueType> class HorizontalDivTest : public ::testing::Test {
+protected:
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 2;    // number of vertical levels
+  static constexpr int nblks_c = 1; // number of cell blocks
+  static constexpr int nblks_e = 1; // number of edge blocks
+  static constexpr int dim4d = 2;   // 4th dimension size
+
+  int i_startblk = 0;
+  int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1]
+  int i_startidx_in = 0;
+  int i_endidx_in = nproma; // Full range: 0 .. nproma-1
+  std::vector<int> slev;
+  std::vector<int> elev;
+  bool lacc = false; // Not using ACC-specific behavior.
+
+  std::vector<ValueType> vec_e;
+  std::vector<int> cell_edge_idx;
+  std::vector<int> cell_edge_blk;
+  std::vector<ValueType> geofac_div;
+  std::vector<ValueType> div_vec_c;
+  std::vector<ValueType> f4din;
+  std::vector<ValueType> f4dout;
+
+  // Followings are needed in HorizontalDivAvgTest
+  std::vector<int> cell_neighbor_idx;
+  std::vector<int> cell_neighbor_blk;
+  std::vector<ValueType> avg_coeff;
+  std::vector<ValueType> opt_in2;
+  std::vector<ValueType> opt_out2;
+
+  HorizontalDivTest() {
+    slev.resize(dim4d, 0);
+    elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1)
+
+    vec_e.resize(dim_combine(nproma, nlev, nblks_e));
+    cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3));
+    cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3));
+    geofac_div.resize(dim_combine(nproma, 3, nblks_c));
+    div_vec_c.resize(dim_combine(nproma, nlev, nblks_c));
+    f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d));
+    f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d));
+    cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3));
+    cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3));
+    avg_coeff.resize(dim_combine(nproma, 4, nblks_c));
+    opt_in2.resize(dim_combine(nproma, nlev, nblks_e));
+    opt_out2.resize(dim_combine(nproma, nlev, nblks_c));
+  }
+};
+
+/// ValueTypes which the divrot tests should run with
+typedef ::testing::Types<float, double> ValueTypes;
+
+TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes);
+
+TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Initialization with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+    }
+
+    // Set edge indices to point to specific cells (including self)
+    this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i;
+    this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // All edges are in the same block for this test
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors
+    this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5;
+    this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3;
+    this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2;
+
+    // Initialize div_vec_c to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0;
+    }
+  }
+
+  // Call the div3d function
+  div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(),
+                   this->cell_edge_blk.data(), this->geofac_div.data(),
+                   this->div_vec_c.data(), this->i_startblk, this->i_endblk,
+                   this->i_startidx_in, this->i_endidx_in, this->slev[0],
+                   this->elev[0], this->nproma, this->lacc, this->nlev,
+                   this->nblks_c, this->nblks_e);
+
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6);
+}
+
+TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialization with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen);
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 3; ++j) {
+      this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Initialize div_vec_c to random values
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Call the div3d function
+  div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(),
+                   this->cell_edge_blk.data(), this->geofac_div.data(),
+                   this->div_vec_c.data(), this->i_startblk, this->i_endblk,
+                   this->i_startidx_in, this->i_endidx_in, this->slev[0],
+                   this->elev[0], this->nproma, this->lacc, this->nlev,
+                   this->nblks_c, this->nblks_e);
+
+  // Calculate reference values separately and verify results
+  std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+      }
+    }
+  }
+
+  // Verify results
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)],
+                  ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5)
+          << "Results differ at i=" << i << ", k=" << k;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+  const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>;
+  const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>;
+
+  // Initialization with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+      this->f4din[f4d_at(i, k, 0, 0)] =
+          (i + 1) * (k + 2); // Different pattern for second field
+    }
+
+    // Set edge indices to point to specific cells (including self)
+    this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i;
+    this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // All edges are in the same block for this test
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors
+    this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5;
+    this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3;
+    this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2;
+
+    // Initialize div_vec_c and f4dout to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0;
+      this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0;
+    }
+  }
+
+  // Call the div3d_2field function
+  div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(),
+                          this->cell_edge_blk.data(), this->geofac_div.data(),
+                          this->div_vec_c.data(), this->f4din.data(),
+                          this->f4dout.data(), this->i_startblk, this->i_endblk,
+                          this->i_startidx_in, this->i_endidx_in, this->slev[0],
+                          this->elev[0], this->nproma, this->lacc, this->nlev,
+                          this->nblks_c, this->nblks_e);
+
+  // Check first field (same as in div3d test)
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6);
+
+  // Check second field (expected values calculated manually)
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6);
+}
+
+TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+  const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>;
+  const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialization with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+      this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen);
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen);
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 3; ++j) {
+      this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Initialize div_vec_c and f4dout to random values
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen);
+      this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Call the div3d_2field function
+  div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(),
+                          this->cell_edge_blk.data(), this->geofac_div.data(),
+                          this->div_vec_c.data(), this->f4din.data(),
+                          this->f4dout.data(), this->i_startblk, this->i_endblk,
+                          this->i_startidx_in, this->i_endidx_in, this->slev[0],
+                          this->elev[0], this->nproma, this->lacc, this->nlev,
+                          this->nblks_c, this->nblks_e);
+
+  // Calculate reference values separately and verify results
+  std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0);
+  std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        // Calculate reference value for first field
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+
+        // Calculate reference value for second field
+        ref_f4dout[f4dout_at(jc, jk, jb, 0)] =
+            this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                               this->cell_edge_blk[cell_edge_at(jc, jb, 0)],
+                               0)] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                               this->cell_edge_blk[cell_edge_at(jc, jb, 1)],
+                               0)] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                               this->cell_edge_blk[cell_edge_at(jc, jb, 2)],
+                               0)] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+      }
+    }
+  }
+
+  // Verify results for first field
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)],
+                  ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5)
+          << "First field results differ at i=" << i << ", k=" << k;
+    }
+  }
+
+  // Verify results for second field
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)],
+                  ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5)
+          << "Second field results differ at i=" << i << ", k=" << k;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>;
+  const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma;
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+      this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1);
+    }
+
+    for (int k = 0; k < nlev; ++k) {
+      for (int d = 0; d < dim4d; ++d) {
+        this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d;
+        this->f4dout[f4dout_at(i, k, 0, d)] = 0.0;
+      }
+    }
+  }
+
+  // Test function
+  div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(),
+                   this->geofac_div.data(), this->f4din.data(),
+                   this->f4dout.data(), this->dim4d, this->i_startblk,
+                   this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+                   this->slev.data(), this->elev.data(), this->nproma,
+                   this->lacc, this->nlev, this->nblks_c, this->nblks_e);
+
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6);
+  EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6);
+}
+
+TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>;
+  const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0);
+
+  // Initialize with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen);
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+      this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    for (int k = 0; k < nlev; ++k) {
+      for (int d = 0; d < dim4d; ++d) {
+        this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen);
+        this->f4dout[f4dout_at(i, k, 0, d)] = 0.0;
+      }
+    }
+  }
+
+  // Test function
+  div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(),
+                   this->geofac_div.data(), this->f4din.data(),
+                   this->f4dout.data(), this->dim4d, this->i_startblk,
+                   this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+                   this->slev.data(), this->elev.data(), this->nproma,
+                   this->lacc, this->nlev, this->nblks_c, this->nblks_e);
+
+  // Compute reference result and check
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+
+    for (int ji = 0; ji < dim4d; ++ji) {
+      for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) {
+        for (int jc = i_startidx; jc < i_endidx; ++jc) {
+          TypeParam expected = 0.0;
+          for (int je = 0; je < 3; ++je) {
+            expected +=
+                this->f4din[f4din_at(
+                    this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk,
+                    this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] *
+                this->geofac_div[geofac_div_at(jc, je, jb)];
+          }
+
+          EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5)
+              << "Random test fails at jc=" << jc << ", jk=" << jk
+              << ", jb=" << jb << ", ji=" << ji;
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes);
+
+TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Vectors for additional parameters
+  // Vectors for block and index ranges
+  std::vector<int> i_startblk_in(3, 0);
+  std::vector<int> i_endblk_in(3, nblks_c);
+  std::vector<int> i_startidx_in(3, 0);
+  std::vector<int> i_endidx_in(3, nproma);
+
+  // Parameters for the test
+  int patch_id = 1;
+  bool l_limited_area = true;
+  bool l2fields = true;
+
+  const auto &cell_neighbor_at = at<nproma, nblks_c, 3>;
+  const auto &avg_coeff_at = at<nproma, 4, nblks_c>;
+
+  // Initialize the vectors with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+      this->opt_in2[vec_e_at(i, k, 0)] =
+          (i + 1) * (k + 1) * 0.5; // Half of vec_e
+    }
+
+    // Set edge indices to point to specific cells
+    this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i;
+    this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // Set neighbor indices similarly
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // All edges and neighbors are in the same block for this test
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors
+    this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5;
+    this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3;
+    this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2;
+
+    // Average coefficients
+    this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self
+    this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor
+    this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor
+    this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor
+
+    // Initialize div_vec_c and opt_out2 to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0;
+      this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0;
+    }
+  }
+
+  // Call the div_avg function
+  div_avg<TypeParam>(
+      this->vec_e.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->cell_edge_idx.data(),
+      this->cell_edge_blk.data(), this->geofac_div.data(),
+      this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(),
+      this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(),
+      i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0],
+      this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev,
+      this->nblks_c, this->nblks_e);
+
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6);
+
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6);
+}
+
+TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Vectors for block and index ranges
+  std::vector<int> i_startblk_in(3, 0);
+  std::vector<int> i_endblk_in(3, nblks_c);
+  std::vector<int> i_startidx_in(3, 0);
+  std::vector<int> i_endidx_in(3, nproma);
+
+  // Parameters for the test
+  int patch_id = 1;
+  bool l_limited_area = true;
+  bool l2fields = true;
+
+  const auto &cell_neighbor_at = at<nproma, nblks_c, 3>;
+  const auto &avg_coeff_at = at<nproma, 4, nblks_c>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialize with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+      this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen);
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen);
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 3; ++j) {
+      this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Random average coefficients
+    for (int j = 0; j < 4; ++j) {
+      this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Random initial values for div_vec_c and opt_out2
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen);
+      this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Call the div_avg function
+  div_avg<TypeParam>(
+      this->vec_e.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->cell_edge_idx.data(),
+      this->cell_edge_blk.data(), this->geofac_div.data(),
+      this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(),
+      this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(),
+      i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0],
+      this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev,
+      this->nblks_c, this->nblks_e);
+
+  // Calculate reference values manually
+  std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c));
+  std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c));
+  std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c));
+  std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c));
+
+  // Step 1: Calculate aux_c and aux_c2
+  for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
+                      i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        aux_c[div_vec_c_at(jc, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+
+        aux_c2[div_vec_c_at(jc, jk, jb)] =
+            this->opt_in2[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->opt_in2[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->opt_in2[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+      }
+    }
+  }
+
+  // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0
+  for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
+                      i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            aux_c[div_vec_c_at(jc, jk, jb)];
+        ref_opt_out2[div_vec_c_at(jc, jk, jb)] =
+            aux_c2[div_vec_c_at(jc, jk, jb)];
+      }
+    }
+  }
+
+  // Step 3: Perform averaging for the rest of the blocks
+  for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
+                      i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            aux_c[div_vec_c_at(jc, jk, jb)] *
+                this->avg_coeff[avg_coeff_at(jc, 0, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] *
+                this->avg_coeff[avg_coeff_at(jc, 1, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] *
+                this->avg_coeff[avg_coeff_at(jc, 2, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] *
+                this->avg_coeff[avg_coeff_at(jc, 3, jb)];
+
+        ref_opt_out2[div_vec_c_at(jc, jk, jb)] =
+            aux_c2[div_vec_c_at(jc, jk, jb)] *
+                this->avg_coeff[avg_coeff_at(jc, 0, jb)] +
+            aux_c2[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] *
+                this->avg_coeff[avg_coeff_at(jc, 1, jb)] +
+            aux_c2[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] *
+                this->avg_coeff[avg_coeff_at(jc, 2, jb)] +
+            aux_c2[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] *
+                this->avg_coeff[avg_coeff_at(jc, 3, jb)];
+      }
+    }
+  }
+
+  // Verify results
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)],
+                  ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5)
+          << "div_vec_c results differ at i=" << i << ", k=" << k;
+
+      EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)],
+                  ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5)
+          << "opt_out2 results differ at i=" << i << ", k=" << k;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int dim4d = this->dim4d;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Vectors for block and index ranges
+  std::vector<int> i_startblk_in(3, 0);
+  std::vector<int> i_endblk_in(3, nblks_c);
+  std::vector<int> i_startidx_in(3, 0);
+  std::vector<int> i_endidx_in(3, nproma);
+
+  // Parameters for the test
+  int patch_id = 1;
+  bool l_limited_area = true;
+  bool l2fields = false;
+
+  const auto &cell_neighbor_at = at<nproma, nblks_c, 3>;
+  const auto &avg_coeff_at = at<nproma, 4, nblks_c>;
+
+  // Initialize the vectors with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+      this->opt_in2[vec_e_at(i, k, 0)] =
+          (i + 1) * (k + 1) * 0.5; // Half of vec_e
+    }
+
+    // Set edge indices to point to specific cells
+    this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i;
+    this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // Set neighbor indices similarly
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma;
+
+    // All edges and neighbors are in the same block for this test
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors
+    this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5;
+    this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3;
+    this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2;
+
+    // Average coefficients
+    this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self
+    this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor
+    this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor
+    this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor
+
+    // Initialize div_vec_c and opt_out2 to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0;
+      this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0;
+    }
+  }
+
+  // Call the div_avg function
+  div_avg<TypeParam>(
+      this->vec_e.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->cell_edge_idx.data(),
+      this->cell_edge_blk.data(), this->geofac_div.data(),
+      this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(),
+      this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(),
+      i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0],
+      this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev,
+      this->nblks_c, this->nblks_e);
+
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6);
+  EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6);
+
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6);
+  EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6);
+}
+
+TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int nblks_e = this->nblks_e;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &cell_edge_at = at<nproma, nblks_c, 3>;
+  const auto &geofac_div_at = at<nproma, 3, nblks_c>;
+  const auto &div_vec_c_at = at<nproma, nlev, nblks_c>;
+
+  // Vectors for block and index ranges
+  std::vector<int> i_startblk_in(3, 0);
+  std::vector<int> i_endblk_in(3, nblks_c);
+  std::vector<int> i_startidx_in(3, 0);
+  std::vector<int> i_endidx_in(3, nproma);
+
+  // Parameters for the test
+  int patch_id = 1;
+  bool l_limited_area = true;
+  bool l2fields = false; // Set to false for this test
+
+  const auto &cell_neighbor_at = at<nproma, nblks_c, 3>;
+  const auto &avg_coeff_at = at<nproma, 4, nblks_c>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialize with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+      this->opt_in2[vec_e_at(i, k, 0)] =
+          real_distrib(gen); // Not used but initialize anyway
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 3; ++j) {
+      this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen);
+      this->cell_edge_blk[cell_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 3; ++j) {
+      this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Random average coefficients
+    for (int j = 0; j < 4; ++j) {
+      this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Random initial values for div_vec_c and opt_out2
+    for (int k = 0; k < nlev; ++k) {
+      this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen);
+      this->opt_out2[div_vec_c_at(i, k, 0)] =
+          real_distrib(gen); // Not used but initialize anyway
+    }
+  }
+
+  // Call the div_avg function with l2fields=false
+  div_avg<TypeParam>(
+      this->vec_e.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->cell_edge_idx.data(),
+      this->cell_edge_blk.data(), this->geofac_div.data(),
+      this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(),
+      this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(),
+      i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0],
+      this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev,
+      this->nblks_c, this->nblks_e);
+
+  // Calculate reference values manually
+  std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c));
+  std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c));
+
+  // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false)
+  for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb,
+                      i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        aux_c[div_vec_c_at(jc, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] *
+                this->geofac_div[geofac_div_at(jc, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] *
+                this->geofac_div[geofac_div_at(jc, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk,
+                this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] *
+                this->geofac_div[geofac_div_at(jc, 2, jb)];
+      }
+    }
+  }
+
+  // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated
+  // since l2fields=false)
+  for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb,
+                      i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            aux_c[div_vec_c_at(jc, jk, jb)];
+      }
+    }
+  }
+
+  // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c,
+  // not opt_out2)
+  for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb,
+                      i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        ref_div_vec_c[div_vec_c_at(jc, jk, jb)] =
+            aux_c[div_vec_c_at(jc, jk, jb)] *
+                this->avg_coeff[avg_coeff_at(jc, 0, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] *
+                this->avg_coeff[avg_coeff_at(jc, 1, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] *
+                this->avg_coeff[avg_coeff_at(jc, 2, jb)] +
+            aux_c[div_vec_c_at(
+                this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk,
+                this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] *
+                this->avg_coeff[avg_coeff_at(jc, 3, jb)];
+      }
+    }
+  }
+
+  // Verify results - only check div_vec_c since l2fields=false means opt_out2
+  // isn't updated
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)],
+                  ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5)
+          << "div_vec_c results differ at i=" << i << ", k=" << k;
+    }
+  }
+}
diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8938a101bd2a350da12a0def450472d1e0c9ec9f
--- /dev/null
+++ b/test/c/test_horizontal_recon.cpp
@@ -0,0 +1,1199 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+#include <dim_helper.hpp>
+#include <horizontal/mo_lib_divrot.hpp>
+#include <support/mo_lib_loopindices.hpp>
+
+/// Enum class for the reconstruction method
+enum class ReconstructionMethod {
+  linear,
+  quadratic,
+  cubic,
+};
+
+/// Base test class for the horizontal reconstruct tests. Templated for the ValueType
+/// and ReconMethod for the reconstruction method.
+template <typename ValueType, int ReconMethod>
+class HorizontalReconTest : public ::testing::Test {
+protected:
+  // lsq_dim_c and lsq_dim_unk are instantiated in compile time.
+  static constexpr std::tuple<int, int>
+  init_lsq_dim(ReconstructionMethod method) {
+    switch (method) {
+    case ReconstructionMethod::linear:
+      return std::make_tuple(3, 2);
+    case ReconstructionMethod::quadratic:
+      return std::make_tuple(9, 5);
+    case ReconstructionMethod::cubic:
+      return std::make_tuple(9, 9);
+    }
+  }
+
+  // Constant dimensions.
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 1;    // number of vertical levels
+  static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in)
+  static constexpr std::tuple<int, int> lsq_dim =
+      init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod));
+  static constexpr int lsq_dim_c = std::get<0>(lsq_dim);
+  static constexpr int lsq_dim_unk = std::get<1>(lsq_dim);
+
+  // Parameter values.
+  int i_startblk = 0;
+  int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1]
+  int i_startidx_in = 0;
+  int i_endidx_in = nproma; // Full range: 0 .. nproma-1
+  int slev = 0;
+  int elev = nlev; // Full vertical range (0 .. nlev-1)
+  int patch_id = 0;
+  bool lacc = false;          // Not using ACC-specific behavior.
+  bool acc_async = false;     // No asynchronous execution.
+  bool l_consv = true;        // With conservative correction.
+  bool l_limited_area = true; // Limited area setup
+
+  std::vector<ValueType> p_cc;
+  std::vector<int> cell_neighbor_idx;
+  std::vector<int> cell_neighbor_blk;
+  std::vector<ValueType> lsq_qtmat_c;
+  std::vector<ValueType> lsq_rmat_rdiag_c;
+  std::vector<ValueType> lsq_rmat_utri_c;
+  std::vector<ValueType> lsq_moments;
+  std::vector<ValueType> lsq_pseudoinv;
+  std::vector<ValueType> p_coeff;
+
+  HorizontalReconTest() {
+    p_cc.resize(dim_combine(nproma, nlev, nblks_c));
+    cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c));
+    cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c));
+    lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c));
+    lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c));
+    lsq_rmat_utri_c.resize(dim_combine(
+        nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c));
+    lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk));
+    lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c));
+    p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c));
+  }
+};
+
+/// Test class for the horizontal tests. The reconstruction method is specified
+/// to linear.
+template <typename ValueType>
+class HorizontalReconLinearTest
+    : public HorizontalReconTest<ValueType, static_cast<int>(
+                                                ReconstructionMethod::linear)> {
+};
+
+/// Test class for the horizontal tests. The reconstruction method is specified
+/// to quadratic.
+template <typename ValueType>
+class HorizontalReconQuadraticTest
+    : public HorizontalReconTest<
+          ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {};
+
+/// Test class for the horizontal tests. The reconstruction method is specified
+/// to cubic.
+template <typename ValueType>
+class HorizontalReconCubicTest
+    : public HorizontalReconTest<ValueType, static_cast<int>(
+                                                ReconstructionMethod::cubic)> {
+};
+
+/// ValueTypes which the divrot tests should run with
+typedef ::testing::Types<float, double> ValueTypes;
+
+TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes);
+
+TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i;
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+      this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0;
+      this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0;
+    this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0;
+    this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1;
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+  }
+
+  // Test function
+  recon_lsq_cell_l<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async,
+      this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      0.34, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      1.8, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      1.0, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0);
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+      this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen);
+      this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen);
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+
+    this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen);
+    this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen);
+    this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen);
+
+    this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen);
+    this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen);
+  }
+
+  // Test function
+  recon_lsq_cell_l<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async,
+      this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> z_qt_times_d(lsq_dim_unk);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        z_qt_times_d[0] = 0.0;
+        z_qt_times_d[1] = 0.0;
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i];
+          z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i];
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] =
+            this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1];
+        p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] =
+            this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] *
+            (z_qt_times_d[0] -
+             this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] *
+                 p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]);
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)] -
+            p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] *
+                this->lsq_moments[moments_at(jc, jb, 0)] -
+            p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] *
+                this->lsq_moments[moments_at(jc, jb, 1)];
+      }
+    }
+  }
+
+  // Check result
+  for (int i = 0; i < lsq_dim_unk + 1; ++i) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5)
+          << "For loop result fails for i = " << i << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i;
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i;
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+      this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0;
+      this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+  }
+
+  // Test function
+  recon_lsq_cell_l_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async,
+      this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      0.65, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      1.0, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      0.5, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0);
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+      this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen);
+      this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen);
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen);
+    this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen);
+  }
+
+  // Test function
+  recon_lsq_cell_l_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async,
+      this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] =
+            this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] +
+            this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] +
+            this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2];
+        p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] =
+            this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] +
+            this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] +
+            this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2];
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+                this->p_cc[p_cc_at(jc, jk, jb)] -
+                p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] *
+                    this->lsq_moments[moments_at(jc, jb, 0)] -
+                p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] *
+                    this->lsq_moments[moments_at(jc, jb, 1)];
+      }
+    }
+  }
+
+  // Check result
+  for (int i = 0; i < lsq_dim_unk + 1; ++i) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5)
+          << "For loop result fails for i = " << i << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes);
+
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0;
+    for (int j = 1; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0;
+      this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5;
+      this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2;
+      this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7;
+      this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0;
+    }
+
+    for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) {
+      this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0;
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+    this->lsq_moments[moments_at(i, 0, 2)] = 0.4;
+    this->lsq_moments[moments_at(i, 0, 3)] = 0.5;
+    this->lsq_moments[moments_at(i, 0, 4)] = 0.6;
+  }
+
+  // Test function
+  recon_lsq_cell_q<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      0.24, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      3.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      -2.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))],
+      2.8, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))],
+      -3.8, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))],
+      2.6, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0);
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      for (int k = 0; k < lsq_dim_c; ++k) {
+        this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen);
+      }
+      this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen);
+      this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen);
+    }
+    for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) {
+      this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Test function
+  recon_lsq_cell_q<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> z_qt_times_d(lsq_dim_unk);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          z_qt_times_d[j] = 0.0;
+          for (int i = 0; i < lsq_dim_c; ++i) {
+            z_qt_times_d[j] +=
+                this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i];
+          }
+        }
+        int utri_id = 0;
+        for (int j = lsq_dim_unk; j > 0; --j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1];
+          for (int k = j + 1; k <= lsq_dim_unk; ++k) {
+            p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -=
+                this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] *
+                p_result[at<lsq_dim_unk + 1, nproma>(k, jc)];
+          }
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *=
+              this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)];
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)];
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -=
+              p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] *
+              this->lsq_moments[moments_at(jc, jb, j)];
+        }
+      }
+    }
+  }
+
+  // Check result
+  for (int i = 0; i < lsq_dim_unk + 1; ++i) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5)
+          << "For loop result fails for i = " << i << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0;
+    for (int j = 1; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0;
+      this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5;
+      this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2;
+      this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7;
+      this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+    this->lsq_moments[moments_at(i, 0, 2)] = 0.4;
+    this->lsq_moments[moments_at(i, 0, 3)] = 0.5;
+    this->lsq_moments[moments_at(i, 0, 4)] = 0.6;
+  }
+
+  // Test function
+  recon_lsq_cell_q_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      -0.56, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      1.0, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      0.5, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))],
+      0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))],
+      0.7, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))],
+      1.3, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0);
+
+  // Initialization is done only for iblk = 0 and ilev = 0
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      for (int k = 0; k < lsq_dim_c; ++k) {
+        this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen);
+      }
+      this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen);
+    }
+
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Test function
+  recon_lsq_cell_q_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> z_qt_times_d(lsq_dim_unk);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        for (int j = 1; j < lsq_dim_unk + 1; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0;
+          for (int i = 0; i < lsq_dim_c; ++i) {
+            p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] +=
+                this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i];
+          }
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)];
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -=
+              p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] *
+              this->lsq_moments[moments_at(jc, jb, j)];
+        }
+      }
+    }
+  }
+
+  // Check result
+  for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5)
+          << "For loop result fails for j = " << j << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes);
+
+TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0;
+    for (int j = 1; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0;
+      this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9;
+      this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8;
+      this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7;
+      this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6;
+      this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5;
+      this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4;
+      this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3;
+      this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0;
+    }
+
+    for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) {
+      this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0;
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+    this->lsq_moments[moments_at(i, 0, 2)] = 0.4;
+    this->lsq_moments[moments_at(i, 0, 3)] = 0.5;
+    this->lsq_moments[moments_at(i, 0, 4)] = 0.6;
+    this->lsq_moments[moments_at(i, 0, 5)] = 0.7;
+    this->lsq_moments[moments_at(i, 0, 6)] = 0.8;
+    this->lsq_moments[moments_at(i, 0, 7)] = 0.9;
+    this->lsq_moments[moments_at(i, 0, 8)] = 1.0;
+  }
+
+  // Test function
+  recon_lsq_cell_c<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      0.28, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      0.4, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      -0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))],
+      0.4, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))],
+      -0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))],
+      0.4, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))],
+      -0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))],
+      0.4, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))],
+      -0.2, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))],
+      0.4, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>;
+  const auto &rmat_utri_at =
+      at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0);
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      for (int k = 0; k < lsq_dim_c; ++k) {
+        this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen);
+      }
+      this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen);
+      this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen);
+    }
+    for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) {
+      this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Test function
+  recon_lsq_cell_c<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(),
+      this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> z_qt_times_d(lsq_dim_unk);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          z_qt_times_d[j] = 0.0;
+          for (int i = 0; i < lsq_dim_c; ++i) {
+            z_qt_times_d[j] +=
+                this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i];
+          }
+        }
+        int utri_id = 0;
+        for (int j = lsq_dim_unk; j > 0; --j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1];
+          for (int k = j + 1; k <= lsq_dim_unk; ++k) {
+            p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -=
+                this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] *
+                p_result[at<lsq_dim_unk + 1, nproma>(k, jc)];
+          }
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *=
+              this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)];
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)];
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -=
+              p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] *
+              this->lsq_moments[moments_at(jc, jb, j)];
+        }
+      }
+    }
+  }
+
+  // Check result
+  for (int i = 0; i < lsq_dim_unk + 1; ++i) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5)
+          << "For loop result fails for i = " << i << ", jc = " << jc;
+    }
+  }
+}
+
+TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = (i + 1);
+
+    this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma;
+    this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0;
+    for (int j = 1; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i;
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0;
+      this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9;
+      this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8;
+      this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7;
+      this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6;
+      this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5;
+      this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4;
+      this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3;
+      this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2;
+    }
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0;
+    }
+
+    this->lsq_moments[moments_at(i, 0, 0)] = 0.2;
+    this->lsq_moments[moments_at(i, 0, 1)] = 0.3;
+    this->lsq_moments[moments_at(i, 0, 2)] = 0.4;
+    this->lsq_moments[moments_at(i, 0, 3)] = 0.5;
+    this->lsq_moments[moments_at(i, 0, 4)] = 0.6;
+    this->lsq_moments[moments_at(i, 0, 5)] = 0.7;
+    this->lsq_moments[moments_at(i, 0, 6)] = 0.8;
+    this->lsq_moments[moments_at(i, 0, 7)] = 0.9;
+    this->lsq_moments[moments_at(i, 0, 8)] = 1.0;
+  }
+
+  // Test function
+  recon_lsq_cell_c_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Check result
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))],
+      -1.64, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))],
+      1.0, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))],
+      0.9, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))],
+      0.8, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))],
+      0.7, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))],
+      0.6, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))],
+      0.5, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))],
+      0.4, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))],
+      0.3, 1e-6);
+  EXPECT_NEAR(
+      this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))],
+      0.2, 1e-6);
+}
+
+TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_c = this->nblks_c;
+  constexpr int lsq_dim_c = this->lsq_dim_c;
+  constexpr int lsq_dim_unk = this->lsq_dim_unk;
+
+  const auto &p_cc_at = at<nproma, nlev, nblks_c>;
+  const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>;
+  const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>;
+  const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>;
+  const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>;
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0);
+
+  // Initialization
+  for (int i = 0; i < nproma; ++i) {
+    this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen);
+
+    for (int j = 0; j < lsq_dim_c; ++j) {
+      this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen);
+      this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0;
+    }
+
+    for (int j = 0; j < lsq_dim_unk; ++j) {
+      for (int k = 0; k < lsq_dim_c; ++k) {
+        this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen);
+      }
+      this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen);
+    }
+
+    for (int j = 0; j < lsq_dim_unk + 1; ++j) {
+      this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Test function
+  recon_lsq_cell_c_svd<TypeParam>(
+      this->p_cc.data(), this->cell_neighbor_idx.data(),
+      this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(),
+      this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk,
+      this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev,
+      this->elev, this->nproma, this->patch_id, this->l_limited_area,
+      this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk,
+      this->lsq_dim_c);
+
+  // Compute reference result
+  std::vector<TypeParam> z_d(lsq_dim_c);
+  std::vector<TypeParam> z_qt_times_d(lsq_dim_unk);
+  std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+    for (int jk = this->slev; jk < this->elev; ++jk) {
+      for (int jc = i_startidx; jc < i_endidx; ++jc) {
+        for (int i = 0; i < lsq_dim_c; ++i) {
+          z_d[i] = this->p_cc[p_cc_at(
+                       this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk,
+                       this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] -
+                   this->p_cc[p_cc_at(jc, jk, jb)];
+        }
+        for (int j = 1; j < lsq_dim_unk + 1; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0;
+          for (int i = 0; i < lsq_dim_c; ++i) {
+            p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] +=
+                this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i];
+          }
+        }
+        p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] =
+            this->p_cc[p_cc_at(jc, jk, jb)];
+        for (int j = 0; j < lsq_dim_unk; ++j) {
+          p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -=
+              p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] *
+              this->lsq_moments[moments_at(jc, jb, j)];
+        }
+      }
+    }
+  }
+  // Check result
+  for (int i = 0; i < lsq_dim_unk + 1; ++i) {
+    for (int jc = 0; jc < nproma; ++jc) {
+      EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))],
+                  p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5)
+          << "For loop result fails for i = " << i << ", jc = " << jc;
+    }
+  }
+}
diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..68e80245f2fa4ea19f173f1f8ac095fda5505775
--- /dev/null
+++ b/test/c/test_horizontal_rot.cpp
@@ -0,0 +1,378 @@
+// ICON
+//
+// ---------------------------------------------------------------
+// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss
+// Contact information: icon-model.org
+//
+// See AUTHORS.TXT for a list of authors
+// See LICENSES/ for license information
+// SPDX-License-Identifier: BSD-3-Clause
+// ---------------------------------------------------------------
+
+#include <iostream>
+#include <random>
+#include <vector>
+
+#include <Kokkos_Core.hpp>
+#include <gtest/gtest.h>
+#include <dim_helper.hpp>
+#include <horizontal/mo_lib_divrot.hpp>
+#include <support/mo_lib_loopindices.hpp>
+
+/// Test class for the horizontal rotation tests. Templated for the ValueType.
+template <typename ValueType>
+class HorizontalRotVertexTest : public ::testing::Test {
+protected:
+  static constexpr int nproma = 3;  // inner loop length
+  static constexpr int nlev = 2;    // number of vertical levels
+  static constexpr int nblks_e = 1; // number of edge blocks
+  static constexpr int nblks_v = 1; // number of vertex blocks
+  static constexpr int dim4d = 2;   // 4th dimension size
+
+  int i_startblk = 0;
+  int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1]
+  int i_startidx_in = 0;
+  int i_endidx_in = nproma; // Full range: 0 .. nproma-1
+  std::vector<int> slev;
+  std::vector<int> elev;
+  bool lacc = false;      // Not using ACC-specific behavior.
+  bool acc_async = false; // Not using ACC-specific behavior.
+
+  std::vector<ValueType> vec_e;
+  std::vector<int> vert_edge_idx;
+  std::vector<int> vert_edge_blk;
+  std::vector<ValueType> geofac_rot;
+  std::vector<ValueType> rot_vec;
+  std::vector<ValueType> f4din;
+  std::vector<ValueType> f4dout;
+
+  HorizontalRotVertexTest() {
+    slev.resize(dim4d, 0);
+    elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1)
+
+    vec_e.resize(dim_combine(nproma, nlev, nblks_e));
+    vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6));
+    vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6));
+    geofac_rot.resize(dim_combine(nproma, 6, nblks_v));
+    rot_vec.resize(dim_combine(nproma, nlev, nblks_v));
+    f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d));
+    f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d));
+  }
+};
+
+/// ValueTypes which the divrot tests should run with
+typedef ::testing::Types<float, double> ValueTypes;
+
+TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes);
+
+TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int nblks_v = this->nblks_v;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &vert_edge_at = at<nproma, nblks_v, 6>;
+  const auto &geofac_rot_at = at<nproma, 6, nblks_v>;
+  const auto &rot_vec_at = at<nproma, nlev, nblks_v>;
+
+  // Initialization with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+    }
+
+    // Set edge indices to point to specific edges
+    for (int j = 0; j < 6; ++j) {
+      this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma;
+      // All edges are in the same block for this test
+      this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors for rotation
+    this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3;
+    this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2;
+    this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1;
+    this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2;
+    this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1;
+    this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1;
+
+    // Initialize rot_vec to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->rot_vec[rot_vec_at(i, k, 0)] = 0.0;
+    }
+  }
+
+  // Call the rot_vertex_atmos function
+  rot_vertex_atmos<TypeParam>(
+      this->vec_e.data(), this->vert_edge_idx.data(),
+      this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(),
+      this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+      this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev,
+      this->nblks_e, this->nblks_v);
+
+  // Expected values based on the initialization pattern
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6);
+}
+
+TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int nblks_v = this->nblks_v;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &vert_edge_at = at<nproma, nblks_v, 6>;
+  const auto &geofac_rot_at = at<nproma, 6, nblks_v>;
+  const auto &rot_vec_at = at<nproma, nlev, nblks_v>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialization with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 6; ++j) {
+      this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen);
+      this->vert_edge_blk[vert_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 6; ++j) {
+      this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Initialize rot_vec to random values
+    for (int k = 0; k < nlev; ++k) {
+      this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Call the rot_vertex_atmos function
+  rot_vertex_atmos<TypeParam>(
+      this->vec_e.data(), this->vert_edge_idx.data(),
+      this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(),
+      this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+      this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev,
+      this->nblks_e, this->nblks_v);
+
+  // Calculate reference values separately and verify results
+  std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jv = i_startidx; jv < i_endidx; ++jv) {
+        ref_rot_vec[rot_vec_at(jv, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] *
+                this->geofac_rot[geofac_rot_at(jv, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] *
+                this->geofac_rot[geofac_rot_at(jv, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] *
+                this->geofac_rot[geofac_rot_at(jv, 2, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] *
+                this->geofac_rot[geofac_rot_at(jv, 3, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] *
+                this->geofac_rot[geofac_rot_at(jv, 4, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] *
+                this->geofac_rot[geofac_rot_at(jv, 5, jb)];
+      }
+    }
+  }
+
+  // Verify results
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)],
+                  ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5)
+          << "Results differ at i=" << i << ", k=" << k;
+    }
+  }
+}
+
+TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes);
+
+TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int nblks_v = this->nblks_v;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &vert_edge_at = at<nproma, nblks_v, 6>;
+  const auto &geofac_rot_at = at<nproma, 6, nblks_v>;
+  const auto &rot_vec_at = at<nproma, nlev, nblks_v>;
+
+  // Initialization with specific values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern
+    }
+
+    // Set edge indices to point to specific edges
+    for (int j = 0; j < 6; ++j) {
+      this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma;
+      // All edges are in the same block for this test
+      this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0;
+    }
+
+    // Geometric factors for rotation
+    this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3;
+    this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2;
+    this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1;
+    this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2;
+    this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1;
+    this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1;
+
+    // Initialize rot_vec to zero
+    for (int k = 0; k < nlev; ++k) {
+      this->rot_vec[rot_vec_at(i, k, 0)] = 0.0;
+    }
+  }
+
+  // Call the rot_vertex_ri function
+  rot_vertex_ri<TypeParam>(
+      this->vec_e.data(), this->vert_edge_idx.data(),
+      this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(),
+      this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+      this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async,
+      this->nlev, this->nblks_e, this->nblks_v);
+
+  // Expected values based on the initialization pattern
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6);
+  EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6);
+}
+
+TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) {
+  constexpr int nproma = this->nproma;
+  constexpr int nlev = this->nlev;
+  constexpr int nblks_e = this->nblks_e;
+  constexpr int nblks_v = this->nblks_v;
+
+  const auto &vec_e_at = at<nproma, nlev, nblks_e>;
+  const auto &vert_edge_at = at<nproma, nblks_v, 6>;
+  const auto &geofac_rot_at = at<nproma, 6, nblks_v>;
+  const auto &rot_vec_at = at<nproma, nlev, nblks_v>;
+
+  // Set up random number generators
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<int> int_distrib(0, nproma - 1);
+  std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0);
+
+  // Initialization with random values
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen);
+    }
+
+    // Set random edge indices
+    for (int j = 0; j < 6; ++j) {
+      this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen);
+      this->vert_edge_blk[vert_edge_at(i, 0, j)] =
+          0; // Keep in same block for simplicity
+    }
+
+    // Random geometric factors
+    for (int j = 0; j < 6; ++j) {
+      this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen);
+    }
+
+    // Initialize rot_vec to random values
+    for (int k = 0; k < nlev; ++k) {
+      this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen);
+    }
+  }
+
+  // Call the rot_vertex_ri function
+  rot_vertex_ri<TypeParam>(
+      this->vec_e.data(), this->vert_edge_idx.data(),
+      this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(),
+      this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in,
+      this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async,
+      this->nlev, this->nblks_e, this->nblks_v);
+
+  // Ensure computation is complete for both modes
+  Kokkos::fence();
+
+  // Calculate reference values separately and verify results
+  std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0);
+
+  for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) {
+    int i_startidx, i_endidx;
+    get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb,
+                      this->i_startblk, this->i_endblk, i_startidx, i_endidx);
+
+    for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) {
+      for (int jv = i_startidx; jv < i_endidx; ++jv) {
+        ref_rot_vec[rot_vec_at(jv, jk, jb)] =
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] *
+                this->geofac_rot[geofac_rot_at(jv, 0, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] *
+                this->geofac_rot[geofac_rot_at(jv, 1, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] *
+                this->geofac_rot[geofac_rot_at(jv, 2, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] *
+                this->geofac_rot[geofac_rot_at(jv, 3, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] *
+                this->geofac_rot[geofac_rot_at(jv, 4, jb)] +
+            this->vec_e[vec_e_at(
+                this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk,
+                this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] *
+                this->geofac_rot[geofac_rot_at(jv, 5, jb)];
+      }
+    }
+  }
+
+  // Verify results
+  for (int i = 0; i < nproma; ++i) {
+    for (int k = 0; k < nlev; ++k) {
+      EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)],
+                  ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5)
+          << "Results differ at i=" << i << ", k=" << k << ")";
+    }
+  }
+}
+