diff --git a/_typos.toml b/_typos.toml index 4fe4968b64e1df08f66e6891fecf0a67a60c9bc4..8de4a86304bbea42529095bd68ed5726e1a028bf 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,6 +1,7 @@ [default] extend-ignore-re = [ ".*_pn", + "f4dout_*", ] extend-ignore-words-re = [ "Comput", @@ -10,6 +11,7 @@ extend-ignore-words-re = [ Wirth = "Wirth" # author name nin = "nin" # number of inputs Pilar = "Pilar" # author name +Comput = "Comput" # abbreviation for Computational [default.extend-identifiers] f4dout = "f4dout" # file name diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 078a14df177906b11f6d3914ac815eb4b28ed162..f3b75c052ee368835163bc770682b1805de41def 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -11,6 +11,7 @@ add_library( iconmath-horizontal + mo_lib_divrot.cpp mo_lib_divrot.F90 mo_lib_laplace.F90 mo_lib_gradients.F90) @@ -57,7 +58,7 @@ target_include_directories( # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html - $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> PRIVATE # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple # compile languages diff --git a/src/horizontal/mo_lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d086e8ba4d7910986117e912fa32cdc4f4426ac5 --- /dev/null +++ b/src/horizontal/mo_lib_divrot.cpp @@ -0,0 +1,1359 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <vector> + +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +template <typename T> +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T *> z_d("z_d", lsq_dim_c); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, + nblks_c); + UnmanagedConstT3D lsq_rmat_utri_c_view( + lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_l_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + // matrix multiplication Q^T d (partitioned into 2 dot products) + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); + + p_coeff_view(2, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); + p_coeff_view(1, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 0, jb) * + (z_qt_times_d(0) - + lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L); + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T *> z_b("z_b", lsq_dim_c); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 0 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_q_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_q_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + + p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - + ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); + }); + } + + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q); + +template <typename T> +void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + bool l_limited_area, bool lacc, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 0 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); + }); + } + + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); + +template <typename T> +void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 0 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_c_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_c_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk); + + p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); + p_coeff_view(8, jc, jk, jb) = + ptr_rrdiag(jc, 7, jb) * + (z_qt_times_d(7) - + ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb)); + p_coeff_view(7, jc, jk, jb) = + ptr_rrdiag(jc, 6, jb) * + (z_qt_times_d(6) - + (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(6, jc, jk, jb) = + ptr_rrdiag(jc, 5, jb) * + (z_qt_times_d(5) - + (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(5, jc, jk, jb) = + ptr_rrdiag(jc, 4, jb) * + (z_qt_times_d(4) - + (ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(1, jc, jk, jb) = + ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) + + ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) + + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) + + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) + + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) + + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) + + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8)); + }); + } + + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C); + +template <typename T> +void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + bool l_limited_area, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + Kokkos::View<T *> z_b("z_b", 9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 0 || l_limited_area) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy( + {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_init", initPolicy, + KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(9, jc, jk, jb) = + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); + p_coeff_view(8, jc, jk, jb) = + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); + p_coeff_view(7, jc, jk, jb) = + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); + p_coeff_view(6, jc, jk, jb) = + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) - + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) - + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) - + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); + }); + } + + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD); + +template <typename T> +void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D); + +template <typename T> +void div3d_2field(const T *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, + const T *in2, T *out2, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e); + UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_2field_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + + out2_view(jc, jk, jb) = + in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD); + +template <typename T> +void div4d(const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, const T *f4din, T *f4dout, int dim4d, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + + UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d); + UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx}, + {elev[ji], i_endidx}); + Kokkos::parallel_for( + "div4d_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + f4dout_view(jc, jk, jb, ji) = + f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) * + geofac_div_view(jc, 0, jb) + + f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) * + geofac_div_view(jc, 1, jb) + + f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) * + geofac_div_view(jc, 2, jb); + }); + } + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D); + +template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, + T *div_vec_c, const T *opt_in2, T *opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); + UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); + + Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c); + + int i_startblk = i_startblk_in[0]; + int i_endblk = i_endblk_in[0]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + aux_c2(jc, jk, jb) = + opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } + + if (patch_id > 0 || l_limited_area) { + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step3", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb); + }); + } + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step4", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb); + }); + } + } + } + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step5", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + opt_out2_view(jc, jk, jb) = + aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step6", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG); + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS); + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } + + if (!acc_async) + Kokkos::fence(); +} + +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI); diff --git a/src/horizontal/mo_lib_divrot.hpp b/src/horizontal/mo_lib_divrot.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b8e97430192affa7d1820e0df36c61f1be3d98ed --- /dev/null +++ b/src/horizontal/mo_lib_divrot.hpp @@ -0,0 +1,130 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#include <Kokkos_Core.hpp> +#include <types.hpp> + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \ + void recon_lsq_cell_l( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \ + void recon_lsq_cell_l_svd( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ + void recon_lsq_cell_q( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ + void recon_lsq_cell_q_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ + void recon_lsq_cell_c( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) + +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ + void recon_lsq_cell_c_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) + +#define ICONMATH_DECLARE_DIV3D(_type) \ + void div3d(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, \ + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \ + void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, const _type *in2, _type *out2, \ + int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV4D(_type) \ + void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \ + const _type *geofac_div, const _type *f4din, _type *f4dout, \ + int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, const int *slev, const int *elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_DIV_AVG(_type) \ + void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \ + _type *opt_out2, const int *i_startblk_in, \ + const int *i_endblk_in, const int *i_startidx_in, \ + const int *i_endidx_in, int slev, int elev, int nproma, \ + int patch_id, bool l_limited_area, bool l2fields, bool lacc, \ + int nlev, int nblks_c, int nblks_e) + +#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \ + void rot_vertex_atmos( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_e, int nblks_v) + +#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \ + void rot_vertex_ri( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v) + +// Declare as templates +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T); +template <typename T> ICONMATH_DECLARE_DIV3D(T); +template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T); +template <typename T> ICONMATH_DECLARE_DIV4D(T); +template <typename T> ICONMATH_DECLARE_DIV_AVG(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T); diff --git a/src/types.hpp b/src/types.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7192e18faed86d8534411653cb040fac09720310 --- /dev/null +++ b/src/types.hpp @@ -0,0 +1,16 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#define ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro) \ + template _macro(float); \ + template _macro(double) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index c9320cb7eec1402eb107dee72f0b68d7ba3a9908..98a21b277ca3ae22a8fafcc5e7640d7d62bfc4fc 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -27,6 +27,9 @@ endif() set(SOURCES main.cpp + test_horizontal_div.cpp + test_horizontal_recon.cpp + test_horizontal_rot.cpp test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp @@ -35,11 +38,14 @@ set(SOURCES # Create the test executable from your test files, including main.cpp. add_executable(iconmath_test_c ${SOURCES}) +target_include_directories(iconmath_test_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + # Link the test executable with GoogleTest and Kokkos. target_link_libraries(iconmath_test_c PUBLIC iconmath-support iconmath-interpolation + iconmath-horizontal PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/dim_helper.hpp b/test/c/dim_helper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..165d5d9d3052bb034c5795797942cf090bd669db --- /dev/null +++ b/test/c/dim_helper.hpp @@ -0,0 +1,88 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +// Template function for computing array size. +// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by +// dim_combine(2, 3, 4, 5). +// Which will automatically instantiate +// dim_combine<int, int, int, int>(2, 3, 4, 5). +// The function then call dim_combine recursively +// dim_combine<int, int, int, int>(2, 3, 4, 5) { +// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); +// } +// dim_combine<int, int, int>(3, 4, 5) { +// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); +// } +// dim_combine<int, int>(4, 5) { +// return static_cast<size_t>(4) * dim_combine<int>(5); +// } +// Where the last dim_combine is specialized as +// dim_combine<int>(5) { +// return static_cast<size_t>(5); +// } +// Which gives +// dim_combine<int, int, int, int>(2, 3, 4, 5) = +// static_cast<size_t>(2) * static_cast<size_t>(3) * +// static_cast<size_t>(4) * static_cast<size_t>(5) +/// Template helpers for combining multiple dimension array sizes. +/// The base function of dimension combine. Should not be used. +template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } +/// Template specialization of only one dimension, returns the dimension itself. +template <typename T> size_t dim_combine(T dim) { + return static_cast<size_t>(dim); +} +/// Template specialization of picking out the first dimension. The combined +/// dimension is the first dimension times the combined dimension of the rest. +template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { + return static_cast<size_t>(dim) * dim_combine(dims...); +} + +// Template function for LayoutLeft ID access in compile time. +// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its +// corresponding vector id (LayoutLeft) by +// at<2, 3, 4, 5>(id1, id2, id3, id4). +// The at_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive at_impl function. In this example, +// at<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); +// } +// at_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); +// } +// at_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); +// } +// at_impl<5>(2 * 3 * 4, id4) { +// return id4 * 2 * 3 * 4; +// } +// Which gives +// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// id3 * 2 * 3 + id4 * 2 * 3 * 4 +/// Helper type converting integer numbers to int +template <class T, auto> using always_t = T; +/// Base function of at_impl. Should not be used. +template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } +/// Template specialization of the last ID +template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } +/// Template specialization of at_impl, accumulate the return value using the +/// first id and pass the prefix to the next recursive at_impl function. +template <int FirstDim, int... Dims> +int at_impl(int prefix, int id, always_t<int, Dims>... ids) { + return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); +} +/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +/// LayoutLeft. Use this function instead of at_impl. +template <int FirstDim, int... Dims> +int at(int id, always_t<int, Dims>... ids) { + return id + at_impl<Dims...>(FirstDim, ids...); +} diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp new file mode 100644 index 0000000000000000000000000000000000000000..596d19e708d6c6e466fa2f23c579cb51f8689e19 --- /dev/null +++ b/test/c/test_horizontal_div.cpp @@ -0,0 +1,1070 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal divergence tests. Templated for the ValueType +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. + + std::vector<ValueType> vec_e; + std::vector<int> cell_edge_idx; + std::vector<int> cell_edge_blk; + std::vector<ValueType> geofac_div; + std::vector<ValueType> div_vec_c; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + // Followings are needed in HorizontalDivAvgTest + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> avg_coeff; + std::vector<ValueType> opt_in2; + std::vector<ValueType> opt_out2; + + HorizontalDivTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); + geofac_div.resize(dim_combine(nproma, 3, nblks_c)); + div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); + opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); + opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); + } +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->f4din[f4d_at(i, k, 0, 0)] = + (i + 1) * (k + 2); // Different pattern for second field + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c and f4dout to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Check first field (same as in div3d test) + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + + // Check second field (expected values calculated manually) + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c and f4dout to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + // Calculate reference value for first field + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + // Calculate reference value for second field + ref_f4dout[f4dout_at(jc, jk, jb, 0)] = + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], + 0)] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], + 0)] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], + 0)] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results for first field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "First field results differ at i=" << i << ", k=" << k; + } + } + + // Verify results for second field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + << "Second field results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Compute reference result and check + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + TypeParam expected = 0.0; + for (int je = 0; je < 3; ++je) { + expected += + this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * + this->geofac_div[geofac_div_at(jc, je, jb)]; + } + + EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + << "Random test fails at jc=" << jc << ", jk=" << jk + << ", jb=" << jb << ", ji=" << ji; + } + } + } + } +} + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for additional parameters + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; // Set to false for this test + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + } + + // Call the div_avg function with l2fields=false + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated + // since l2fields=false) + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, + // not opt_out2) + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results - only check div_vec_c since l2fields=false means opt_out2 + // isn't updated + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + } + } +} diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8938a101bd2a350da12a0def450472d1e0c9ec9f --- /dev/null +++ b/test/c/test_horizontal_recon.cpp @@ -0,0 +1,1199 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +/// Base test class for the horizontal reconstruct tests. Templated for the ValueType +/// and ReconMethod for the reconstruction method. +template <typename ValueType, int ReconMethod> +class HorizontalReconTest : public ::testing::Test { +protected: + // lsq_dim_c and lsq_dim_unk are instantiated in compile time. + static constexpr std::tuple<int, int> + init_lsq_dim(ReconstructionMethod method) { + switch (method) { + case ReconstructionMethod::linear: + return std::make_tuple(3, 2); + case ReconstructionMethod::quadratic: + return std::make_tuple(9, 5); + case ReconstructionMethod::cubic: + return std::make_tuple(9, 9); + } + } + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 1; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); + static constexpr int lsq_dim_c = std::get<0>(lsq_dim); + static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); + + // Parameter values. + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev; // Full vertical range (0 .. nlev-1) + int patch_id = 0; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + bool l_consv = true; // With conservative correction. + bool l_limited_area = true; // Limited area setup + + std::vector<ValueType> p_cc; + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> lsq_qtmat_c; + std::vector<ValueType> lsq_rmat_rdiag_c; + std::vector<ValueType> lsq_rmat_utri_c; + std::vector<ValueType> lsq_moments; + std::vector<ValueType> lsq_pseudoinv; + std::vector<ValueType> p_coeff; + + HorizontalReconTest() { + p_cc.resize(dim_combine(nproma, nlev, nblks_c)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); + lsq_rmat_utri_c.resize(dim_combine( + nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); + lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); + } +}; + +/// Test class for the horizontal tests. The reconstruction method is specified +/// to linear. +template <typename ValueType> +class HorizontalReconLinearTest + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::linear)> { +}; + +/// Test class for the horizontal tests. The reconstruction method is specified +/// to quadratic. +template <typename ValueType> +class HorizontalReconQuadraticTest + : public HorizontalReconTest< + ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; + +/// Test class for the horizontal tests. The reconstruction method is specified +/// to cubic. +template <typename ValueType> +class HorizontalReconCubicTest + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::cubic)> { +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + z_qt_times_d[0] = 0.0; + z_qt_times_d[1] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i]; + z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d[0] - + this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.24, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 3.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -2.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 2.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -3.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 2.6, 1e-6); +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization is done only for iblk = 0 and ilev = 0 + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + << "For loop result fails for j = " << j << ", jc = " << jc; + } + } +} + +TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; + this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; + this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; + this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.28, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.4, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp new file mode 100644 index 0000000000000000000000000000000000000000..68e80245f2fa4ea19f173f1f8ac095fda5505775 --- /dev/null +++ b/test/c/test_horizontal_rot.cpp @@ -0,0 +1,378 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal rotation tests. Templated for the ValueType. +template <typename ValueType> +class HorizontalRotVertexTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. + + std::vector<ValueType> vec_e; + std::vector<int> vert_edge_idx; + std::vector<int> vert_edge_blk; + std::vector<ValueType> geofac_rot; + std::vector<ValueType> rot_vec; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalRotVertexTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); + vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); + geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); + rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); + } +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Ensure computation is complete for both modes + Kokkos::fence(); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k << ")"; + } + } +} +