From 670c30e9eb273f83b519167b097fbce4785c56c8 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 24 Feb 2025 15:59:28 +0100 Subject: [PATCH 01/35] Add cpp implementations (untested) --- src/horizontal/CMakeLists.txt | 2 + src/horizontal/lib_divrot.cpp | 328 ++++++++++++++++++++++++++++++ src/horizontal/lib_divrot.hpp | 42 ++++ test/c/test_horizontal_divrot.cpp | 0 4 files changed, 372 insertions(+) create mode 100644 src/horizontal/lib_divrot.cpp create mode 100644 src/horizontal/lib_divrot.hpp create mode 100644 test/c/test_horizontal_divrot.cpp diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 078a14d..198488f 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -11,6 +11,7 @@ add_library( iconmath-horizontal + lib_divrot.cpp mo_lib_divrot.F90 mo_lib_laplace.F90 mo_lib_gradients.F90) @@ -57,6 +58,7 @@ target_include_directories( # Path to the internal C/C++ headers (for testing): Requires CMake 3.15+ for # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html + $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> PRIVATE # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp new file mode 100644 index 0000000..9dd698c --- /dev/null +++ b/src/horizontal/lib_divrot.cpp @@ -0,0 +1,328 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> 
+#include <vector> + +template <typename T> +void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(3); + UnmanagedT3D z_qt_times_d(2); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, + nblks_c); + UnmanagedConstT3D lsq_rmat_utri_c_view( + lsq_rmat_utri_c, nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_l_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2) = p_cc_view(iidx(jc, jb, 2), 
jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + // matrix multiplication Q^T d (partitioned into 2 dot products) + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3); + + p_coeff_view(3, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 2, jb) * z_qt_times_d(2); + p_coeff_view(2, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 1, jb) * + (z_qt_times_d(1) - + lsq_rmat_utri_c_view(jc, 1, jb) * p_coeff_view(3, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(1, jc, jk, jb) = + p_coeff_view(1, jc, jk, jb) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, + int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_b(3); + + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3); + p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + }); + if (l_consv) { + Kokkos::parallel_for( + "recon_lsq_cell_l_svd_consv", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(1, jc, jk, jb) = + p_coeff_view(1, jc, jk, jb) - + 
p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + }); + } + } + + if (!acc_async) + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); + UnmanagedT3D z_qt_times_d(5); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT3D p_coeff_view(p_coeff); + + UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk ^ 2 - lsq_dim_unk) / 2, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {1, i_startidx_in, slev, i_startblk}, + {7, i_endidx_in + 1, elev + 1, i_endblk + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_q_init", initPolicy, + KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { + p_coeff_view(z, jc, jk, jb) = 0; + }); + } + 
+ for (int jb = i_startblk; jb <= i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( + {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::parallel_for( + "recon_lsq_cell_q_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + z_d(9, jc, jk) = p_cc_view(iidx(jc, jb, 9), jk, iblk(jc, jb, 9)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 1, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 
3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 2, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 3, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 4, 9, jb) * z_d(9, jc, jk); + z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk) + + lsq_qtmat_c_view(jc, 5, 9, jb) * z_d(9, jc, jk); + + p_coeff_view(6, jc, jk, jb) = ptr_rrdiag(jc, 5, jb) * z_qt_times_d(5); + p_coeff_view(5, jc, jk, jb) = + ptr_rrdiag(jc, 4, jb) * + (z_qt_times_d(4) - + ptr_rutri(jc, 1, jb) * p_coeff_view(6, jc, jk, 
jb)); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 3, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb) - + ptr_rutri(jc, 10, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + p_cc(jc, jk, jb) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5); + }); + } + + Kokkos::fence(); +} diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp new file mode 100644 index 0000000..6977e5d --- /dev/null +++ b/src/horizontal/lib_divrot.hpp @@ -0,0 +1,42 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#include <Kokkos_Core.hpp> + +template <typename T> +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T &p_coeff, int 
i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_pseudoinv, + const T *lsq_moments, T &p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, + bool lacc, bool acc_async, int nblks_c, + int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp new file mode 100644 index 0000000..e69de29 -- GitLab From f0cb7af9e59cd93ff5a3a920767430e1c033814b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 24 Feb 2025 22:25:34 +0100 Subject: [PATCH 02/35] Fix indexes --- src/horizontal/lib_divrot.cpp | 162 +++++++++++++++++----------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 9dd698c..addd485 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -49,46 +49,46 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, 
jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_l_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_d(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_d(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_d(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); // matrix multiplication Q^T d (partitioned into 2 dot products) - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3); + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2); - p_coeff_view(3, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 2, jb) * z_qt_times_d(2); p_coeff_view(2, jc, jk, jb) = - lsq_rmat_rdiag_c_view(jc, 1, jb) * - (z_qt_times_d(1) - - lsq_rmat_utri_c_view(jc, 1, jb) * p_coeff_view(3, jc, jk, jb)); - p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + lsq_rmat_rdiag_c_view(jc, 1, jb) * z_qt_times_d(1); + p_coeff_view(1, jc, jk, jb) = + lsq_rmat_rdiag_c_view(jc, 0, jb) * + (z_qt_times_d(0) - + lsq_rmat_utri_c_view(jc, 0, jb) * p_coeff_view(2, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); if (l_consv) { Kokkos::parallel_for( "recon_lsq_cell_l_consv", innerPolicy, 
KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(1, jc, jk, jb) = - p_coeff_view(1, jc, jk, jb) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); }); } } @@ -126,41 +126,41 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_l_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - p_cc_view(jc, jk, jb); - z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3); p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3); - p_coeff_view(1, jc, jk, jb) = p_cc_view(jc, jk, jb); + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2); + p_coeff_view(1, jc, jk, 
jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2); + p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb); }); if (l_consv) { Kokkos::parallel_for( "recon_lsq_cell_l_svd_consv", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - p_coeff_view(1, jc, jk, jb) = - p_coeff_view(1, jc, jk, jb) - - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2); + p_coeff_view(0, jc, jk, jb) = + p_coeff_view(0, jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1); }); } } @@ -199,13 +199,13 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, lsq_dim_c, nblks_c); UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, - (lsq_dim_unk ^ 2 - lsq_dim_unk) / 2, nblks_c); + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); if (patch_id > 1 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {1, i_startidx_in, slev, i_startblk}, - {7, i_endidx_in + 1, elev + 1, i_endblk + 1}); + {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { @@ -213,16 +213,18 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, }); } - for (int jb = i_startblk; jb <= i_endblk; ++jb) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy( - {slev, i_startidx}, {elev + 1, i_endidx + 1}); + 
Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); Kokkos::parallel_for( "recon_lsq_cell_q_step1", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - p_cc_view(jc, jk, jb); z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - @@ -239,88 +241,86 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, p_cc_view(jc, jk, jb); z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - p_cc_view(jc, jk, jb); - z_d(9, jc, jk) = p_cc_view(iidx(jc, jb, 9), jk, iblk(jc, jb, 9)) - - p_cc_view(jc, jk, jb); }); Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 1, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 1, jb) * 
z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 2, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 3, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 4, 9, jb) * z_d(9, jc, jk); - z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, 
jc, jk) + - lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk) + - lsq_qtmat_c_view(jc, 5, 9, jb) * z_d(9, jc, jk); + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); - p_coeff_view(6, jc, jk, jb) = ptr_rrdiag(jc, 5, jb) * z_qt_times_d(5); - p_coeff_view(5, jc, jk, jb) = - ptr_rrdiag(jc, 4, jb) * - (z_qt_times_d(4) - - ptr_rutri(jc, 1, jb) * p_coeff_view(6, jc, jk, jb)); + p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = ptr_rrdiag(jc, 3, jb) * (z_qt_times_d(3) - - ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 3, jb) * p_coeff_view(6, jc, jk, jb)); + ptr_rutri(jc, 0, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(3, jc, jk, jb) = ptr_rrdiag(jc, 2, jb) * (z_qt_times_d(2) - - ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb)); + ptr_rutri(jc, 1, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 2, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(2, jc, jk, jb) = ptr_rrdiag(jc, 1, jb) * (z_qt_times_d(1) - + ptr_rutri(jc, 3, jb) * p_coeff_view(3, jc, jk, jb) - + ptr_rutri(jc, 4, jb) * p_coeff_view(4, jc, jk, jb) - + ptr_rutri(jc, 5, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(1, jc, jk, jb) = + ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + ptr_rutri(jc, 6, jb) * p_coeff_view(2, jc, jk, jb) - ptr_rutri(jc, 7, jb) * p_coeff_view(3, jc, jk, jb) - ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - - ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb) - - ptr_rutri(jc, 10, jb) * p_coeff_view(6, jc, jk, jb)); - p_coeff_view(1, jc, jk, jb) = + ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); + p_coeff_view(0, jc, jk, jb) = 
p_cc(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - - p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - - p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5); + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); }); } -- GitLab From 9d04113e48f8e7081adc57b1f7f890346785a58b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Tue, 25 Feb 2025 14:18:33 +0100 Subject: [PATCH 03/35] Add cpp implementations (untested) --- src/horizontal/lib_divrot.cpp | 617 +++++++++++++++++++++++++++++++++- src/horizontal/lib_divrot.hpp | 29 ++ 2 files changed, 630 insertions(+), 16 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index addd485..5b51d98 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -25,22 +25,27 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(3); - UnmanagedT3D z_qt_times_d(2); + UnmanagedT1D z_d(3); + UnmanagedT1D z_qt_times_d(2); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_rmat_rdiag_c_view(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); @@ -108,21 +113,26 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_b(3); + UnmanagedT1D z_b(3); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); @@ -180,22 +190,29 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT3D z_qt_times_d(5); + UnmanagedT1D z_qt_times_d(5); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT3D p_coeff_view(p_coeff); + UnmanagedT4D p_coeff_view(p_coeff); - UnmanagedConstT3D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, @@ -208,8 +225,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, - KOKKOS_LAMBDA(const int z, const int jc, const int jk, const int jb) { - p_coeff_view(z, jc, jk, jb) = 0; + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; }); } @@ -326,3 +343,571 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } + +template <typename T> +void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int 
i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_b(lsq_high_set_dim_c, nproma, elev); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 
0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_q_svd_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + p_coeff_view(3, jc, jk, jb) = + 
lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4); + }); + } + + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_c(const T *p_cc, const int 
*lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); + UnmanagedT1D z_qt_times_d(9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D ptr_rrdiag(lsq_rmat_rdiag_c, nproma, lsq_dim_unk, nblks_c); + UnmanagedConstT3D ptr_rutri(lsq_rmat_utri_c, nproma, + (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, + nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( + {0, i_startidx_in, slev, i_startblk}, {9, i_endidx_in, elev, i_endblk}); + Kokkos::parallel_for( + "recon_lsq_cell_c_init", initPolicy, + KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { + 
p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_d(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_d(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_d(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_d(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_d(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_d(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_d(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_d(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_d(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + }); + Kokkos::parallel_for( + "recon_lsq_cell_c_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, 
jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(5) = lsq_qtmat_c_view(jc, 5, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 5, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 5, 2, jb) * z_d(2, jc, jk) 
+ + lsq_qtmat_c_view(jc, 5, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 5, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 5, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 5, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 5, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 5, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(6) = lsq_qtmat_c_view(jc, 6, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 6, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 6, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 6, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 6, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 6, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 6, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 6, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 6, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(7) = lsq_qtmat_c_view(jc, 7, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 7, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 7, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 7, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 7, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 7, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 7, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 7, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 7, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(8) = lsq_qtmat_c_view(jc, 8, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 8, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 8, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 8, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 8, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 8, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 8, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 8, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 8, 8, jb) * z_d(8, jc, jk); + + p_coeff_view(9, jc, jk, jb) = ptr_rrdiag(jc, 8, jb) * z_qt_times_d(8); + p_coeff_view(8, jc, jk, jb) = + ptr_rrdiag(jc, 7, jb) * + (z_qt_times_d(7) - + ptr_rutri(jc, 0, jb) * p_coeff_view(9, jc, jk, jb)); + p_coeff_view(7, jc, 
jk, jb) = + ptr_rrdiag(jc, 6, jb) * + (z_qt_times_d(6) - + (ptr_rutri(jc, 1, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 2, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(6, jc, jk, jb) = + ptr_rrdiag(jc, 5, jb) * + (z_qt_times_d(5) - + (ptr_rutri(jc, 3, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 4, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 5, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(5, jc, jk, jb) = + ptr_rrdiag(jc, 4, jb) * + (z_qt_times_d(4) - + (ptr_rutri(jc, 6, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 7, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 8, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 9, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(4, jc, jk, jb) = + ptr_rrdiag(jc, 3, jb) * + (z_qt_times_d(3) - + (ptr_rutri(jc, 10, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 11, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 12, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 13, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 14, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(3, jc, jk, jb) = + ptr_rrdiag(jc, 2, jb) * + (z_qt_times_d(2) - + (ptr_rutri(jc, 15, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 16, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 17, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 18, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 19, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 20, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(2, jc, jk, jb) = + ptr_rrdiag(jc, 1, jb) * + (z_qt_times_d(1) - + (ptr_rutri(jc, 21, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 22, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 23, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 24, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 25, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 26, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 27, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(1, jc, jk, jb) = 
+ ptr_rrdiag(jc, 0, jb) * + (z_qt_times_d(0) - + (ptr_rutri(jc, 28, jb) * p_coeff_view(2, jc, jk, jb) + + ptr_rutri(jc, 29, jb) * p_coeff_view(3, jc, jk, jb) + + ptr_rutri(jc, 30, jb) * p_coeff_view(4, jc, jk, jb) + + ptr_rutri(jc, 31, jb) * p_coeff_view(5, jc, jk, jb) + + ptr_rutri(jc, 32, jb) * p_coeff_view(6, jc, jk, jb) + + ptr_rutri(jc, 33, jb) * p_coeff_view(7, jc, jk, jb) + + ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + + ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); + p_coeff_view(0, jc, jk, jb) = + p_cc(jc, jk, jb) - + (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) + + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) + + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) + + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) + + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) + + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8)); + }); + } + + Kokkos::fence(); +} + +template <typename T> +void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c) { + // Wrap raw pointers in unmanaged Kokkos Views. 
+ typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT1D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedT1D z_b(9); + + UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); + + UnmanagedConstT3D p_cc_view(p_cc); + UnmanagedT4D p_coeff_view(p_coeff); + + UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, + lsq_dim_c, nblks_c); + UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); + + if (patch_id > 1 || l_limited_area) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy({slev, i_startidx, 0}, + {elev, i_endidx, 9}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_init", initPolicy, + KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { + p_coeff_view(ji, jc, jk, jb) = 0; + }); + } + } + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "recon_lsq_cell_c_svd_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, 
jk, jb); + z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); + + p_coeff_view(9, jc, jk, jb) = + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8, jc, jk); + p_coeff_view(8, jc, jk, jb) = + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8, jc, jk); + p_coeff_view(7, jc, jk, jb) = + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4, jc, 
jk) + + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8, jc, jk); + p_coeff_view(6, jc, jk, jb) = + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8, jc, jk); + p_coeff_view(5, jc, jk, jb) = + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + p_coeff_view(4, jc, jk, jb) = + lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + p_coeff_view(3, jc, jk, jb) = + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 2, 3, 
jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + p_coeff_view(2, jc, jk, jb) = + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + p_coeff_view(1, jc, jk, jb) = + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + p_coeff_view(0, jc, jk, jb) = + p_cc_view(jc, jk, jb) - + p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - + p_coeff_view(4, jc, jk, jb) * lsq_moments_view(jc, jb, 3) - + p_coeff_view(5, jc, jk, jb) * lsq_moments_view(jc, jb, 4) - + p_coeff_view(6, jc, jk, jb) * lsq_moments_view(jc, jb, 5) - + p_coeff_view(7, jc, jk, jb) * lsq_moments_view(jc, jb, 6) - + p_coeff_view(8, jc, jk, jb) * lsq_moments_view(jc, jb, 7) - + p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); + }); + } +} diff --git a/src/horizontal/lib_divrot.hpp 
b/src/horizontal/lib_divrot.hpp index 6977e5d..c32ee12 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -40,3 +40,32 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + int patch_id, int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, + const int *lsq_blk_c, const T *lsq_pseudoinv, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, int patch_id, + int lsq_high_set_dim_c, bool l_limited_area, + bool lacc, int nblks_c, int lsq_dim_unk, + int lsq_dim_c); -- GitLab From 642a9b166d2bdf4f04dadcb21674984e2876f86b Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 26 Feb 2025 11:03:35 +0100 Subject: [PATCH 04/35] Add cpp implementations (untested) --- src/horizontal/lib_divrot.cpp | 442 +++++++++++++++++++++++++++++++++- src/horizontal/lib_divrot.hpp | 52 +++- 2 files changed, 486 
insertions(+), 8 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 5b51d98..3586b03 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -182,7 +182,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -347,7 +347,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -487,7 +487,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -730,7 +730,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, 
+ const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -910,4 +910,438 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, p_coeff_view(9, jc, jk, jb) * lsq_moments_view(jc, jb, 8); }); } + + Kokkos::fence(); +} + +template <typename T> +void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, 
jb); + }); + } +} + +template <typename T> +void div3d_2field(const T *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, + const T *in2, T &out2, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D in2_view(in2, nproma, nlev, nblks_e); + UnmanagedT3D out2_view(out2, nproma, nlev, nblks_c); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div3d_2field_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + vec_e_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + + out2_view(jc, jk, jb) = + in2_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + in2_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) 
+ + in2_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } +} + +template <typename T> +void div4d(const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstT4D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT4D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstInt3D iidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D iblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 3, nblks_c); + + UnmanagedConstT4D f4din_view(f4din, nproma, nlev, nblks_e, dim4d); + UnmanagedT4D f4dout_view(f4dout, nproma, nlev, nblks_c, dim4d); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev[ji], i_startidx}, + {elev[ji], i_endidx}); + Kokkos::parallel_for( + "div4d_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + f4dout_view(jc, jk, jb, ji) = + f4din_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0), ji) * + geofac_div_view(jc, 0, jb) + + f4din_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1), ji) * + geofac_div_view(jc, 1, jb) + + f4din_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2), ji) * + 
geofac_div_view(jc, 2, jb); + }); + } + } +} + +template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, + T &div_vec_c, const T *opt_in2, T &opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D inidx(cell_neighbor_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D inblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D ieidx(cell_edge_idx, nproma, nblks_c, 3); + UnmanagedConstInt3D ieblk(cell_edge_blk, nproma, nblks_c, 3); + + UnmanagedConstT3D geofac_div_view(geofac_div, nproma, 4, nblks_e); + UnmanagedConstT3D avg_coeff_view(avg_coeff, nproma, nlev, nblks_c); + + UnmanagedT3D div_vec_c_view(div_vec_c, nproma, nlev, nblks_c); + + UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); + UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); + + UnmanagedT3D aux_c(nproma, nlev, nblks_c); + UnmanagedT3D aux_c2(nproma, nlev, nblks_c); + + int i_startblk = i_startblk_in[0]; + int i_endblk = i_endblk_in[0]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> 
innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step1", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + aux_c2(jc, jk, jb) = + opt_in2(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + opt_in2(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + opt_in2(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step2", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + aux_c(jc, jk, jb) = + vec_e_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + geofac_div_view(jc, 0, jb) + + vec_e_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + geofac_div_view(jc, 1, jb) + + vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + geofac_div_view(jc, 2, jb); + }); + } + } + + if (patch_id > 1 || l_limited_area) { + i_startblk = i_startblk_in[1]; + i_endblk = i_endblk_in[1]; + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step3", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = aux_c(jc, jk, jb); + }); + } + + if (l2fields) { + for (int jb 
= i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step4", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + opt_out2_view(jc, jk, jb) = aux_c2(jc, jk, jb); + }); + } + } + } + + i_startblk = i_startblk_in[2]; + i_endblk = i_endblk_in[2]; + + if (l2fields) { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step5", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + opt_out2_view(jc, jk, jb) = + aux_c2(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c2(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c2(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c2(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } else { + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk, i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "div_avg_step6", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jc) { + div_vec_c_view(jc, jk, 
jb) = + aux_c(jc, jk, jb) * avg_coeff_view(jc, 0, jb) + + aux_c(inidx(jc, jb, 0), jk, inblk(jc, jb, 0)) * + avg_coeff_view(jc, 1, jb) + + aux_c(inidx(jc, jb, 1), jk, inblk(jc, jb, 1)) * + avg_coeff_view(jc, 2, jb) + + aux_c(inidx(jc, jb, 2), jk, inblk(jc, jb, 2)) * + avg_coeff_view(jc, 3, jb); + }); + } + } +} + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), 
jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } +} + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v) { + // Wrap raw pointers in unmanaged Kokkos Views. + typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedConstT3D; + typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> + UnmanagedT3D; + typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, + Kokkos::MemoryUnmanaged> + UnmanagedConstInt3D; + + UnmanagedConstT3D vec_e_view(vec_e, nproma, nlev, nblks_e); + + UnmanagedConstInt3D iidx(vert_edge_idx, nproma, nblks_v, 6); + UnmanagedConstInt3D iblk(vert_edge_blk, nproma, nblks_v, 6); + + UnmanagedConstT3D geofac_rot_view(geofac_rot, nproma, 6, nblks_v); + + UnmanagedT3D rot_vec_view(rot_vec, nproma, nlev, nblks_v); + + for (int jb = i_startblk; jb < i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, + i_endblk, i_startidx, i_endidx); + + Kokkos::MDRangePolicy<Kokkos::Rank<2>> innerPolicy({slev, i_startidx}, + {elev, i_endidx}); + Kokkos::parallel_for( + "rot_vertex_atmos_inner", innerPolicy, + KOKKOS_LAMBDA(const int jk, const int jv) { + rot_vec_view(jv, jk, jb) = + vec_e_view(iidx(jv, jb, 0), jk, iblk(jv, jb, 0)) * + geofac_rot_view(jv, 0, jb) + + vec_e_view(iidx(jv, jb, 1), jk, iblk(jv, jb, 1)) * + geofac_rot_view(jv, 1, jb) + + vec_e_view(iidx(jv, jb, 2), jk, iblk(jv, jb, 2)) * + geofac_rot_view(jv, 2, jb) + + vec_e_view(iidx(jv, jb, 3), jk, iblk(jv, jb, 3)) * + geofac_rot_view(jv, 3, jb) + + 
vec_e_view(iidx(jv, jb, 4), jk, iblk(jv, jb, 4)) * + geofac_rot_view(jv, 4, jb) + + vec_e_view(iidx(jv, jb, 5), jk, iblk(jv, jb, 5)) * + geofac_rot_view(jv, 5, jb); + }); + } + + if (!acc_async) + Kokkos::fence(); } diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index c32ee12..36ed138 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -35,7 +35,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -44,7 +44,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -54,7 +54,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -63,9 +63,53 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, 
template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T *p_coeff, int i_startblk, + const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + +template <typename T> +void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, T &div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void div3d_2field(const T *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, + const T *in2, T &out2, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void div4d(const int *cell_edge_idx, const int *cell_edge_blk, + const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + +template <typename T> +void div_avg(const T *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, + T &div_vec_c, const T *opt_in2, T &opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); + +template <typename T> +void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T 
*geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + +template <typename T> +void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, + const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + bool acc_async, int nlev, int nblks_e, int nblks_v); -- GitLab From 9560ca72ebca7ac290d1e97fda36aa3a74dd2fea Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 26 Feb 2025 11:07:20 +0100 Subject: [PATCH 05/35] Fix typo --- _typos.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/_typos.toml b/_typos.toml index 4fe4968..58a18ef 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,6 +1,7 @@ [default] extend-ignore-re = [ ".*_pn", + "f4dout_*", ] extend-ignore-words-re = [ "Comput", -- GitLab From ea4fc0b5e7bbe35b051fc732cbb875ee628e2105 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 14:41:29 +0100 Subject: [PATCH 06/35] Fix bug and add first test --- src/horizontal/lib_divrot.cpp | 76 +++++++++----- src/horizontal/lib_divrot.hpp | 16 +-- test/c/CMakeLists.txt | 4 +- test/c/test_horizontal_divrot.cpp | 159 ++++++++++++++++++++++++++++++ 4 files changed, 220 insertions(+), 35 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 3586b03..c615a42 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -9,19 +9,20 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <iostream> #include <lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> #include <vector> template <typename T> -void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_qtmat_c, - const T *lsq_rmat_rdiag_c, 
const T *lsq_rmat_utri_c, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int lsq_dim_unk, - int lsq_dim_c) { +void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const T *lsq_qtmat_c, + const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, + const T *lsq_moments, T *p_coeff, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + int slev, int elev, int nproma, bool l_consv, bool lacc, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -36,14 +37,14 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_d(3); - UnmanagedT1D z_qt_times_d(2); + Kokkos::View<T *> z_d("z_d", 3); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 2); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -102,13 +103,32 @@ void recon_lsq_cell_l_(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } +template void +recon_lsq_cell_l<float>(const float *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *lsq_qtmat_c, + const float *lsq_rmat_rdiag_c, + const float *lsq_rmat_utri_c, const float *lsq_moments, + float *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, bool 
l_consv, bool lacc, bool acc_async, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); + +template void recon_lsq_cell_l<double>( + const double *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *lsq_qtmat_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_pseudoinv, const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, + bool lacc, bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -129,8 +149,8 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -186,7 +206,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -209,8 +230,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -351,7 +372,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -372,8 +393,8 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -491,7 +512,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c) { + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -514,8 +536,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -734,7 +756,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> @@ -755,8 +777,8 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); - UnmanagedConstT3D p_cc_view(p_cc); - UnmanagedT4D p_coeff_view(p_coeff); + UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index 36ed138..db60b29 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -17,10 +17,10 @@ template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int lsq_dim_unk, + bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -29,7 +29,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const T *lsq_moments, T &p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, + bool lacc, bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -39,7 +39,8 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int 
nblks_c, int lsq_dim_unk, int lsq_dim_c); + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, @@ -48,7 +49,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> @@ -58,7 +59,8 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, int lsq_dim_c); + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, @@ -67,7 +69,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int lsq_dim_unk, + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); template <typename T> diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index c9320cb..cf68892 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -27,8 +27,9 @@ endif() set(SOURCES main.cpp + test_horizontal_divrot.cpp test_tdma_solver.cpp - test_interpolation_vector.cpp + # test_interpolation_vector.cpp test_intp_rbf.cpp test_interpolation_scalar.cpp ) @@ -40,6 +41,7 @@ target_link_libraries(iconmath_test_c PUBLIC iconmath-support iconmath-interpolation + iconmath-horizontal PRIVATE gtest_main Kokkos::kokkos diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 
e69de29..1915fa4 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -0,0 +1,159 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <horizontal/lib_divrot.hpp> +#include <vector> + +// Template helpers for combining multiple dimension array sizes +template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } +template <typename T> size_t dim_combine(T dim) { + return static_cast<size_t>(dim); +} +template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { + return static_cast<size_t>(dim) * dim_combine(dims...); +} + +// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +// Template function for LayoutLeft ID access in compile time +template <class T, auto> using always_t = T; +template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } +template <int FirstDim, int... Dims> +constexpr int At_impl(int prefix, int id, always_t<int, Dims>... ids) { + return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); +} +template <int FirstDim, int... Dims> +// At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +// LayoutLeft +int At(int id, always_t<int, Dims>... 
ids) { + return id + At_impl<Dims...>(FirstDim, ids...); +} + +typedef ::testing::Types<float, double> ValueTypes; + +template <typename ValueType> +class HorizontalDivrotTest : public ::testing::Test { +protected: + // [lsq_dim_c, lsq_dim_unk] + static constexpr std::tuple<int, int> + init_lsq_dim(ReconstructionMethod method) { + switch (method) { + case ReconstructionMethod::linear: + return std::make_tuple(3, 2); + case ReconstructionMethod::quadratic: + return std::make_tuple(9, 5); + case ReconstructionMethod::cubic: + return std::make_tuple(9, 9); + } + } + + // Constant dimensions. + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 1; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(ReconstructionMethod::linear); + static constexpr int lsq_dim_c = std::get<0>(lsq_dim); + static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); + + // Parameter values. + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev; // Full vertical range (0 .. nlev-1) + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. 
+ bool l_consv = true; // Apply conservative correction + + std::vector<ValueType> p_cc; + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> lsq_qtmat_c; + std::vector<ValueType> lsq_rmat_rdiag_c; + std::vector<ValueType> lsq_rmat_utri_c; + std::vector<ValueType> lsq_moments; + std::vector<ValueType> p_coeff; + + HorizontalDivrotTest() { + p_cc.resize(dim_combine(nproma, nlev, nblks_c)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); + lsq_rmat_utri_c.resize(dim_combine( + nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); + lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + p_coeff.resize(dim_combine(lsq_dim_c, nproma, nlev, nblks_c)); + } +}; + +TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { + this->init_lsq_dim(ReconstructionMethod::linear); + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (TypeParam)(i + 1); + + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; + this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 2)] = i; + for (int j = 0; j < 3; ++j) { + this->cell_neighbor_blk[At<nproma, nblks_c, 3>(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, + 0)] = 1.0; + this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1,
j, + 0)] = 0.5; + this->p_coeff[At<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + } + + this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c + [At<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( + i, 0, 0)] = 0.1; + + this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; + this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + } + + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + EXPECT_NEAR(this->p_coeff[0], 0.34, 1e-6); + EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); + EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); +} -- GitLab From a254c39e98c857e638e8213822ffabc5a91073d5 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 14:42:51 +0100 Subject: [PATCH 07/35] Reverse commented file --- test/c/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index cf68892..18c2710 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -29,7 +29,7 @@ set(SOURCES main.cpp test_horizontal_divrot.cpp test_tdma_solver.cpp - # test_interpolation_vector.cpp + test_interpolation_vector.cpp test_intp_rbf.cpp test_interpolation_scalar.cpp ) -- GitLab From 79c31189b41cb725141020b0ffc4eb0d72ceac7d Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 15:28:43 +0100 Subject: [PATCH 08/35] Add comments for templates --- 
test/c/test_horizontal_divrot.cpp | 37 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 1915fa4..26ba118 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -14,11 +14,15 @@ #include <horizontal/lib_divrot.hpp> #include <vector> -// Template helpers for combining multiple dimension array sizes +// Template helpers for combining multiple dimension array sizes. +// The base function of dimension combine. Should not be used. template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } +// Template specialization of only one dimension, returns the dimension itself. template <typename T> size_t dim_combine(T dim) { return static_cast<size_t>(dim); } +// Template specialization of picking out the first dimension. The combined +// dimension is the first dimension times the combined dimension of the rest. template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { return static_cast<size_t>(dim) * dim_combine(dims...); } @@ -31,16 +35,41 @@ enum class ReconstructionMethod { }; // Template function for LayoutLeft ID access in compile time +// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its +// corresponding vector id (LayoutLeft) by +// At<2, 3, 4, 5>(id1, id2, id3, id4). +// The At_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive At_impl function. 
In this example, +// At<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + At_impl<3, 4, 5>(2, id2, id3, id4); +// } +// At_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + At_impl<4, 5>(2 * 3, id3, id4); +// } +// At_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + At_impl<5>(2 * 3 * 4, id4); +// } +// At_impl<5>(2 * 3 * 4, id4) { +// return id4 * 2 * 3 * 4; +// } +// Which gives +// At<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// id3 * 2 * 3 + id4 * 2 * 3 * 4 +// Helper type converting integer numbers to int template <class T, auto> using always_t = T; +// Base function of At_impl. Should not be used. template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +// Template specialization of the last ID template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } +// Template specialization of At_impl, accumulate the return value using the +// first id and pass the prefix to the next recursive At_impl function. template <int FirstDim, int... Dims> -constexpr int At_impl(int prefix, int id, always_t<int, Dims>... ids) { +int At_impl(int prefix, int id, always_t<int, Dims>... ids) { return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); } -template <int FirstDim, int... Dims> // At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft +// LayoutLeft. Use this function instead of At_impl. +template <int FirstDim, int... Dims> int At(int id, always_t<int, Dims>... 
ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -- GitLab From 35c5d8122cb9829ebbc46f27e0669cebca63289e Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Thu, 27 Feb 2025 16:04:35 +0100 Subject: [PATCH 09/35] Make reconstruction method templated --- test/c/test_horizontal_divrot.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 26ba118..082afa3 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -74,11 +74,21 @@ int At(int id, always_t<int, Dims>... ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -typedef ::testing::Types<float, double> ValueTypes; +// ValueType struct for compute precision and reconstruction method. +template <typename ValueType, int ReconMethod> struct DivrotType { + using type = ValueType; + static constexpr int get_recon_method() { return ReconMethod; }; +}; + +typedef ::testing::Types< + DivrotType<float, static_cast<int>(ReconstructionMethod::linear)>, + DivrotType<double, static_cast<int>(ReconstructionMethod::linear)>> + ValueTypes; -template <typename ValueType> +template <typename ValueTypes> class HorizontalDivrotTest : public ::testing::Test { protected: + using ValueType = typename ValueTypes::type; // [lsq_dim_c, lsq_dim_unk] static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { @@ -96,8 +106,8 @@ protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 1; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = - init_lsq_dim(ReconstructionMethod::linear); + static constexpr std::tuple<int, int> lsq_dim = init_lsq_dim( + static_cast<ReconstructionMethod>(ValueTypes::get_recon_method())); static constexpr int lsq_dim_c = std::get<0>(lsq_dim); static constexpr int lsq_dim_unk = 
std::get<1>(lsq_dim); @@ -137,7 +147,8 @@ protected: TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { - this->init_lsq_dim(ReconstructionMethod::linear); + using ValueType = typename TestFixture::ValueType; + constexpr int nproma = TestFixture::nproma; constexpr int nlev = TestFixture::nlev; constexpr int nblks_c = TestFixture::nblks_c; @@ -146,7 +157,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (TypeParam)(i + 1); + this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; @@ -173,7 +184,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } - recon_lsq_cell_l<TypeParam>( + recon_lsq_cell_l<ValueType>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), -- GitLab From b9e1e645b9a6cd43cdaf22c9978db393643cc823 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Fri, 28 Feb 2025 18:55:40 +0100 Subject: [PATCH 10/35] Instantiate and fix bugs --- src/horizontal/lib_divrot.cpp | 437 +++++++++++++++++++++++----------- 1 file changed, 297 insertions(+), 140 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index c615a42..9dce2e4 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -29,8 +29,6 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, 
Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, @@ -125,7 +123,7 @@ template void recon_lsq_cell_l<double>( template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, @@ -136,15 +134,13 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_b(3); + Kokkos::View<T *> z_b("z_b", 3); UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); @@ -199,10 +195,26 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } +template void recon_lsq_cell_l_svd<float>( + const float *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const float *lsq_pseudoinv, + const float *lsq_moments, float *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + +template void recon_lsq_cell_l_svd<double>( + const double *p_cc, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const double *lsq_pseudoinv, + const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, 
int i_endidx_in, int slev, int elev, int nproma, + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c); + template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -214,18 +226,14 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT1D z_qt_times_d(5); + Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 5); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -353,7 +361,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 8, jb) * p_coeff_view(4, jc, jk, jb) - ptr_rutri(jc, 9, jb) * p_coeff_view(5, jc, jk, jb)); p_coeff_view(0, jc, jk, jb) = - p_cc(jc, jk, jb) - + p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) - p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) - @@ -365,10 +373,28 @@ void 
recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } +template void recon_lsq_cell_q<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, + const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_q<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -380,15 +406,13 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_b(lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_b("z_b", lsq_high_set_dim_c, 
nproma, elev); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -505,10 +529,26 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } +template void recon_lsq_cell_q_svd<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_q_svd<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, + const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -520,18 +560,14 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; - typedef Kokkos::View<T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT3D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef 
Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT3D z_d(lsq_high_set_dim_c, nproma, elev); - UnmanagedT1D z_qt_times_d(9); + Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -733,7 +769,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, ptr_rutri(jc, 34, jb) * p_coeff_view(8, jc, jk, jb) + ptr_rutri(jc, 35, jb) * p_coeff_view(9, jc, jk, jb))); p_coeff_view(0, jc, jk, jb) = - p_cc(jc, jk, jb) - + p_cc_view(jc, jk, jb) - (p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) + p_coeff_view(2, jc, jk, jb) * lsq_moments_view(jc, jb, 1) + p_coeff_view(3, jc, jk, jb) * lsq_moments_view(jc, jb, 2) + @@ -749,10 +785,28 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } +template void recon_lsq_cell_c<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, + const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_c<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, + const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void 
recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, + const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, bool l_limited_area, @@ -764,15 +818,13 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, typedef Kokkos::View<const T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT4D; - typedef Kokkos::View<T *, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> - UnmanagedT1D; typedef Kokkos::View<T ****, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedT4D; typedef Kokkos::View<const int ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - UnmanagedT1D z_b(9); + Kokkos::View<T *> z_b("z_b", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); @@ -810,115 +862,115 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::parallel_for( "recon_lsq_cell_c_svd_inner", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_b(0, jc, jk) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - - p_cc_view(jc, jk, jb); - z_b(1, jc, jk) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - - p_cc_view(jc, jk, jb); - z_b(2, jc, jk) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - - p_cc_view(jc, jk, jb); - z_b(3, jc, jk) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - - p_cc_view(jc, jk, jb); - z_b(4, jc, jk) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - - p_cc_view(jc, jk, jb); - z_b(5, jc, jk) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - - p_cc_view(jc, jk, jb); - z_b(6, jc, jk) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - - p_cc_view(jc, jk, jb); - z_b(7, jc, jk) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - - p_cc_view(jc, jk, jb); - z_b(8, jc, jk) = p_cc_view(iidx(jc, jb, 8), jk, 
iblk(jc, jb, 8)) - - p_cc_view(jc, jk, jb); + z_b(0) = p_cc_view(iidx(jc, jb, 0), jk, iblk(jc, jb, 0)) - + p_cc_view(jc, jk, jb); + z_b(1) = p_cc_view(iidx(jc, jb, 1), jk, iblk(jc, jb, 1)) - + p_cc_view(jc, jk, jb); + z_b(2) = p_cc_view(iidx(jc, jb, 2), jk, iblk(jc, jb, 2)) - + p_cc_view(jc, jk, jb); + z_b(3) = p_cc_view(iidx(jc, jb, 3), jk, iblk(jc, jb, 3)) - + p_cc_view(jc, jk, jb); + z_b(4) = p_cc_view(iidx(jc, jb, 4), jk, iblk(jc, jb, 4)) - + p_cc_view(jc, jk, jb); + z_b(5) = p_cc_view(iidx(jc, jb, 5), jk, iblk(jc, jb, 5)) - + p_cc_view(jc, jk, jb); + z_b(6) = p_cc_view(iidx(jc, jb, 6), jk, iblk(jc, jb, 6)) - + p_cc_view(jc, jk, jb); + z_b(7) = p_cc_view(iidx(jc, jb, 7), jk, iblk(jc, jb, 7)) - + p_cc_view(jc, jk, jb); + z_b(8) = p_cc_view(iidx(jc, jb, 8), jk, iblk(jc, jb, 8)) - + p_cc_view(jc, jk, jb); p_coeff_view(9, jc, jk, jb) = - lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 8, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 8, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 8, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 8, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 8, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 8, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 8, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 8, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 8, 8, jb) * z_b(8); p_coeff_view(8, jc, jk, jb) = - lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3, jc, jk) + - 
lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 7, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 7, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 7, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 7, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 7, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 7, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 7, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 7, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 7, 8, jb) * z_b(8); p_coeff_view(7, jc, jk, jb) = - lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 6, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 6, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 6, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 6, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 6, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 6, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 6, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 6, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 6, 8, jb) * z_b(8); p_coeff_view(6, jc, jk, jb) = - lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 5, 6, jb) * 
z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 5, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 5, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 5, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 5, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 5, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 5, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 5, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 5, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 5, 8, jb) * z_b(8); p_coeff_view(5, jc, jk, jb) = - lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 4, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 4, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 4, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 4, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 4, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 4, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 4, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 4, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 4, 8, jb) * z_b(8); p_coeff_view(4, jc, jk, jb) = - lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8, jc, jk); + 
lsq_pseudoinv_view(jc, 3, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 3, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 3, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 3, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 3, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 3, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 3, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 3, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 3, 8, jb) * z_b(8); p_coeff_view(3, jc, jk, jb) = - lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 2, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 2, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 2, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 2, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 2, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 2, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 2, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 2, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 2, 8, jb) * z_b(8); p_coeff_view(2, jc, jk, jb) = - lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 1, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 1, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 1, 2, jb) * z_b(2) + + 
lsq_pseudoinv_view(jc, 1, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 1, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 1, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 1, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 1, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 1, 8, jb) * z_b(8); p_coeff_view(1, jc, jk, jb) = - lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0, jc, jk) + - lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1, jc, jk) + - lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2, jc, jk) + - lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3, jc, jk) + - lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4, jc, jk) + - lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5, jc, jk) + - lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6, jc, jk) + - lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7, jc, jk) + - lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8, jc, jk); + lsq_pseudoinv_view(jc, 0, 0, jb) * z_b(0) + + lsq_pseudoinv_view(jc, 0, 1, jb) * z_b(1) + + lsq_pseudoinv_view(jc, 0, 2, jb) * z_b(2) + + lsq_pseudoinv_view(jc, 0, 3, jb) * z_b(3) + + lsq_pseudoinv_view(jc, 0, 4, jb) * z_b(4) + + lsq_pseudoinv_view(jc, 0, 5, jb) * z_b(5) + + lsq_pseudoinv_view(jc, 0, 6, jb) * z_b(6) + + lsq_pseudoinv_view(jc, 0, 7, jb) * z_b(7) + + lsq_pseudoinv_view(jc, 0, 8, jb) * z_b(8); p_coeff_view(0, jc, jk, jb) = p_cc_view(jc, jk, jb) - p_coeff_view(1, jc, jk, jb) * lsq_moments_view(jc, jb, 0) - @@ -936,6 +988,22 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } +template void recon_lsq_cell_c_svd<float>( + const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + +template void recon_lsq_cell_c_svd<double>( + const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, + const double *lsq_pseudoinv, const double 
*lsq_moments, double *p_coeff, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, int patch_id, int lsq_high_set_dim_c, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, + int lsq_dim_c); + template <typename T> void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, int i_startblk, int i_endblk, @@ -978,10 +1046,24 @@ void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, } } +template void div3d<float>(const float *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const float *geofac_div, + float *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + +template void div3d<double>(const double *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + double *div_vec_c, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, + int nblks_c, int nblks_e); + template <typename T> void div3d_2field(const T *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, - const T *in2, T &out2, int i_startblk, int i_endblk, + const int *cell_edge_blk, const T *geofac_div, T *div_vec_c, + const T *in2, T *out2, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { // Wrap raw pointers in unmanaged Kokkos Views. 
@@ -1033,9 +1115,23 @@ void div3d_2field(const T *vec_e, const int *cell_edge_idx, } } +template void div3d_2field<float>( + const float *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, float *div_vec_c, const float *in2, float *out2, + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, + int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); + +template void +div3d_2field<double>(const double *vec_e, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + double *div_vec_c, const double *in2, double *out2, + int i_startblk, int i_endblk, int i_startidx_in, + int i_endidx_in, int slev, int elev, int nproma, bool lacc, + int nlev, int nblks_c, int nblks_e); + template <typename T> void div4d(const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, const T *f4din, T &f4dout, int dim4d, + const T *geofac_div, const T *f4din, T *f4dout, int dim4d, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, const int *slev, const int *elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) { @@ -1084,11 +1180,25 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, } } +template void div4d<float>(const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, const float *f4din, + float *f4dout, int dim4d, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + +template void div4d<double>(const int *cell_edge_idx, const int *cell_edge_blk, + const double *geofac_div, const double *f4din, + double *f4dout, int dim4d, int i_startblk, + int i_endblk, int i_startidx_in, int i_endidx_in, + const int *slev, const int *elev, int nproma, + bool lacc, int nlev, int nblks_c, int nblks_e); + template <typename T> void div_avg(const T *vec_e, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const 
int *cell_edge_idx, const int *cell_edge_blk, const T *geofac_div, const T *avg_coeff, - T &div_vec_c, const T *opt_in2, T &opt_out2, + T *div_vec_c, const T *opt_in2, T *opt_out2, const int *i_startblk_in, const int *i_endblk_in, const int *i_startidx_in, const int *i_endidx_in, int slev, int elev, int nproma, int patch_id, bool l_limited_area, @@ -1117,8 +1227,8 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, UnmanagedConstT3D opt_in2_view(opt_in2, nproma, nlev, nblks_e); UnmanagedT3D opt_out2_view(opt_out2, nproma, nlev, nblks_c); - UnmanagedT3D aux_c(nproma, nlev, nblks_c); - UnmanagedT3D aux_c2(nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c("aux_c", nproma, nlev, nblks_c); + Kokkos::View<T ***> aux_c2("aux_c2", nproma, nlev, nblks_c); int i_startblk = i_startblk_in[0]; int i_endblk = i_endblk_in[0]; @@ -1142,11 +1252,11 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, vec_e_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * geofac_div_view(jc, 2, jb); aux_c2(jc, jk, jb) = - opt_in2(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * + opt_in2_view(ieidx(jc, jb, 0), jk, ieblk(jc, jb, 0)) * geofac_div_view(jc, 0, jb) + - opt_in2(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * + opt_in2_view(ieidx(jc, jb, 1), jk, ieblk(jc, jb, 1)) * geofac_div_view(jc, 1, jb) + - opt_in2(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * + opt_in2_view(ieidx(jc, jb, 2), jk, ieblk(jc, jb, 2)) * geofac_div_view(jc, 2, jb); }); } @@ -1263,9 +1373,32 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } +template void div_avg<float>(const float *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, + const int *cell_edge_idx, const int *cell_edge_blk, + const float *geofac_div, const float *avg_coeff, + float *div_vec_c, const float *opt_in2, + float *opt_out2, const int *i_startblk_in, + const int *i_endblk_in, const int *i_startidx_in, + const int *i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool 
lacc, int nlev, int nblks_c, + int nblks_e); + +template void +div_avg<double>(const double *vec_e, const int *cell_neighbor_idx, + const int *cell_neighbor_blk, const int *cell_edge_idx, + const int *cell_edge_blk, const double *geofac_div, + const double *avg_coeff, double *div_vec_c, + const double *opt_in2, double *opt_out2, + const int *i_startblk_in, const int *i_endblk_in, + const int *i_startidx_in, const int *i_endidx_in, int slev, + int elev, int nproma, int patch_id, bool l_limited_area, + bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); + template <typename T> void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, int nlev, int nblks_e, int nblks_v) { @@ -1314,9 +1447,21 @@ void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, } } +template void rot_vertex_atmos<float>( + const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + +template void rot_vertex_atmos<double>( + const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, int nlev, int nblks_e, int nblks_v); + template <typename T> void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, + const int *vert_edge_blk, const T *geofac_rot, T *rot_vec, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, bool lacc, bool acc_async, int nlev, int nblks_e, int 
nblks_v) { @@ -1367,3 +1512,15 @@ void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, if (!acc_async) Kokkos::fence(); } + +template void rot_vertex_ri<float>( + const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); + +template void rot_vertex_ri<double>( + const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, + const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); -- GitLab From 5b6606842ed56a87c83b365eb79c6d1cdee3a08e Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 1 Mar 2025 10:47:43 +0100 Subject: [PATCH 11/35] Separate linear, quadratic, and cubic tests --- test/c/test_horizontal_divrot.cpp | 65 ++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 082afa3..8bd782c 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -74,21 +74,9 @@ int At(int id, always_t<int, Dims>... ids) { return id + At_impl<Dims...>(FirstDim, ids...); } -// ValueType struct for compute precision and reconstruction method. 
-template <typename ValueType, int ReconMethod> struct DivrotType { - using type = ValueType; - static constexpr int get_recon_method() { return ReconMethod; }; -}; - -typedef ::testing::Types< - DivrotType<float, static_cast<int>(ReconstructionMethod::linear)>, - DivrotType<double, static_cast<int>(ReconstructionMethod::linear)>> - ValueTypes; - -template <typename ValueTypes> +template <typename ValueType, int ReconMethod> class HorizontalDivrotTest : public ::testing::Test { protected: - using ValueType = typename ValueTypes::type; // [lsq_dim_c, lsq_dim_unk] static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { @@ -106,8 +94,8 @@ protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 1; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = init_lsq_dim( - static_cast<ReconstructionMethod>(ValueTypes::get_recon_method())); + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); static constexpr int lsq_dim_c = std::get<0>(lsq_dim); static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); @@ -144,11 +132,32 @@ protected: } }; -TYPED_TEST_SUITE(HorizontalDivrotTest, ValueTypes); +template <typename ValueType> +class HorizontalDivrotLinearTest + : public HorizontalDivrotTest< + ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; -TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { - using ValueType = typename TestFixture::ValueType; +template <typename ValueType> +class HorizontalDivrotQuadraticTest + : public HorizontalDivrotTest< + ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; + +template <typename ValueType> +class HorizontalDivrotCubicTest + : public HorizontalDivrotTest<ValueType, static_cast<int>( + ReconstructionMethod::cubic)> { +}; + +typedef ::testing::Types<float, double> ValueTypes; + 
+TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 3); + EXPECT_EQ(TestFixture::lsq_dim_unk, 2); +} +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int nproma = TestFixture::nproma; constexpr int nlev = TestFixture::nlev; constexpr int nblks_c = TestFixture::nblks_c; @@ -184,7 +193,7 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } - recon_lsq_cell_l<ValueType>( + recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), @@ -197,3 +206,21 @@ TYPED_TEST(HorizontalDivrotTest, TestReconLsqCellLinear) { EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); } + +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 9); + EXPECT_EQ(TestFixture::lsq_dim_unk, 5); +} + +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); + +TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { + EXPECT_EQ(TestFixture::lsq_dim_c, 9); + EXPECT_EQ(TestFixture::lsq_dim_unk, 9); +} -- GitLab From efc6beef573e980f39a9df286c50f9ba39542401 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 3 Mar 2025 09:48:25 +0100 Subject: [PATCH 12/35] Use snake case for at functions --- test/c/test_horizontal_divrot.cpp | 79 ++++++++++++++++--------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 8bd782c..a99556f 100644 --- a/test/c/test_horizontal_divrot.cpp +++ 
b/test/c/test_horizontal_divrot.cpp @@ -37,41 +37,41 @@ enum class ReconstructionMethod { // Template function for LayoutLeft ID access in compile time // For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its // corresponding vector id (LayoutLeft) by -// At<2, 3, 4, 5>(id1, id2, id3, id4). -// The At_impl then adds the id from beginning to the end and pass the id prefix -// to the next recursive At_impl function. In this example, -// At<2, 3, 4, 5>(id1, id2, id3, id4) { -// return id1 + At_impl<3, 4, 5>(2, id2, id3, id4); +// at<2, 3, 4, 5>(id1, id2, id3, id4). +// The at_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive at_impl function. In this example, +// at<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); // } -// At_impl<3, 4, 5>(2, id2, id3, id4) { -// return id2 * 2 + At_impl<4, 5>(2 * 3, id3, id4); +// at_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); // } -// At_impl<4, 5>(2 * 3, id3, id4) { -// return id3 * 2 * 3 + At_impl<5>(2 * 3 * 4, id4); +// at_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); // } -// At_impl<5>(2 * 3 * 4, id4) { +// at_impl<5>(2 * 3 * 4, id4) { // return id4 * 2 * 3 * 4; // } // Which gives -// At<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + // id3 * 2 * 3 + id4 * 2 * 3 * 4 // Helper type converting integer numbers to int template <class T, auto> using always_t = T; -// Base function of At_impl. Should not be used. -template <int... Dims> int At_impl(always_t<int, Dims>... ids) { return 0; } +// Base function of at_impl. Should not be used. +template <int... Dims> int at_impl(always_t<int, Dims>... 
ids) { return 0; } // Template specialization of the last ID -template <int LastDim> int At_impl(int prefix, int id) { return id * prefix; } -// Template specialization of At_impl, accumulate the return value using the -// first id and pass the prefix to the next recursive At_impl function. +template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } +// Template specialization of at_impl, accumulate the return value using the +// first id and pass the prefix to the next recursive at_impl function. template <int FirstDim, int... Dims> -int At_impl(int prefix, int id, always_t<int, Dims>... ids) { - return id * prefix + At_impl<Dims...>(prefix * FirstDim, ids...); +int at_impl(int prefix, int id, always_t<int, Dims>... ids) { + return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); } -// At<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft. Use this function instead of At_impl. +// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +// LayoutLeft. Use this function instead of at_impl. template <int FirstDim, int... Dims> -int At(int id, always_t<int, Dims>... ids) { - return id + At_impl<Dims...>(FirstDim, ids...); +int at(int id, always_t<int, Dims>... 
ids) { + return id + at_impl<Dims...>(FirstDim, ids...); } template <typename ValueType, int ReconMethod> @@ -166,31 +166,31 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[At<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); + this->p_cc[at<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 1)] = i; - this->cell_neighbor_idx[At<nproma, nblks_c, 3>(i, 0, 2)] = i; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 1)] = i; + this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 2)] = i; for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[At<nproma, nblks_c, 3>(i, 0, j)] = 0; + this->cell_neighbor_blk[at<nproma, nblks_c, 3>(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, + this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[At<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, + this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, 0)] = 0.5; - this->p_coeff[At<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + this->p_coeff[at<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; } - this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[At<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; + this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; this->lsq_rmat_utri_c - [At<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( + [at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( i, 0, 0)] = 0.1; - this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; - 
this->lsq_moments[At<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; + this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } recon_lsq_cell_l<TypeParam>( @@ -202,9 +202,12 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - EXPECT_NEAR(this->p_coeff[0], 0.34, 1e-6); - EXPECT_NEAR(this->p_coeff[1], 1.8, 1e-6); - EXPECT_NEAR(this->p_coeff[2], 1.0, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); } typedef ::testing::Types<float, double> ValueTypes; -- GitLab From 4a18f708a16c5dcbfeccb5afa00c9205bda19e01 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 3 Mar 2025 11:29:28 +0100 Subject: [PATCH 13/35] Add exception for new typo version --- _typos.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/_typos.toml b/_typos.toml index 58a18ef..8de4a86 100644 --- a/_typos.toml +++ b/_typos.toml @@ -11,6 +11,7 @@ extend-ignore-words-re = [ Wirth = "Wirth" # author name nin = "nin" # number of inputs Pilar = "Pilar" # author name +Comput = "Comput" # abbreviation for Computational [default.extend-identifiers] f4dout = "f4dout" # file name -- GitLab From 77fa7b0db81c08976dcba21b5129f3c6448efe39 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Mon, 3 Mar 2025 13:24:52 +0100 Subject: [PATCH 14/35] Define template and instantiate functions --- src/horizontal/CMakeLists.txt | 1 - src/horizontal/lib_divrot.cpp | 197 +++-------------------------- src/horizontal/lib_divrot.hpp | 198 ++++++++++++++++-------------- src/types.hpp | 16 +++ 
test/c/test_horizontal_divrot.cpp | 3 +- 5 files changed, 140 insertions(+), 275 deletions(-) create mode 100644 src/types.hpp diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index 198488f..d403cb2 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -59,7 +59,6 @@ target_include_directories( # multiple compile languages # https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${PROJECT_SOURCE_DIR}/src>> - $<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:C,CXX>:${CMAKE_CURRENT_SOURCE_DIR}>> PRIVATE # Path to config.h (for C and C++ only): Requires CMake 3.15+ for multiple # compile languages diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 9dce2e4..be6d9da 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -10,10 +10,11 @@ // --------------------------------------------------------------- #include <iostream> -#include <lib_divrot.hpp> -#include <support/mo_lib_loopindices.hpp> #include <vector> +#include <horizontal/lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -101,24 +102,7 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } -template void -recon_lsq_cell_l<float>(const float *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const float *lsq_qtmat_c, - const float *lsq_rmat_rdiag_c, - const float *lsq_rmat_utri_c, const float *lsq_moments, - float *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, bool l_consv, bool lacc, bool acc_async, - int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c); - -template void recon_lsq_cell_l<double>( - const double *p_cc, const int *cell_neighbor_idx, - const int 
*cell_neighbor_blk, const double *lsq_qtmat_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L); template <typename T> void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, @@ -195,21 +179,7 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::fence(); } -template void recon_lsq_cell_l_svd<float>( - const float *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const float *lsq_pseudoinv, - const float *lsq_moments, float *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); - -template void recon_lsq_cell_l_svd<double>( - const double *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const double *lsq_pseudoinv, - const double *lsq_moments, double *p_coeff, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, @@ -373,23 +343,7 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } -template void recon_lsq_cell_q<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, - const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, - int 
i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_q<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q); template <typename T> void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, @@ -529,21 +483,7 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } -template void recon_lsq_cell_q_svd<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_q_svd<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, @@ -785,23 
+725,7 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::fence(); } -template void recon_lsq_cell_c<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_rmat_rdiag_c, const float *lsq_rmat_utri_c, - const float *lsq_moments, const float *lsq_qtmat_c, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_c<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_rmat_rdiag_c, const double *lsq_rmat_utri_c, - const double *lsq_moments, const double *lsq_qtmat_c, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C); template <typename T> void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, @@ -988,21 +912,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::fence(); } -template void recon_lsq_cell_c_svd<float>( - const float *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const float *lsq_pseudoinv, const float *lsq_moments, float *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); - -template void recon_lsq_cell_c_svd<double>( - const double *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const double *lsq_pseudoinv, const double *lsq_moments, double *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int 
slev, - int elev, int nproma, int patch_id, int lsq_high_set_dim_c, - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD); template <typename T> void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, @@ -1046,19 +956,7 @@ void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, } } -template void div3d<float>(const float *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const float *geofac_div, - float *div_vec_c, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); - -template void div3d<double>(const double *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - double *div_vec_c, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D); template <typename T> void div3d_2field(const T *vec_e, const int *cell_edge_idx, @@ -1115,19 +1013,7 @@ void div3d_2field(const T *vec_e, const int *cell_edge_idx, } } -template void div3d_2field<float>( - const float *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, float *div_vec_c, const float *in2, float *out2, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, - int elev, int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); - -template void -div3d_2field<double>(const double *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - double *div_vec_c, const double *in2, double *out2, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, bool lacc, - int nlev, int nblks_c, int nblks_e); 
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV3D_2FIELD); template <typename T> void div4d(const int *cell_edge_idx, const int *cell_edge_blk, @@ -1180,19 +1066,7 @@ void div4d(const int *cell_edge_idx, const int *cell_edge_blk, } } -template void div4d<float>(const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, const float *f4din, - float *f4dout, int dim4d, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); - -template void div4d<double>(const int *cell_edge_idx, const int *cell_edge_blk, - const double *geofac_div, const double *f4din, - double *f4dout, int dim4d, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV4D); template <typename T> void div_avg(const T *vec_e, const int *cell_neighbor_idx, @@ -1373,28 +1247,7 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } -template void div_avg<float>(const float *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, - const int *cell_edge_idx, const int *cell_edge_blk, - const float *geofac_div, const float *avg_coeff, - float *div_vec_c, const float *opt_in2, - float *opt_out2, const int *i_startblk_in, - const int *i_endblk_in, const int *i_startidx_in, - const int *i_endidx_in, int slev, int elev, - int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, - int nblks_e); - -template void -div_avg<double>(const double *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const int *cell_edge_idx, - const int *cell_edge_blk, const double *geofac_div, - const double *avg_coeff, double *div_vec_c, - const double *opt_in2, double *opt_out2, - const int *i_startblk_in, const int *i_endblk_in, - const int 
*i_startidx_in, const int *i_endidx_in, int slev, - int elev, int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_DIV_AVG); template <typename T> void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, @@ -1447,17 +1300,7 @@ void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, } } -template void rot_vertex_atmos<float>( - const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); - -template void rot_vertex_atmos<double>( - const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); +ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_ATMOS); template <typename T> void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, @@ -1513,14 +1356,4 @@ void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, Kokkos::fence(); } -template void rot_vertex_ri<float>( - const float *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const float *geofac_rot, float *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); - -template void rot_vertex_ri<double>( - const double *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, - const double *geofac_rot, double *rot_vec, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v); 
+ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_ROT_VERTEX_RI); diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index db60b29..a0cc8cf 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -12,106 +12,122 @@ #pragma once #include <Kokkos_Core.hpp> +#include <types.hpp> -template <typename T> -void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_qtmat_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, T *p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, bool lacc, - bool acc_async, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L(_type) \ + void recon_lsq_cell_l( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_qtmat_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) -template <typename T> -void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, bool l_consv, - bool lacc, bool acc_async, int nblks_c, int nlev, - int lsq_dim_unk, int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(_type) \ + void recon_lsq_cell_l_svd( \ + const _type *p_cc, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const _type *lsq_pseudoinv, \ + const _type *lsq_moments, _type *p_coeff, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, 
int elev, int nproma, \ + bool l_consv, bool lacc, bool acc_async, int nblks_c, int nlev, \ + int lsq_dim_unk, int lsq_dim_c) -template <typename T> -void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ + void recon_lsq_cell_q( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ + void recon_lsq_cell_q_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void 
recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T &p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ + void recon_lsq_cell_c( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ + const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, - const int *lsq_blk_c, const T *lsq_pseudoinv, - const T *lsq_moments, T &p_coeff, int i_startblk, - int i_endblk, int i_startidx_in, int i_endidx_in, - int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c); +#define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ + void recon_lsq_cell_c_svd( \ + const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ + const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ + int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ + int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ + int lsq_dim_c) -template <typename T> -void div3d(const T *vec_e, const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, T &div_vec_c, int i_startblk, int i_endblk, - int 
i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV3D(_type) \ + void div3d(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, \ + int nproma, bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div3d_2field(const T *vec_e, const int *cell_edge_idx, - const int *cell_edge_blk, const T *geofac_div, T &div_vec_c, - const T *in2, T &out2, int i_startblk, int i_endblk, - int i_startidx_in, int i_endidx_in, int slev, int elev, - int nproma, bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV3D_2FIELD(_type) \ + void div3d_2field(const _type *vec_e, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + _type *div_vec_c, const _type *in2, _type *out2, \ + int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div4d(const int *cell_edge_idx, const int *cell_edge_blk, - const T *geofac_div, const T *f4din, T &f4dout, int dim4d, - int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, - const int *slev, const int *elev, int nproma, bool lacc, int nlev, - int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV4D(_type) \ + void div4d(const int *cell_edge_idx, const int *cell_edge_blk, \ + const _type *geofac_div, const _type *f4din, _type *f4dout, \ + int dim4d, int i_startblk, int i_endblk, int i_startidx_in, \ + int i_endidx_in, const int *slev, const int *elev, int nproma, \ + bool lacc, int nlev, int nblks_c, int nblks_e) -template <typename T> -void div_avg(const T *vec_e, const int *cell_neighbor_idx, - const int *cell_neighbor_blk, const int *cell_edge_idx, - const int *cell_edge_blk, const T 
*geofac_div, const T *avg_coeff, - T &div_vec_c, const T *opt_in2, T &opt_out2, - const int *i_startblk_in, const int *i_endblk_in, - const int *i_startidx_in, const int *i_endidx_in, int slev, - int elev, int nproma, int patch_id, bool l_limited_area, - bool l2fields, bool lacc, int nlev, int nblks_c, int nblks_e); +#define ICONMATH_DECLARE_DIV_AVG(_type) \ + void div_avg(const _type *vec_e, const int *cell_neighbor_idx, \ + const int *cell_neighbor_blk, const int *cell_edge_idx, \ + const int *cell_edge_blk, const _type *geofac_div, \ + const _type *avg_coeff, _type *div_vec_c, const _type *opt_in2, \ + _type *opt_out2, const int *i_startblk_in, \ + const int *i_endblk_in, const int *i_startidx_in, \ + const int *i_endidx_in, int slev, int elev, int nproma, \ + int patch_id, bool l_limited_area, bool l2fields, bool lacc, \ + int nlev, int nblks_c, int nblks_e) -template <typename T> -void rot_vertex_atmos(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - bool lacc, int nlev, int nblks_e, int nblks_v); +#define ICONMATH_DECLARE_ROT_VERTEX_ATMOS(_type) \ + void rot_vertex_atmos( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const _type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, int nlev, int nblks_e, int nblks_v) -template <typename T> -void rot_vertex_ri(const T *vec_e, const int *vert_edge_idx, - const int *vert_edge_blk, const T *geofac_rot, T &rot_vec, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, bool lacc, - bool acc_async, int nlev, int nblks_e, int nblks_v); +#define ICONMATH_DECLARE_ROT_VERTEX_RI(_type) \ + void rot_vertex_ri( \ + const _type *vec_e, const int *vert_edge_idx, const int *vert_edge_blk, \ + const 
_type *geofac_rot, _type *rot_vec, int i_startblk, int i_endblk, \ + int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, \ + bool lacc, bool acc_async, int nlev, int nblks_e, int nblks_v) + +// Declare as templates +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C(T); +template <typename T> ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(T); +template <typename T> ICONMATH_DECLARE_DIV3D(T); +template <typename T> ICONMATH_DECLARE_DIV3D_2FIELD(T); +template <typename T> ICONMATH_DECLARE_DIV4D(T); +template <typename T> ICONMATH_DECLARE_DIV_AVG(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_ATMOS(T); +template <typename T> ICONMATH_DECLARE_ROT_VERTEX_RI(T); diff --git a/src/types.hpp b/src/types.hpp new file mode 100644 index 0000000..7192e18 --- /dev/null +++ b/src/types.hpp @@ -0,0 +1,16 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +#define ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(_macro) \ + template _macro(float); \ + template _macro(double) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a99556f..29693ff 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -9,10 +9,11 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <vector> + #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include 
<horizontal/lib_divrot.hpp> -#include <vector> // Template helpers for combining multiple dimension array sizes. // The base function of dimension combine. Should not be used. -- GitLab From bc09632d8d907efa7937e23c1ce22fa3eea0c7be Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 8 Mar 2025 20:53:35 +0100 Subject: [PATCH 15/35] Add comments and adapt Doxygen style --- test/c/test_horizontal_divrot.cpp | 75 ++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 29693ff..3a46b9e 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -15,27 +15,43 @@ #include <gtest/gtest.h> #include <horizontal/lib_divrot.hpp> -// Template helpers for combining multiple dimension array sizes. -// The base function of dimension combine. Should not be used. +// Template function for computing array size. +// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by +// dim_combine(2, 3, 4, 5). +// Which will automatically instantiate +// dim_combine<int, int, int, int>(2, 3, 4, 5). +// The function then call dim_combine recursively +// dim_combine<int, int, int, int>(2, 3, 4, 5) { +// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); +// } +// dim_combine<int, int, int>(3, 4, 5) { +// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); +// } +// dim_combine<int, int>(4, 5) { +// return static_cast<size_t>(4) * dim_combine<int>(5); +// } +// Where the last dim_combine is specialized as +// dim_combine<int>(5) { +// return static_cast<size_t>(5); +// } +// Which gives +// dim_combine<int, int, int, int>(2, 3, 4, 5) = +// static_cast<size_t>(2) * static_cast<size_t>(3) * +// static_cast<size_t>(4) * static_cast<size_t>(5) +/// Template helpers for combining multiple dimension array sizes. +/// The base function of dimension combine. Should not be used. 
template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } -// Template specialization of only one dimension, returns the dimension itself. +/// Template specialization of only one dimension, returns the dimension itself. template <typename T> size_t dim_combine(T dim) { return static_cast<size_t>(dim); } -// Template specialization of picking out the first dimension. The combined -// dimension is the first dimension times the combined dimension of the rest. +/// Template specialization of picking out the first dimension. The combined +/// dimension is the first dimension times the combined dimension of the rest. template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { return static_cast<size_t>(dim) * dim_combine(dims...); } -// Enum class for the reconstruction method -enum class ReconstructionMethod { - linear, - quadratic, - cubic, -}; - -// Template function for LayoutLeft ID access in compile time +// Template function for LayoutLeft ID access in compile time. // For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its // corresponding vector id (LayoutLeft) by // at<2, 3, 4, 5>(id1, id2, id3, id4). @@ -56,29 +72,38 @@ enum class ReconstructionMethod { // Which gives // at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + // id3 * 2 * 3 + id4 * 2 * 3 * 4 -// Helper type converting integer numbers to int +/// Helper type converting integer numbers to int template <class T, auto> using always_t = T; -// Base function of at_impl. Should not be used. +/// Base function of at_impl. Should not be used. template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } -// Template specialization of the last ID +/// Template specialization of the last ID template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } -// Template specialization of at_impl, accumulate the return value using the -// first id and pass the prefix to the next recursive at_impl function. 
+/// Template specialization of at_impl, accumulate the return value using the +/// first id and pass the prefix to the next recursive at_impl function. template <int FirstDim, int... Dims> int at_impl(int prefix, int id, always_t<int, Dims>... ids) { return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); } -// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -// LayoutLeft. Use this function instead of at_impl. +/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +/// LayoutLeft. Use this function instead of at_impl. template <int FirstDim, int... Dims> int at(int id, always_t<int, Dims>... ids) { return id + at_impl<Dims...>(FirstDim, ids...); } +/// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +/// Base test class for the horizontal divrot tests. Templated for the ValueType +/// and ReconMethod for the reconstruction method. template <typename ValueType, int ReconMethod> class HorizontalDivrotTest : public ::testing::Test { protected: - // [lsq_dim_c, lsq_dim_unk] + // lsq_dim_c and lsq_dim_unk are instantiated in compile time. static constexpr std::tuple<int, int> init_lsq_dim(ReconstructionMethod method) { switch (method) { @@ -133,16 +158,22 @@ protected: } }; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to linear. template <typename ValueType> class HorizontalDivrotLinearTest : public HorizontalDivrotTest< ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to quadratic. template <typename ValueType> class HorizontalDivrotQuadraticTest : public HorizontalDivrotTest< ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; +/// Test class for the horizontal tests. The reconstruction method is specified +/// to cubic. 
template <typename ValueType> class HorizontalDivrotCubicTest : public HorizontalDivrotTest<ValueType, static_cast<int>( @@ -194,6 +225,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; } + // Test function recon_lsq_cell_l<TypeParam>( this->p_cc.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), @@ -203,6 +235,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + // Check result EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], 0.34, 1e-6); EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], -- GitLab From c911a6bb5ab813d551e3c0da908a50a9feb8ea75 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sat, 8 Mar 2025 22:37:00 +0100 Subject: [PATCH 16/35] Add first random test --- test/c/test_horizontal_divrot.cpp | 158 ++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 20 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 3a46b9e..a57f31a 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -9,11 +9,14 @@ // SPDX-License-Identifier: BSD-3-Clause // --------------------------------------------------------------- +#include <iostream> +#include <random> #include <vector> #include <Kokkos_Core.hpp> #include <gtest/gtest.h> #include <horizontal/lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> // Template function for computing array size. 
// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by @@ -180,6 +183,7 @@ class HorizontalDivrotCubicTest ReconstructionMethod::cubic)> { }; +/// ValueTypes which the divrot tests should run with typedef ::testing::Types<float, double> ValueTypes; TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); @@ -196,33 +200,38 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int lsq_dim_c = TestFixture::lsq_dim_c; constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + // Initialization for (int i = 0; i < nproma; ++i) { - this->p_cc[at<nproma, nlev, nblks_c>(i, 0, 0)] = (i + 1); + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 1)] = i; - this->cell_neighbor_idx[at<nproma, nblks_c, 3>(i, 0, 2)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[at<nproma, nblks_c, 3>(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; } for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 0, j, - 0)] = 1.0; - this->lsq_qtmat_c[at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>(i, 1, j, - 0)] = 0.5; - this->p_coeff[at<lsq_dim_c, nproma, nlev, nblks_c>(j, i, 0, 0)] = 0.0; + 
this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } - this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[at<nproma, lsq_dim_unk, nblks_c>(i, 1, 0)] = 2.0; - this->lsq_rmat_utri_c - [at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>( - i, 0, 0)] = 0.1; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 0)] = 0.2; - this->lsq_moments[at<nproma, nblks_c, lsq_dim_unk>(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; } // Test function @@ -244,7 +253,118 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } -typedef ::testing::Types<float, double> ValueTypes; +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < 
nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < 3; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(3); + std::vector<TypeParam> z_qt_times_d(2); + std::vector<TypeParam> p_result(lsq_dim_c * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + z_d[0] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[1] = 
this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_d[2] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + z_qt_times_d[0] = this->lsq_qtmat_c[qtmat_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_qtmat_c[qtmat_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_qtmat_c[qtmat_at(jc, 0, 2, jb)] * z_d[2]; + z_qt_times_d[1] = this->lsq_qtmat_c[qtmat_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_qtmat_c[qtmat_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_qtmat_c[qtmat_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_c, nproma>(2, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + p_result[at<lsq_dim_c, nproma>(1, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d[0] - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_c, nproma>(2, jc)]); + p_result[at<lsq_dim_c, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + } + } + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + p_result[at<lsq_dim_c, nproma>(0, jc)] = + p_result[at<lsq_dim_c, nproma>(0, jc)] - + p_result[at<lsq_dim_c, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_c, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_c; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_c, nproma>(i, jc))], 1e-6); + } + } +} TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); @@ -253,8 +373,6 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_unk, 5); } -typedef ::testing::Types<float, double> ValueTypes; - 
TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { -- GitLab From c7a777cc52e367f23f1786712a90c3dea421c2f7 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sun, 9 Mar 2025 11:06:02 +0100 Subject: [PATCH 17/35] Fix bug and add tests --- src/horizontal/lib_divrot.cpp | 88 ++++----- src/horizontal/lib_divrot.hpp | 23 +-- test/c/test_horizontal_divrot.cpp | 314 ++++++++++++++++++++++++------ 3 files changed, 306 insertions(+), 119 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index be6d9da..164be38 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -36,14 +36,14 @@ void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_d("z_d", 3); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 2); + Kokkos::View<T *> z_d("z_d", lsq_dim_c); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -124,13 +124,13 @@ void recon_lsq_cell_l_svd(const T *p_cc, const int *cell_neighbor_idx, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T *> z_b("z_b", 3); + Kokkos::View<T *> z_b("z_b", lsq_dim_c); - UnmanagedConstInt3D iidx(cell_neighbor_idx, nproma, nblks_c, 3); - UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, 3); + UnmanagedConstInt3D 
iidx(cell_neighbor_idx, nproma, nblks_c, lsq_dim_c); + UnmanagedConstInt3D iblk(cell_neighbor_blk, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -183,13 +183,12 @@ ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_L_SVD); template <typename T> void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -202,14 +201,14 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); - Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 5); + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, nlev); + Kokkos::View<T *> z_qt_times_d("z_qt_times_d", lsq_dim_unk); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -219,9 +218,10 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -351,9 +351,8 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + bool l_limited_area, bool lacc, int nblks_c, int nlev, + int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers 
in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -366,21 +365,22 @@ void recon_lsq_cell_q_svd(const T *p_cc, const int *lsq_idx_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_b("z_b", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_b("z_b", lsq_dim_c, nproma, elev); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {6, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_q_svd_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -487,13 +487,12 @@ ICONMATH_INSTANTIATE_FOR_EACH_VALUE_TYPE(ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD); template <typename T> void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, - const T *lsq_rmat_rdiag_c, const T *lsq_rmat_utri_c, - const T *lsq_moments, const T *lsq_qtmat_c, T *p_coeff, - int i_startblk, int i_endblk, int i_startidx_in, - int i_endidx_in, int slev, int elev, int nproma, - int patch_id, int lsq_high_set_dim_c, bool l_limited_area, - bool lacc, int nblks_c, int nlev, int lsq_dim_unk, - int lsq_dim_c) { + const T *lsq_qtmat_c, const T *lsq_rmat_rdiag_c, + const T *lsq_rmat_utri_c, const T *lsq_moments, + T *p_coeff, int 
i_startblk, int i_endblk, + int i_startidx_in, int i_endidx_in, int slev, int elev, + int nproma, int patch_id, bool l_limited_area, bool lacc, + int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. typedef Kokkos::View<const T ***, Kokkos::LayoutLeft, Kokkos::MemoryUnmanaged> UnmanagedConstT3D; @@ -506,14 +505,14 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::MemoryUnmanaged> UnmanagedConstInt3D; - Kokkos::View<T ***> z_d("z_d", lsq_high_set_dim_c, nproma, elev); + Kokkos::View<T ***> z_d("z_d", lsq_dim_c, nproma, elev); Kokkos::View<T *> z_qt_times_d("z_qt_times_d", 9); UnmanagedConstInt3D iidx(lsq_idx_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_qtmat_c_view(lsq_qtmat_c, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); @@ -523,9 +522,10 @@ void recon_lsq_cell_c(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { Kokkos::MDRangePolicy<Kokkos::Rank<4>> initPolicy( - {0, i_startidx_in, slev, i_startblk}, {9, i_endidx_in, elev, i_endblk}); + {0, i_startidx_in, slev, i_startblk}, + {lsq_dim_unk + 1, i_endidx_in, elev, i_endblk}); Kokkos::parallel_for( "recon_lsq_cell_c_init", initPolicy, KOKKOS_LAMBDA(const int ji, const int jc, const int jk, const int jb) { @@ -754,20 +754,20 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, UnmanagedConstInt3D iblk(lsq_blk_c, nproma, nblks_c, lsq_dim_c); UnmanagedConstT3D p_cc_view(p_cc, nproma, nlev, nblks_c); - UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_c, nproma, nlev, 
nblks_c); + UnmanagedT4D p_coeff_view(p_coeff, lsq_dim_unk + 1, nproma, nlev, nblks_c); UnmanagedConstT4D lsq_pseudoinv_view(lsq_pseudoinv, nproma, lsq_dim_unk, lsq_dim_c, nblks_c); UnmanagedConstT3D lsq_moments_view(lsq_moments, nproma, nblks_c, lsq_dim_unk); - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { for (int jb = i_startblk; jb < i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in, i_endidx_in, nproma, jb, i_startblk, i_endblk, i_startidx, i_endidx); - Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy({slev, i_startidx, 0}, - {elev, i_endidx, 9}); + Kokkos::MDRangePolicy<Kokkos::Rank<3>> initPolicy( + {slev, i_startidx, 0}, {elev, i_endidx, lsq_dim_unk + 1}); Kokkos::parallel_for( "recon_lsq_cell_c_svd_init", initPolicy, KOKKOS_LAMBDA(const int jk, const int jc, const int ji) { @@ -1156,7 +1156,7 @@ void div_avg(const T *vec_e, const int *cell_neighbor_idx, } } - if (patch_id > 1 || l_limited_area) { + if (patch_id > 0 || l_limited_area) { i_startblk = i_startblk_in[1]; i_endblk = i_endblk_in[1]; diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index a0cc8cf..dae8282 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -36,31 +36,28 @@ #define ICONMATH_DECLARE_RECON_LSQ_CELL_Q(_type) \ void recon_lsq_cell_q( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ - const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + 
bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_Q_SVD(_type) \ void recon_lsq_cell_q_svd( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_C(_type) \ void recon_lsq_cell_c( \ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ - const _type *lsq_rmat_rdiag_c, const _type *lsq_rmat_utri_c, \ - const _type *lsq_moments, const _type *lsq_qtmat_c, _type *p_coeff, \ + const _type *lsq_qtmat_c, const _type *lsq_rmat_rdiag_c, \ + const _type *lsq_rmat_utri_c, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ - bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ - int lsq_dim_c) + int slev, int elev, int nproma, int patch_id, bool l_limited_area, \ + bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) #define ICONMATH_DECLARE_RECON_LSQ_CELL_C_SVD(_type) \ void recon_lsq_cell_c_svd( \ diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a57f31a..37110db 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -134,10 +134,12 @@ protected: int i_startidx_in = 0; int i_endidx_in = nproma; // Full range: 0 .. nproma-1 int slev = 0; - int elev = nlev; // Full vertical range (0 .. nlev-1) - bool lacc = false; // Not using ACC-specific behavior. 
- bool acc_async = false; // No asynchronous execution. - bool l_consv = true; // No conservative correction + int elev = nlev; // Full vertical range (0 .. nlev-1) + int patch_id = 0; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + bool l_consv = true; // With conservative correction. + bool l_limited_area = true; // Limited area setup std::vector<ValueType> p_cc; std::vector<int> cell_neighbor_idx; @@ -150,14 +152,14 @@ protected: HorizontalDivrotTest() { p_cc.resize(dim_combine(nproma, nlev, nblks_c)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); lsq_rmat_utri_c.resize(dim_combine( nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); - p_coeff.resize(dim_combine(lsq_dim_c, nproma, nlev, nblks_c)); + p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); } }; @@ -201,9 +203,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; const auto &rmat_utri_at = at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; @@ -216,13 +218,12 @@ 
TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < 3; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } @@ -245,12 +246,15 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); // Check result - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.34, 1e-6); - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.8, 1e-6); - EXPECT_NEAR(this->p_coeff[(at<lsq_dim_c, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { @@ -261,9 +265,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_c, nproma, nlev, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; const 
auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; const auto &rmat_utri_at = at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; @@ -278,14 +282,13 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { for (int i = 0; i < nproma; ++i) { this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - for (int j = 0; j < 3; ++j) { + for (int j = 0; j < lsq_dim_c; ++j) { this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); } @@ -308,60 +311,50 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); // Compute reference result - std::vector<TypeParam> z_d(3); - std::vector<TypeParam> z_qt_times_d(2); - std::vector<TypeParam> p_result(lsq_dim_c * nproma); + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { - z_d[0] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_d[1] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_d[2] = this->p_cc[p_cc_at( - 
this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - z_qt_times_d[0] = this->lsq_qtmat_c[qtmat_at(jc, 0, 0, jb)] * z_d[0] + - this->lsq_qtmat_c[qtmat_at(jc, 0, 1, jb)] * z_d[1] + - this->lsq_qtmat_c[qtmat_at(jc, 0, 2, jb)] * z_d[2]; - z_qt_times_d[1] = this->lsq_qtmat_c[qtmat_at(jc, 1, 0, jb)] * z_d[0] + - this->lsq_qtmat_c[qtmat_at(jc, 1, 1, jb)] * z_d[1] + - this->lsq_qtmat_c[qtmat_at(jc, 1, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_c, nproma>(2, jc)] = + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + z_qt_times_d[0] = 0.0; + z_qt_times_d[1] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i]; + z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; - p_result[at<lsq_dim_c, nproma>(1, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * - (z_qt_times_d[0] - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * - p_result[at<lsq_dim_c, nproma>(2, jc)]); - p_result[at<lsq_dim_c, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - } - } - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - p_result[at<lsq_dim_c, nproma>(0, jc)] = - p_result[at<lsq_dim_c, nproma>(0, jc)] - - p_result[at<lsq_dim_c, nproma>(1, jc)] * + (z_qt_times_d[0] - + this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * 
this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_c, nproma>(2, jc)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * this->lsq_moments[moments_at(jc, jb, 1)]; } } } // Check result - for (int i = 0; i < lsq_dim_c; ++i) { + for (int i = 0; i < lsq_dim_unk + 1; ++i) { for (int jc = 0; jc < nproma; ++jc) { EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_c, nproma>(i, jc))], 1e-6); + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; } } } @@ -373,6 +366,203 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_unk, 5); } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, 
j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.24, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 3.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -2.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 2.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -3.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 2.6, 1e-6); +} + +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { + 
constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + 
this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < 
lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { -- GitLab From 601153ea69581a5dff8942ecc7e1f1c3475e0f86 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Sun, 9 Mar 2025 11:14:13 +0100 Subject: [PATCH 18/35] Add tests --- test/c/test_horizontal_divrot.cpp | 217 ++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 37110db..f61fc17 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -569,3 +569,220 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { EXPECT_EQ(TestFixture::lsq_dim_c, 9); EXPECT_EQ(TestFixture::lsq_dim_unk, 9); } + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + 
this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; + this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; + this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; + this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // 
Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.28, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.4, 1e-6); +} + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { + constexpr int nproma = TestFixture::nproma; + constexpr int nlev = TestFixture::nlev; + constexpr int nblks_c = TestFixture::nblks_c; + constexpr int lsq_dim_c = TestFixture::lsq_dim_c; + constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + 
std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = 
this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} -- GitLab From 1f6c95b342ea4c6e42388ccd112049e51502dff7 Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 12 Mar 2025 15:54:21 +0100 Subject: [PATCH 19/35] Inner product using lambda functions --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/lib_divrot.cpp | 71 +++++++++++++---------------------- 2 files changed, 29 insertions(+), 46 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index d403cb2..af52206 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -22,7 +22,9 @@ 
set(Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mod") set_target_properties( iconmath-horizontal PROPERTIES Fortran_MODULE_DIRECTORY "${Fortran_MODULE_DIRECTORY}" - EXPORT_NAME ${PROJECT_NAME}::horizontal) + EXPORT_NAME ${PROJECT_NAME}::horizontal + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON) if(IM_ENABLE_LOOP_EXCHANGE) target_compile_definitions(iconmath-horizontal PRIVATE __LOOP_EXCHANGE) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 164be38..49e72c3 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -15,6 +15,18 @@ #include <horizontal/lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> +#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ + auto inner_product = [=, &_output](int _id, auto &&...ts) { \ + return [=, &_output] { \ + _output(_id) = 0.0; \ + int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ + }; \ + }; \ + auto _func_name = [=]<int... Is>(int _id, \ + std::integer_sequence<int, Is...>) { \ + return inner_product(_id, Is...)(); \ + }; + template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -261,51 +273,20 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 1, 1, 
jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); - z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + - lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + - lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + - lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + - lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + - lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + - lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + - lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + - lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); + auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, + int jb, int jk, int jc, int unk, + int i) { + z_qt_times_d(unk) += + 
lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); + }; + DECLARE_LAMBDA_INNER_PRODUCT( + dot_product, z_qt_times_d, unk, + lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); + dot_product(0, std::make_integer_sequence<int, 9>()); + dot_product(1, std::make_integer_sequence<int, 9>()); + dot_product(2, std::make_integer_sequence<int, 9>()); + dot_product(3, std::make_integer_sequence<int, 9>()); + dot_product(4, std::make_integer_sequence<int, 9>()); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = -- GitLab From 23e92b7162abea31d868c4aa9717cd31e4a1b94d Mon Sep 17 00:00:00 2001 From: Yen-Chen Chen <yen-chen.chen@tum.de> Date: Wed, 12 Mar 2025 15:55:43 +0100 Subject: [PATCH 20/35] Revert "Inner product using lambda functions" This reverts commit 1f6c95b342ea4c6e42388ccd112049e51502dff7. --- src/horizontal/CMakeLists.txt | 4 +- src/horizontal/lib_divrot.cpp | 71 ++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index af52206..d403cb2 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -22,9 +22,7 @@ set(Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/mod") set_target_properties( iconmath-horizontal PROPERTIES Fortran_MODULE_DIRECTORY "${Fortran_MODULE_DIRECTORY}" - EXPORT_NAME ${PROJECT_NAME}::horizontal - CXX_STANDARD 20 - CXX_STANDARD_REQUIRED ON) + EXPORT_NAME ${PROJECT_NAME}::horizontal) if(IM_ENABLE_LOOP_EXCHANGE) target_compile_definitions(iconmath-horizontal PRIVATE __LOOP_EXCHANGE) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 49e72c3..164be38 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -15,18 +15,6 @@ #include <horizontal/lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> -#define DECLARE_LAMBDA_INNER_PRODUCT(_func_name, _output, _id, _lambda) \ - auto inner_product = [=, 
&_output](int _id, auto &&...ts) { \ - return [=, &_output] { \ - _output(_id) = 0.0; \ - int dummy[sizeof...(ts)]{(_lambda, 0)...}; \ - }; \ - }; \ - auto _func_name = [=]<int... Is>(int _id, \ - std::integer_sequence<int, Is...>) { \ - return inner_product(_id, Is...)(); \ - }; - template <typename T> void recon_lsq_cell_l(const T *p_cc, const int *cell_neighbor_idx, const int *cell_neighbor_blk, const T *lsq_qtmat_c, @@ -273,20 +261,51 @@ void recon_lsq_cell_q(const T *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, Kokkos::parallel_for( "recon_lsq_cell_q_step2", innerPolicy, KOKKOS_LAMBDA(const int jk, const int jc) { - auto lambda_add = [=, &z_qt_times_d](auto lsq_qtmat_c_view, auto z_d, - int jb, int jk, int jc, int unk, - int i) { - z_qt_times_d(unk) += - lsq_qtmat_c_view(jc, unk, i, jb) * z_d(i, jc, jk); - }; - DECLARE_LAMBDA_INNER_PRODUCT( - dot_product, z_qt_times_d, unk, - lambda_add(lsq_qtmat_c_view, z_d, jb, jk, jc, unk, ts)); - dot_product(0, std::make_integer_sequence<int, 9>()); - dot_product(1, std::make_integer_sequence<int, 9>()); - dot_product(2, std::make_integer_sequence<int, 9>()); - dot_product(3, std::make_integer_sequence<int, 9>()); - dot_product(4, std::make_integer_sequence<int, 9>()); + z_qt_times_d(0) = lsq_qtmat_c_view(jc, 0, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 0, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 0, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 0, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 0, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 0, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 0, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 0, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 0, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(1) = lsq_qtmat_c_view(jc, 1, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 1, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 1, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 1, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 1, 4, jb) * z_d(4, jc, jk) + 
+ lsq_qtmat_c_view(jc, 1, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 1, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 1, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 1, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(2) = lsq_qtmat_c_view(jc, 2, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 2, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 2, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 2, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 2, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 2, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 2, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 2, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 2, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(3) = lsq_qtmat_c_view(jc, 3, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 3, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 3, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 3, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 3, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 3, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 3, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 3, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 3, 8, jb) * z_d(8, jc, jk); + z_qt_times_d(4) = lsq_qtmat_c_view(jc, 4, 0, jb) * z_d(0, jc, jk) + + lsq_qtmat_c_view(jc, 4, 1, jb) * z_d(1, jc, jk) + + lsq_qtmat_c_view(jc, 4, 2, jb) * z_d(2, jc, jk) + + lsq_qtmat_c_view(jc, 4, 3, jb) * z_d(3, jc, jk) + + lsq_qtmat_c_view(jc, 4, 4, jb) * z_d(4, jc, jk) + + lsq_qtmat_c_view(jc, 4, 5, jb) * z_d(5, jc, jk) + + lsq_qtmat_c_view(jc, 4, 6, jb) * z_d(6, jc, jk) + + lsq_qtmat_c_view(jc, 4, 7, jb) * z_d(7, jc, jk) + + lsq_qtmat_c_view(jc, 4, 8, jb) * z_d(8, jc, jk); p_coeff_view(5, jc, jk, jb) = ptr_rrdiag(jc, 4, jb) * z_qt_times_d(4); p_coeff_view(4, jc, jk, jb) = -- GitLab From acf957e43ac783f5a154816f2ce2cab17567e75b Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:22:35 +0100 Subject: [PATCH 21/35] replaced TestFixture:: with this-> --- 
test/c/test_horizontal_divrot.cpp | 60 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index f61fc17..5e7bb25 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -196,11 +196,11 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -258,11 +258,11 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { } TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -367,11 +367,11 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; 
- constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -452,11 +452,11 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { } TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -571,11 +571,11 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { } TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; @@ -676,11 +676,11 @@ TYPED_TEST(HorizontalDivrotCubicTest, 
TestReconLsqCellCubic) { } TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { - constexpr int nproma = TestFixture::nproma; - constexpr int nlev = TestFixture::nlev; - constexpr int nblks_c = TestFixture::nblks_c; - constexpr int lsq_dim_c = TestFixture::lsq_dim_c; - constexpr int lsq_dim_unk = TestFixture::lsq_dim_unk; + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; const auto &p_cc_at = at<nproma, nlev, nblks_c>; const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; -- GitLab From fd8760ae7f161e74731d3edab176dfe7fe069c9e Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:33:27 +0100 Subject: [PATCH 22/35] removed an unused argument to one of the function in mo_lib_divrot --- src/horizontal/lib_divrot.cpp | 2 +- src/horizontal/lib_divrot.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/lib_divrot.cpp index 164be38..a24981d 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/lib_divrot.cpp @@ -733,7 +733,7 @@ void recon_lsq_cell_c_svd(const T *p_cc, const int *lsq_idx_c, const T *lsq_moments, T *p_coeff, int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, int slev, int elev, int nproma, int patch_id, - int lsq_high_set_dim_c, bool l_limited_area, + bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, int lsq_dim_c) { // Wrap raw pointers in unmanaged Kokkos Views. 
diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/lib_divrot.hpp index dae8282..b8e9743 100644 --- a/src/horizontal/lib_divrot.hpp +++ b/src/horizontal/lib_divrot.hpp @@ -64,7 +64,7 @@ const _type *p_cc, const int *lsq_idx_c, const int *lsq_blk_c, \ const _type *lsq_pseudoinv, const _type *lsq_moments, _type *p_coeff, \ int i_startblk, int i_endblk, int i_startidx_in, int i_endidx_in, \ - int slev, int elev, int nproma, int patch_id, int lsq_high_set_dim_c, \ + int slev, int elev, int nproma, int patch_id, \ bool l_limited_area, bool lacc, int nblks_c, int nlev, int lsq_dim_unk, \ int lsq_dim_c) -- GitLab From 5d7f4c34c27afaec018e05f5c603c0cc3b530765 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:34:07 +0100 Subject: [PATCH 23/35] added unit tests for all the svd functions --- test/c/test_horizontal_divrot.cpp | 508 ++++++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 5e7bb25..11f98aa 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -148,6 +148,7 @@ protected: std::vector<ValueType> lsq_rmat_rdiag_c; std::vector<ValueType> lsq_rmat_utri_c; std::vector<ValueType> lsq_moments; + std::vector<ValueType> lsq_pseudoinv; std::vector<ValueType> p_coeff; HorizontalDivrotTest() { @@ -159,6 +160,7 @@ protected: lsq_rmat_utri_c.resize(dim_combine( nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); } }; @@ -257,6 +259,64 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int 
nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); 
+ EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -359,6 +419,95 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { } } +TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + 
this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result 
fails for i = " << i << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { @@ -451,6 +600,79 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { 2.6, 1e-6); } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + 
recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -563,6 +785,104 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { } } +TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, 
(lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization is done only for iblk = 0 and ilev = 0 + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + {int jb = 0; + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // for (int jk = this->slev; jk < this->elev; ++jk) { + {int jk = 0; + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < 
lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + << "For loop result fails for j = " << j << ", jc = " << jc; + } + } +} + TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { @@ -675,6 +995,99 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { 0.4, 1e-6); } +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -786,3 +1199,98 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { } } } + +TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + 
this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + + // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + {int jb = 0; + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + // for (int jk = this->slev; jk < this->elev; ++jk) { + {int jk = 0; + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + 
this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} -- GitLab From 0dca3eff9d4920e4af90095384771838e5637298 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Fri, 14 Mar 2025 21:34:53 +0100 Subject: [PATCH 24/35] removed all TestLsqDimensions --- test/c/test_horizontal_divrot.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 11f98aa..6d0b3c6 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -192,11 +192,6 @@ typedef ::testing::Types<float, double> ValueTypes; TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); -TYPED_TEST(HorizontalDivrotLinearTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 3); - EXPECT_EQ(TestFixture::lsq_dim_unk, 2); -} - TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -510,11 +505,6 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); -TYPED_TEST(HorizontalDivrotQuadraticTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 9); - EXPECT_EQ(TestFixture::lsq_dim_unk, 5); -} - TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { constexpr int nproma = this->nproma; constexpr int nlev = 
this->nlev; @@ -885,11 +875,6 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); -TYPED_TEST(HorizontalDivrotCubicTest, TestLsqDimensions) { - EXPECT_EQ(TestFixture::lsq_dim_c, 9); - EXPECT_EQ(TestFixture::lsq_dim_unk, 9); -} - TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; -- GitLab From a43cebbf77f72c2b8ec4a03f14c8c06953a568a1 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 08:35:04 +0100 Subject: [PATCH 25/35] renamed the unit test to remove redundant parts --- test/c/test_horizontal_divrot.cpp | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 6d0b3c6..4ef6b7d 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -104,7 +104,7 @@ enum class ReconstructionMethod { /// Base test class for the horizontal divrot tests. Templated for the ValueType /// and ReconMethod for the reconstruction method. template <typename ValueType, int ReconMethod> -class HorizontalDivrotTest : public ::testing::Test { +class HorizontalReconTest : public ::testing::Test { protected: // lsq_dim_c and lsq_dim_unk are instantiated in compile time. static constexpr std::tuple<int, int> @@ -151,7 +151,7 @@ protected: std::vector<ValueType> lsq_pseudoinv; std::vector<ValueType> p_coeff; - HorizontalDivrotTest() { + HorizontalReconTest() { p_cc.resize(dim_combine(nproma, nlev, nblks_c)); cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); @@ -168,31 +168,31 @@ protected: /// Test class for the horizontal tests. The reconstruction method is specified /// to linear. 
template <typename ValueType> -class HorizontalDivrotLinearTest - : public HorizontalDivrotTest< +class HorizontalReconLinearTest + : public HorizontalReconTest< ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; /// Test class for the horizontal tests. The reconstruction method is specified /// to quadratic. template <typename ValueType> -class HorizontalDivrotQuadraticTest - : public HorizontalDivrotTest< +class HorizontalReconQuadraticTest + : public HorizontalReconTest< ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; /// Test class for the horizontal tests. The reconstruction method is specified /// to cubic. template <typename ValueType> -class HorizontalDivrotCubicTest - : public HorizontalDivrotTest<ValueType, static_cast<int>( +class HorizontalReconCubicTest + : public HorizontalReconTest<ValueType, static_cast<int>( ReconstructionMethod::cubic)> { }; /// ValueTypes which the divrot tests should run with typedef ::testing::Types<float, double> ValueTypes; -TYPED_TEST_SUITE(HorizontalDivrotLinearTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -254,7 +254,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinear) { 1.0, 1e-6); } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -312,7 +312,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVD) { 0.5, 1e-6); } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; 
constexpr int nblks_c = this->nblks_c; @@ -414,7 +414,7 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearRandom) { } } -TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -503,9 +503,9 @@ TYPED_TEST(HorizontalDivrotLinearTest, TestReconLsqCellLinearSVDRandom) { } } -TYPED_TEST_SUITE(HorizontalDivrotQuadraticTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -590,7 +590,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadratic) { 2.6, 1e-6); } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -663,7 +663,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVD) { 1.3, 1e-6); } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -775,7 +775,7 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticRandom) { } } -TYPED_TEST(HorizontalDivrotQuadraticTest, TestReconLsqCellQuadraticSVDRandom) { +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -873,9 +873,9 @@ TYPED_TEST(HorizontalDivrotQuadraticTest, 
TestReconLsqCellQuadraticSVDRandom) { } } -TYPED_TEST_SUITE(HorizontalDivrotCubicTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -980,7 +980,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubic) { 0.4, 1e-6); } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1073,7 +1073,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVD) { 0.2, 1e-6); } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1185,7 +1185,7 @@ TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicRandom) { } } -TYPED_TEST(HorizontalDivrotCubicTest, TestReconLsqCellCubicSVDRandom) { +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; -- GitLab From 24f64c4cfaee1a6a112b04b88bcb3fac010b28cc Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 15:29:23 +0100 Subject: [PATCH 26/35] reordered the unit tests --- test/c/test_horizontal_divrot.cpp | 448 +++++++++++++++--------------- 1 file changed, 224 insertions(+), 224 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 4ef6b7d..60f0641 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -254,64 +254,6 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { 1.0, 
1e-6); } -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.65, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); -} - TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -414,6 +356,64 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { } } +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + // this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + 
recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -590,79 +590,6 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { 2.6, 1e-6); } -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < 
lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -0.56, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 1.3, 1e-6); -} - TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -775,6 +702,79 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { } } 
+TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, 
this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -980,99 +980,6 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { 0.4, 1e-6); } -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < 
lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; - this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; - this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; - this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -1.64, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.9, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.6, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - 0.3, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.2, 1e-6); -} - TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; @@ -1185,6 +1092,99 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { } } +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + 
this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; -- GitLab From 3bf9a38b5a4cb67d5b1be7acbda20300d82bbd03 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 22:18:39 +0100 Subject: [PATCH 27/35] added the unit test for div3d --- test/c/test_horizontal_divrot.cpp | 183 ++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 60f0641..078e753 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1279,3 +1279,186 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { } } } + +template <typename ValueType> +class HorizontalDivTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. 
+ + std::vector<ValueType> vec_e; + std::vector<int> cell_edge_idx; + std::vector<int> cell_edge_blk; + std::vector<ValueType> geofac_div; + std::vector<ValueType> div_vec_c; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalDivTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); + geofac_div.resize(dim_combine(nproma, 3, nblks_c)); + div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + } +}; + +template <typename ValueType> +class HorizontalDiv3DTest + : public HorizontalDivTest<ValueType> {}; +TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); + +TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + 
this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div3d function + div3d<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + +} + +TYPED_TEST(HorizontalDiv3DTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in 
same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div3d function + div3d<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} -- GitLab From 
03cbb3f2c755756a90dfc6013238eb5d35ec0b38 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Sun, 16 Mar 2025 22:22:20 +0100 Subject: [PATCH 28/35] added rest of the unit tests for div3d, div4d and divavg removed some redundant things --- test/c/test_horizontal_divrot.cpp | 850 ++++++++++++++++++++++++++++++ 1 file changed, 850 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 078e753..2ad95d2 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1305,6 +1305,13 @@ protected: std::vector<ValueType> f4din; std::vector<ValueType> f4dout; + // Followings are needed in HorizontalDivAvgTest + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> avg_coeff; + std::vector<ValueType> opt_in2; + std::vector<ValueType> opt_out2; + HorizontalDivTest() { slev.resize(dim4d, 0); elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) @@ -1316,12 +1323,30 @@ protected: div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); + opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); + opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); } }; template <typename ValueType> class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDiv3D2FTest + : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDiv4DTest + : public HorizontalDivTest<ValueType> {}; + +template <typename ValueType> +class HorizontalDivAvgTest + : public HorizontalDivTest<ValueType> {}; + TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); TYPED_TEST(HorizontalDiv3DTest, TestSpecific) 
{ @@ -1462,3 +1487,828 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } } } + +TYPED_TEST_SUITE(HorizontalDiv3D2FTest, ValueTypes); + +TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->f4din[f4d_at(i, k, 0, 0)] = (i + 1) * (k + 2); // Different pattern for second field + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c and f4dout to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), + this->i_startblk, 
this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Check first field (same as in div3d test) + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + + // Check second field (expected values calculated manually) + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); +} + +TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 
0)] = real_distrib(gen); + this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c and f4dout to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>( + this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), + this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + // Calculate reference value for first field + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 
1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + // Calculate reference value for second field + ref_f4dout[f4dout_at(jc, jk, jb, 0)] = + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], 0)] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], 0)] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], 0)] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results for first field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "First field results differ at i=" << i << ", k=" << k; + } + } + + // Verify results for second field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + << "Second field results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalDiv4DTest, ValueTypes); + +TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + for (int j = 
0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); + +} + +TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + 
std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>( + this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), + this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, + this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Compute reference result and check + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + TypeParam expected = 0.0; + for (int je = 0; je < 3; ++je) { + expected += this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], + jk, + this->cell_edge_blk[cell_edge_at(jc, jb, je)], + ji)] * this->geofac_div[geofac_div_at(jc, je, jb)]; + } + + EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + << "Random test fails at jc=" << jc << ", jk=" << jk + << ", jb=" << jb << ", ji=" << ji; + } + } + } + } +} + +TYPED_TEST_SUITE(HorizontalDivAvgTest, ValueTypes); + +TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { + 
constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for additional parameters + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 
0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); + +} + +TYPED_TEST(HorizontalDivAvgTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = 
this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = 
real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + 
this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + 
this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + 
const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + 
i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); + +} + +TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; // Set to false for this test + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 
gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + } + } + + // Call the div_avg function with l2fields=false + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), + this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), + i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], + this->elev[0], this->nproma, patch_id, l_limited_area, + l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + 
std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated since l2fields=false) + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, not opt_out2) + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + 
ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results - only check div_vec_c since l2fields=false means opt_out2 isn't updated + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + } + } +} -- GitLab From 5575b916c2cad0a1f9de33edafd57e10d456e325 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:13:48 +0100 Subject: [PATCH 29/35] added unit tests for rest of the functions --- test/c/test_horizontal_divrot.cpp | 352 ++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 2ad95d2..8f95e6e 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -2312,3 +2312,355 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { } } } + +template <typename ValueType> +class HorizontalRotVertexTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr 
int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. + + std::vector<ValueType> vec_e; + std::vector<int> vert_edge_idx; + std::vector<int> vert_edge_blk; + std::vector<ValueType> geofac_rot; + std::vector<ValueType> rot_vec; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + HorizontalRotVertexTest () { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); + vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); + geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); + rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); + } +}; + +template <typename ValueType> +class HorizontalRotVertexAtmosTest + : public HorizontalRotVertexTest<ValueType> {}; + +template <typename ValueType> +class HorizontalRotVertexRITest + : public HorizontalRotVertexTest<ValueType> {}; + +TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + 
this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto 
&rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST_SUITE(HorizontalRotVertexRITest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + 
this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // 
Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), + this->rot_vec.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Ensure computation is complete for both modes + Kokkos::fence(); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k << ")"; + } + } + +} -- GitLab From a8245d1276a6efbc480231da0c171662fb9b485c Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:16:18 +0100 Subject: [PATCH 30/35] applied clang-format to the test file --- test/c/test_horizontal_divrot.cpp | 706 ++++++++++++++++-------------- 1 file changed, 374 insertions(+), 332 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 8f95e6e..a97c1da 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -169,8 +169,9 @@ protected: /// to linear. template <typename ValueType> class HorizontalReconLinearTest - : public HorizontalReconTest< - ValueType, static_cast<int>(ReconstructionMethod::linear)> {}; + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::linear)> { +}; /// Test class for the horizontal tests. The reconstruction method is specified /// to quadratic. 
@@ -184,7 +185,7 @@ class HorizontalReconQuadraticTest template <typename ValueType> class HorizontalReconCubicTest : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::cubic)> { + ReconstructionMethod::cubic)> { }; /// ValueTypes which the divrot tests should run with @@ -483,12 +484,12 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; } } } @@ -832,12 +833,14 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - {int jb = 0; + { + int jb = 0; int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); // for (int jk = this->slev; jk < this->elev; ++jk) { - {int jk = 0; + { + int jk = 0; for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -849,7 +852,7 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - 
this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = @@ -1240,12 +1243,14 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - {int jb = 0; + { + int jb = 0; int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); // for (int jk = this->slev; jk < this->elev; ++jk) { - {int jk = 0; + { + int jk = 0; for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -1257,7 +1262,7 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; for (int i = 0; i < lsq_dim_c; ++i) { p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j-1, i, jb)] * z_d[i]; + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; } } p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = @@ -1280,14 +1285,13 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { } } -template <typename ValueType> -class HorizontalDivTest : public ::testing::Test { +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { protected: static constexpr int nproma = 3; // inner loop length static constexpr int nlev = 2; // number of vertical levels static constexpr int nblks_c = 1; // number of cell blocks static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int dim4d = 2; // 4th dimension size + static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; int i_endblk = nblks_c; // Test blocks [0 .. 
nblks_c-1] @@ -1332,20 +1336,16 @@ protected: }; template <typename ValueType> -class HorizontalDiv3DTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDiv3D2FTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv3D2FTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDiv4DTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDiv4DTest : public HorizontalDivTest<ValueType> {}; template <typename ValueType> -class HorizontalDivAvgTest - : public HorizontalDivTest<ValueType> {}; +class HorizontalDivAvgTest : public HorizontalDivTest<ValueType> {}; TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); @@ -1388,13 +1388,12 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { } // Call the div3d function - div3d<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); @@ -1402,7 +1401,6 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); - } TYPED_TEST(HorizontalDiv3DTest, TestRandom) { @@ -1431,7 
+1429,8 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -1446,13 +1445,12 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } // Call the div3d function - div3d<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -1460,19 +1458,22 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, 
jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } @@ -1508,7 +1509,8 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->f4din[f4d_at(i, k, 0, 0)] = (i + 1) * (k + 2); // Different pattern for second field + this->f4din[f4d_at(i, k, 0, 0)] = + (i + 1) * (k + 2); // Different pattern for second field } // Set edge indices to point to specific cells (including self) @@ -1534,14 +1536,13 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { } // Call the div3d_2field function - div3d_2field<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), - this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Check first 
field (same as in div3d test) EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); @@ -1590,7 +1591,8 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -1606,14 +1608,13 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { } // Call the div3d_2field function - div3d_2field<TypeParam>( - this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), this->f4dout.data(), - this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); // Calculate reference values separately and verify results std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); @@ -1622,32 +1623,38 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, i_endidx); for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { // Calculate reference value for first field 
ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; // Calculate reference value for second field ref_f4dout[f4dout_at(jc, jk, jb, 0)] = this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], + 0)] * this->geofac_div[geofac_div_at(jc, 0, jb)] + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], + 0)] * this->geofac_div[geofac_div_at(jc, 1, jb)] + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)], 0)] * + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], + 0)] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } @@ -1703,12 +1710,12 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { } // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - 
this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); @@ -1722,7 +1729,6 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); - } TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { @@ -1759,29 +1765,29 @@ TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { } // Test function - div4d<TypeParam>( - this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), this->f4dout.data(), - this->dim4d, this->i_startblk, this->i_endblk, this->i_startidx_in, - this->i_endidx_in, this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Compute reference result and check for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); + this->i_startblk, this->i_endblk, i_startidx, 
i_endidx); for (int ji = 0; ji < dim4d; ++ji) { for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { TypeParam expected = 0.0; for (int je = 0; je < 3; ++je) { - expected += this->f4din[f4din_at( - this->cell_edge_idx[cell_edge_at(jc, jb, je)], - jk, - this->cell_edge_blk[cell_edge_at(jc, jb, je)], - ji)] * this->geofac_div[geofac_div_at(jc, je, jb)]; + expected += + this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * + this->geofac_div[geofac_div_at(jc, je, jb)]; } EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) @@ -1826,7 +1832,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e } // Set edge indices to point to specific cells @@ -1867,12 +1874,11 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, 
this->nblks_c, this->nblks_e); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); @@ -1888,7 +1894,6 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); - } TYPED_TEST(HorizontalDivAvgTest, TestRandom) { @@ -1932,141 +1937,158 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandom) { // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // 
Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c and aux_c2 - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - aux_c2[div_vec_c_at(jc, jk, jb)] = - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->opt_in2[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - 
get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = aux_c2[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - 
aux_c2[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], - ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) - << "opt_out2 results differ at i=" << i << ", k=" << k; - } - } + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, 
nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { 
+ ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + 
for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } } TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { @@ -2099,7 +2121,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = (i + 1) * (k + 1) * 0.5; // Half of vec_e + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e } // Set edge indices to point to specific cells @@ -2140,15 +2163,13 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); @@ -2162,7 +2183,6 
@@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); - } TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { @@ -2185,7 +2205,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { // Parameters for the test int patch_id = 1; bool l_limited_area = true; - bool l2fields = false; // Set to false for this test + bool l2fields = false; // Set to false for this test const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; const auto &avg_coeff_at = at<nproma, 4, nblks_c>; @@ -2200,16 +2220,19 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + this->opt_in2[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway } // Set random edge indices for (int j = 0; j < 3; ++j) { this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2225,7 +2248,8 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { // Random initial values for div_vec_c and opt_out2 for (int k = 0; k < nlev; ++k) { this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); // Not used but initialize anyway + 
this->opt_out2[div_vec_c_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway } } @@ -2233,12 +2257,11 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { div_avg<TypeParam>( this->vec_e.data(), this->cell_neighbor_idx.data(), this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), this->avg_coeff.data(), - this->div_vec_c.data(), this->opt_in2.data(), this->opt_out2.data(), - i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], - this->elev[0], this->nproma, patch_id, l_limited_area, - l2fields, this->lacc, this->nlev, + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, this->nblks_c, this->nblks_e); // Calculate reference values manually @@ -2254,20 +2277,24 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, 
jb, 2)])] * + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * this->geofac_div[geofac_div_at(jc, 2, jb)]; } } } - // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated since l2fields=false) + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated + // since l2fields=false) for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, @@ -2275,12 +2302,14 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = aux_c[div_vec_c_at(jc, jk, jb)]; + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; } } } - // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, not opt_out2) + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, + // not opt_out2) for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, @@ -2289,21 +2318,26 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - 
aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at(this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * this->avg_coeff[avg_coeff_at(jc, 3, jb)]; } } } - // Verify results - only check div_vec_c since l2fields=false means opt_out2 isn't updated + // Verify results - only check div_vec_c since l2fields=false means opt_out2 + // isn't updated for (int i = 0; i < nproma; ++i) { for (int k = 0; k < nlev; ++k) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], @@ -2316,20 +2350,20 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { template <typename ValueType> class HorizontalRotVertexTest : public ::testing::Test { protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int nblks_v = 1; // number of vertex blocks - static constexpr int dim4d = 2; // 4th dimension size + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr int dim4d = 2; // 4th dimension size int i_startblk = 0; - int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int i_endidx_in = nproma; // Full range: 0 .. 
nproma-1 std::vector<int> slev; std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // Not using ACC-specific behavior. + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. std::vector<ValueType> vec_e; std::vector<int> vert_edge_idx; @@ -2339,9 +2373,9 @@ protected: std::vector<ValueType> f4din; std::vector<ValueType> f4dout; - HorizontalRotVertexTest () { + HorizontalRotVertexTest() { slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) vec_e.resize(dim_combine(nproma, nlev, nblks_e)); vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); @@ -2354,12 +2388,11 @@ protected: }; template <typename ValueType> -class HorizontalRotVertexAtmosTest - : public HorizontalRotVertexTest<ValueType> {}; +class HorizontalRotVertexAtmosTest : public HorizontalRotVertexTest<ValueType> { +}; template <typename ValueType> -class HorizontalRotVertexRITest - : public HorizontalRotVertexTest<ValueType> {}; +class HorizontalRotVertexRITest : public HorizontalRotVertexTest<ValueType> {}; TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); @@ -2404,10 +2437,9 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); // Expected values based on the initialization pattern @@ 
-2445,7 +2477,8 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { // Set random edge indices for (int j = 0; j < 6; ++j) { this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2462,10 +2495,9 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { // Call the rot_vertex_atmos function rot_vertex_atmos<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, this->nblks_e, this->nblks_v); // Calculate reference values separately and verify results @@ -2479,23 +2511,29 @@ TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - 
this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * this->geofac_rot[geofac_rot_at(jv, 5, jb)]; } } @@ -2554,10 +2592,9 @@ TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->acc_async, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); // Expected values based on the initialization pattern @@ -2595,7 +2632,8 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) 
{ // Set random edge indices for (int j = 0; j < 6; ++j) { this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; // Keep in same block for simplicity + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity } // Random geometric factors @@ -2612,10 +2650,9 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { // Call the rot_vertex_ri function rot_vertex_ri<TypeParam>( this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), - this->rot_vec.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->acc_async, + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, this->nlev, this->nblks_e, this->nblks_v); // Ensure computation is complete for both modes @@ -2632,23 +2669,29 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { for (int jv = i_startidx; jv < i_endidx; ++jv) { ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + 
this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at(this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * this->geofac_rot[geofac_rot_at(jv, 5, jb)]; } } @@ -2662,5 +2705,4 @@ TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { << "Results differ at i=" << i << ", k=" << k << ")"; } } - } -- GitLab From bad072426c8bf88f4b3c6de95968d65efe9fc8f9 Mon Sep 17 00:00:00 2001 From: Pradipta Samanta <samanta@dkrz.de> Date: Mon, 17 Mar 2025 11:22:04 +0100 Subject: [PATCH 31/35] reverted back some changes --- test/c/test_horizontal_divrot.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index a97c1da..10725a5 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -832,15 +832,11 @@ TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_d(lsq_dim_c); std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - // for (int jb = this->i_startblk; jb < 
this->i_endblk; ++jb) { - { - int jb = 0; + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - // for (int jk = this->slev; jk < this->elev; ++jk) { - { - int jk = 0; + for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( @@ -1242,15 +1238,11 @@ TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - // for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - { - int jb = 0; + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { int i_startidx, i_endidx; get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, this->i_startblk, this->i_endblk, i_startidx, i_endidx); - // for (int jk = this->slev; jk < this->elev; ++jk) { - { - int jk = 0; + for (int jk = this->slev; jk < this->elev; ++jk) { for (int jc = i_startidx; jc < i_endidx; ++jc) { for (int i = 0; i < lsq_dim_c; ++i) { z_d[i] = this->p_cc[p_cc_at( -- GitLab From e03d73d9f3cf6ac16edb5a24164820fad5dec760 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:46:00 +0100 Subject: [PATCH 32/35] Remove redundant code --- test/c/test_horizontal_divrot.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 10725a5..0a0aba3 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -386,10 +386,6 @@ TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; } - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - // this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - // this->lsq_rmat_utri_c[rmat_utri_at(i, 
0, 0)] = 0.1; - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; this->lsq_moments[moments_at(i, 0, 1)] = 0.3; } -- GitLab From 4107598a8bdfa336f134c0809e314baebddd8ba9 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:48:34 +0100 Subject: [PATCH 33/35] Rename lib_divrot to mo_lib_divrot --- src/horizontal/CMakeLists.txt | 2 +- src/horizontal/{lib_divrot.cpp => mo_lib_divrot.cpp} | 2 +- src/horizontal/{lib_divrot.hpp => mo_lib_divrot.hpp} | 0 test/c/test_horizontal_divrot.cpp | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/horizontal/{lib_divrot.cpp => mo_lib_divrot.cpp} (99%) rename src/horizontal/{lib_divrot.hpp => mo_lib_divrot.hpp} (100%) diff --git a/src/horizontal/CMakeLists.txt b/src/horizontal/CMakeLists.txt index d403cb2..f3b75c0 100644 --- a/src/horizontal/CMakeLists.txt +++ b/src/horizontal/CMakeLists.txt @@ -11,7 +11,7 @@ add_library( iconmath-horizontal - lib_divrot.cpp + mo_lib_divrot.cpp mo_lib_divrot.F90 mo_lib_laplace.F90 mo_lib_gradients.F90) diff --git a/src/horizontal/lib_divrot.cpp b/src/horizontal/mo_lib_divrot.cpp similarity index 99% rename from src/horizontal/lib_divrot.cpp rename to src/horizontal/mo_lib_divrot.cpp index a24981d..d086e8b 100644 --- a/src/horizontal/lib_divrot.cpp +++ b/src/horizontal/mo_lib_divrot.cpp @@ -12,7 +12,7 @@ #include <iostream> #include <vector> -#include <horizontal/lib_divrot.hpp> +#include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> template <typename T> diff --git a/src/horizontal/lib_divrot.hpp b/src/horizontal/mo_lib_divrot.hpp similarity index 100% rename from src/horizontal/lib_divrot.hpp rename to src/horizontal/mo_lib_divrot.hpp diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 0a0aba3..6a6e458 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -15,7 +15,7 @@ #include <Kokkos_Core.hpp> #include <gtest/gtest.h> -#include 
<horizontal/lib_divrot.hpp> +#include <horizontal/mo_lib_divrot.hpp> #include <support/mo_lib_loopindices.hpp> // Template function for computing array size. -- GitLab From 29f23212c9fc1983b96899bec2277cf67e94b1a0 Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 13:53:42 +0100 Subject: [PATCH 34/35] Combine Div tests --- test/c/test_horizontal_divrot.cpp | 40 ++++++++++--------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp index 6a6e458..f3cb3c8 100644 --- a/test/c/test_horizontal_divrot.cpp +++ b/test/c/test_horizontal_divrot.cpp @@ -1323,21 +1323,9 @@ protected: } }; -template <typename ValueType> -class HorizontalDiv3DTest : public HorizontalDivTest<ValueType> {}; - -template <typename ValueType> -class HorizontalDiv3D2FTest : public HorizontalDivTest<ValueType> {}; - -template <typename ValueType> -class HorizontalDiv4DTest : public HorizontalDivTest<ValueType> {}; +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); -template <typename ValueType> -class HorizontalDivAvgTest : public HorizontalDivTest<ValueType> {}; - -TYPED_TEST_SUITE(HorizontalDiv3DTest, ValueTypes); - -TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1391,7 +1379,7 @@ TYPED_TEST(HorizontalDiv3DTest, TestSpecific) { EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); } -TYPED_TEST(HorizontalDiv3DTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1477,9 +1465,7 @@ TYPED_TEST(HorizontalDiv3DTest, TestRandom) { } } -TYPED_TEST_SUITE(HorizontalDiv3D2FTest, ValueTypes); - -TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, 
TestDiv3D2FSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1549,7 +1535,7 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); } -TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1667,9 +1653,7 @@ TYPED_TEST(HorizontalDiv3D2FTest, TestRandom) { } } -TYPED_TEST_SUITE(HorizontalDiv4DTest, ValueTypes); - -TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1719,7 +1703,7 @@ TYPED_TEST(HorizontalDiv4DTest, TestSpecific) { EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); } -TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { +TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1787,9 +1771,9 @@ TYPED_TEST(HorizontalDiv4DTest, TestDiv4dRandom) { } } -TYPED_TEST_SUITE(HorizontalDivAvgTest, ValueTypes); +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); -TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -1884,7 +1868,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecific) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); } -TYPED_TEST(HorizontalDivAvgTest, TestRandom) { +TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -2079,7 +2063,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestRandom) { } } 
-TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; @@ -2173,7 +2157,7 @@ TYPED_TEST(HorizontalDivAvgTest, TestSpecificNoL2fields) { EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); } -TYPED_TEST(HorizontalDivAvgTest, TestRandomNoL2fields) { +TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { constexpr int nproma = this->nproma; constexpr int nlev = this->nlev; constexpr int nblks_c = this->nblks_c; -- GitLab From 4503701926af6bec4590d0b3f41879f8181d9b9d Mon Sep 17 00:00:00 2001 From: Yen-Chen <yen-chen.chen@tum.de> Date: Mon, 17 Mar 2025 14:07:16 +0100 Subject: [PATCH 35/35] Split tests into three files --- test/c/CMakeLists.txt | 6 +- test/c/dim_helper.hpp | 88 + test/c/test_horizontal_div.cpp | 1070 ++++++++++++ test/c/test_horizontal_divrot.cpp | 2680 ----------------------------- test/c/test_horizontal_recon.cpp | 1199 +++++++++++++ test/c/test_horizontal_rot.cpp | 378 ++++ 6 files changed, 2740 insertions(+), 2681 deletions(-) create mode 100644 test/c/dim_helper.hpp create mode 100644 test/c/test_horizontal_div.cpp delete mode 100644 test/c/test_horizontal_divrot.cpp create mode 100644 test/c/test_horizontal_recon.cpp create mode 100644 test/c/test_horizontal_rot.cpp diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 18c2710..98a21b2 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -27,7 +27,9 @@ endif() set(SOURCES main.cpp - test_horizontal_divrot.cpp + test_horizontal_div.cpp + test_horizontal_recon.cpp + test_horizontal_rot.cpp test_tdma_solver.cpp test_interpolation_vector.cpp test_intp_rbf.cpp @@ -36,6 +38,8 @@ set(SOURCES # Create the test executable from your test files, including main.cpp. 
add_executable(iconmath_test_c ${SOURCES}) +target_include_directories(iconmath_test_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + # Link the test executable with GoogleTest and Kokkos. target_link_libraries(iconmath_test_c PUBLIC diff --git a/test/c/dim_helper.hpp b/test/c/dim_helper.hpp new file mode 100644 index 0000000..165d5d9 --- /dev/null +++ b/test/c/dim_helper.hpp @@ -0,0 +1,88 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#pragma once + +// Template function for computing array size. +// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by +// dim_combine(2, 3, 4, 5). +// Which will automatically instantiate +// dim_combine<int, int, int, int>(2, 3, 4, 5). +// The function then call dim_combine recursively +// dim_combine<int, int, int, int>(2, 3, 4, 5) { +// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); +// } +// dim_combine<int, int, int>(3, 4, 5) { +// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); +// } +// dim_combine<int, int>(4, 5) { +// return static_cast<size_t>(4) * dim_combine<int>(5); +// } +// Where the last dim_combine is specialized as +// dim_combine<int>(5) { +// return static_cast<size_t>(5); +// } +// Which gives +// dim_combine<int, int, int, int>(2, 3, 4, 5) = +// static_cast<size_t>(2) * static_cast<size_t>(3) * +// static_cast<size_t>(4) * static_cast<size_t>(5) +/// Template helpers for combining multiple dimension array sizes. +/// The base function of dimension combine. Should not be used. +template <typename... Ts> size_t dim_combine(Ts... 
dims) { return 0; } +/// Template specialization of only one dimension, returns the dimension itself. +template <typename T> size_t dim_combine(T dim) { + return static_cast<size_t>(dim); +} +/// Template specialization of picking out the first dimension. The combined +/// dimension is the first dimension times the combined dimension of the rest. +template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { + return static_cast<size_t>(dim) * dim_combine(dims...); +} + +// Template function for LayoutLeft ID access in compile time. +// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its +// corresponding vector id (LayoutLeft) by +// at<2, 3, 4, 5>(id1, id2, id3, id4). +// The at_impl then adds the id from beginning to the end and pass the id prefix +// to the next recursive at_impl function. In this example, +// at<2, 3, 4, 5>(id1, id2, id3, id4) { +// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); +// } +// at_impl<3, 4, 5>(2, id2, id3, id4) { +// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); +// } +// at_impl<4, 5>(2 * 3, id3, id4) { +// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); +// } +// at_impl<5>(2 * 3 * 4, id4) { +// return id4 * 2 * 3 * 4; +// } +// Which gives +// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + +// id3 * 2 * 3 + id4 * 2 * 3 * 4 +/// Helper type converting integer numbers to int +template <class T, auto> using always_t = T; +/// Base function of at_impl. Should not be used. +template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } +/// Template specialization of the last ID +template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } +/// Template specialization of at_impl, accumulate the return value using the +/// first id and pass the prefix to the next recursive at_impl function. +template <int FirstDim, int... Dims> +int at_impl(int prefix, int id, always_t<int, Dims>... 
ids) { + return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); +} +/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming +/// LayoutLeft. Use this function instead of at_impl. +template <int FirstDim, int... Dims> +int at(int id, always_t<int, Dims>... ids) { + return id + at_impl<Dims...>(FirstDim, ids...); +} diff --git a/test/c/test_horizontal_div.cpp b/test/c/test_horizontal_div.cpp new file mode 100644 index 0000000..596d19e --- /dev/null +++ b/test/c/test_horizontal_div.cpp @@ -0,0 +1,1070 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal divergence tests. Templated for the ValueType +template <typename ValueType> class HorizontalDivTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int dim4d = 2; // 4th dimension size + + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; + std::vector<int> elev; + bool lacc = false; // Not using ACC-specific behavior. 
+ + std::vector<ValueType> vec_e; + std::vector<int> cell_edge_idx; + std::vector<int> cell_edge_blk; + std::vector<ValueType> geofac_div; + std::vector<ValueType> div_vec_c; + std::vector<ValueType> f4din; + std::vector<ValueType> f4dout; + + // Followings are needed in HorizontalDivAvgTest + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> avg_coeff; + std::vector<ValueType> opt_in2; + std::vector<ValueType> opt_out2; + + HorizontalDivTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); + geofac_div.resize(dim_combine(nproma, 3, nblks_c)); + div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); + avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); + opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); + opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); + } +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + 
this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Set up random number generators + 
std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div3d function + div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + 
this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->f4din[f4d_at(i, k, 0, 0)] = + (i + 1) * (k + 2); // Different pattern for second field + } + + // Set edge indices to point to specific cells (including self) + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Initialize div_vec_c and 
f4dout to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Check first field (same as in div3d test) + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); + + // Check second field (expected values calculated manually) + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; + const auto 
&f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize div_vec_c and f4dout to random values + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); + } + } + + // Call the div3d_2field function + div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->div_vec_c.data(), this->f4din.data(), + this->f4dout.data(), this->i_startblk, this->i_endblk, + this->i_startidx_in, this->i_endidx_in, this->slev[0], + this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); + std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = 
i_startidx; jc < i_endidx; ++jc) { + // Calculate reference value for first field + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + // Calculate reference value for second field + ref_f4dout[f4dout_at(jc, jk, jb, 0)] = + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)], + 0)] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)], + 0)] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)], + 0)] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Verify results for first field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "First field results differ at i=" << i << ", k=" << k; + } + } + + // Verify results for second field + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], + ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) + << "Second field results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + 
constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); + EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = 
this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; + const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + for (int k = 0; k < nlev; ++k) { + for (int d = 0; d < dim4d; ++d) { + this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); + this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; + } + } + } + + // Test function + div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), + this->geofac_div.data(), this->f4din.data(), + this->f4dout.data(), this->dim4d, this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev.data(), this->elev.data(), this->nproma, + this->lacc, this->nlev, this->nblks_c, this->nblks_e); + + // Compute reference result and check + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int ji = 0; ji < dim4d; ++ji) { + for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + TypeParam expected = 0.0; + for (int je = 0; je < 3; ++je) { + expected += + this->f4din[f4din_at( + this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, + this->cell_edge_blk[cell_edge_at(jc, 
jb, je)], ji)] * + this->geofac_div[geofac_div_at(jc, je, jb)]; + } + + EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) + << "Random test fails at jc=" << jc << ", jk=" << jk + << ", jb=" << jb << ", ji=" << ji; + } + } + } + } +} + +TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for additional parameters + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All 
edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); + 
EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = true; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] 
= real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c and aux_c2 + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + 
this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + + aux_c2[div_vec_c_at(jc, jk, jb)] = + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->opt_in2[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)]; + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + 
this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + + ref_opt_out2[div_vec_c_at(jc, jk, jb)] = + aux_c2[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c2[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], + ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) + << "opt_out2 results differ at i=" << i << ", k=" << k; + } + } +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + constexpr int dim4d = this->dim4d; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 
3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Initialize the vectors with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + this->opt_in2[vec_e_at(i, k, 0)] = + (i + 1) * (k + 1) * 0.5; // Half of vec_e + } + + // Set edge indices to point to specific cells + this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; + this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; + + // Set neighbor indices similarly + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; + + // All edges and neighbors are in the same block for this test + for (int j = 0; j < 3; ++j) { + this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Geometric factors + this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; + this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; + this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; + + // Average coefficients + this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self + this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor + this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor + this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor + + // Initialize div_vec_c and opt_out2 to zero + for (int k = 0; k < nlev; ++k) 
{ + this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; + this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; + } + } + + // Call the div_avg function + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); + + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); + EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); +} + +TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int nblks_e = this->nblks_e; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &cell_edge_at = at<nproma, nblks_c, 3>; + const auto &geofac_div_at = at<nproma, 3, nblks_c>; + const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; + + // Vectors for block and index ranges + std::vector<int> i_startblk_in(3, 0); + std::vector<int> 
i_endblk_in(3, nblks_c); + std::vector<int> i_startidx_in(3, 0); + std::vector<int> i_endidx_in(3, nproma); + + // Parameters for the test + int patch_id = 1; + bool l_limited_area = true; + bool l2fields = false; // Set to false for this test + + const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; + const auto &avg_coeff_at = at<nproma, 4, nblks_c>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialize with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + this->opt_in2[vec_e_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + + // Set random edge indices + for (int j = 0; j < 3; ++j) { + this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); + this->cell_edge_blk[cell_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 3; ++j) { + this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); + } + + // Random average coefficients + for (int j = 0; j < 4; ++j) { + this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); + } + + // Random initial values for div_vec_c and opt_out2 + for (int k = 0; k < nlev; ++k) { + this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); + this->opt_out2[div_vec_c_at(i, k, 0)] = + real_distrib(gen); // Not used but initialize anyway + } + } + + // Call the div_avg function with l2fields=false + div_avg<TypeParam>( + this->vec_e.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), + this->cell_edge_blk.data(), 
this->geofac_div.data(), + this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), + this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), + i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], + this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, + this->nblks_c, this->nblks_e); + + // Calculate reference values manually + std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); + std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); + + // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) + for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, + i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + aux_c[div_vec_c_at(jc, jk, jb)] = + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * + this->geofac_div[geofac_div_at(jc, 0, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * + this->geofac_div[geofac_div_at(jc, 1, jb)] + + this->vec_e[vec_e_at( + this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, + this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * + this->geofac_div[geofac_div_at(jc, 2, jb)]; + } + } + } + + // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated + // since l2fields=false) + for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, + i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + 
aux_c[div_vec_c_at(jc, jk, jb)]; + } + } + } + + // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, + // not opt_out2) + for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, + i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = + aux_c[div_vec_c_at(jc, jk, jb)] * + this->avg_coeff[avg_coeff_at(jc, 0, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * + this->avg_coeff[avg_coeff_at(jc, 1, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * + this->avg_coeff[avg_coeff_at(jc, 2, jb)] + + aux_c[div_vec_c_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * + this->avg_coeff[avg_coeff_at(jc, 3, jb)]; + } + } + } + + // Verify results - only check div_vec_c since l2fields=false means opt_out2 + // isn't updated + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], + ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) + << "div_vec_c results differ at i=" << i << ", k=" << k; + } + } +} diff --git a/test/c/test_horizontal_divrot.cpp b/test/c/test_horizontal_divrot.cpp deleted file mode 100644 index f3cb3c8..0000000 --- a/test/c/test_horizontal_divrot.cpp +++ /dev/null @@ -1,2680 +0,0 @@ -// ICON -// -// --------------------------------------------------------------- -// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss -// Contact information: icon-model.org -// -// See AUTHORS.TXT for a list of authors -// See LICENSES/ for license information -// 
SPDX-License-Identifier: BSD-3-Clause -// --------------------------------------------------------------- - -#include <iostream> -#include <random> -#include <vector> - -#include <Kokkos_Core.hpp> -#include <gtest/gtest.h> -#include <horizontal/mo_lib_divrot.hpp> -#include <support/mo_lib_loopindices.hpp> - -// Template function for computing array size. -// For example, we get the array size of a 4-dimensional array A(2, 3, 4, 5) by -// dim_combine(2, 3, 4, 5). -// Which will automatically instantiate -// dim_combine<int, int, int, int>(2, 3, 4, 5). -// The function then call dim_combine recursively -// dim_combine<int, int, int, int>(2, 3, 4, 5) { -// return static_cast<size_t>(2) * dim_combine<int, int, int>(3, 4, 5); -// } -// dim_combine<int, int, int>(3, 4, 5) { -// return static_cast<size_t>(3) * dim_combine<int, int>(4, 5); -// } -// dim_combine<int, int>(4, 5) { -// return static_cast<size_t>(4) * dim_combine<int>(5); -// } -// Where the last dim_combine is specialized as -// dim_combine<int>(5) { -// return static_cast<size_t>(5); -// } -// Which gives -// dim_combine<int, int, int, int>(2, 3, 4, 5) = -// static_cast<size_t>(2) * static_cast<size_t>(3) * -// static_cast<size_t>(4) * static_cast<size_t>(5) -/// Template helpers for combining multiple dimension array sizes. -/// The base function of dimension combine. Should not be used. -template <typename... Ts> size_t dim_combine(Ts... dims) { return 0; } -/// Template specialization of only one dimension, returns the dimension itself. -template <typename T> size_t dim_combine(T dim) { - return static_cast<size_t>(dim); -} -/// Template specialization of picking out the first dimension. The combined -/// dimension is the first dimension times the combined dimension of the rest. -template <typename T, typename... Ts> size_t dim_combine(T dim, Ts... dims) { - return static_cast<size_t>(dim) * dim_combine(dims...); -} - -// Template function for LayoutLeft ID access in compile time. 
-// For example, a multi-dimensional array A of dimensions <2, 3, 4, 5> gets its -// corresponding vector id (LayoutLeft) by -// at<2, 3, 4, 5>(id1, id2, id3, id4). -// The at_impl then adds the id from beginning to the end and pass the id prefix -// to the next recursive at_impl function. In this example, -// at<2, 3, 4, 5>(id1, id2, id3, id4) { -// return id1 + at_impl<3, 4, 5>(2, id2, id3, id4); -// } -// at_impl<3, 4, 5>(2, id2, id3, id4) { -// return id2 * 2 + at_impl<4, 5>(2 * 3, id3, id4); -// } -// at_impl<4, 5>(2 * 3, id3, id4) { -// return id3 * 2 * 3 + at_impl<5>(2 * 3 * 4, id4); -// } -// at_impl<5>(2 * 3 * 4, id4) { -// return id4 * 2 * 3 * 4; -// } -// Which gives -// at<2, 3, 4, 5>(id1, id2, id3, id4) = id1 + id2 * 2 + -// id3 * 2 * 3 + id4 * 2 * 3 * 4 -/// Helper type converting integer numbers to int -template <class T, auto> using always_t = T; -/// Base function of at_impl. Should not be used. -template <int... Dims> int at_impl(always_t<int, Dims>... ids) { return 0; } -/// Template specialization of the last ID -template <int LastDim> int at_impl(int prefix, int id) { return id * prefix; } -/// Template specialization of at_impl, accumulate the return value using the -/// first id and pass the prefix to the next recursive at_impl function. -template <int FirstDim, int... Dims> -int at_impl(int prefix, int id, always_t<int, Dims>... ids) { - return id * prefix + at_impl<Dims...>(prefix * FirstDim, ids...); -} -/// at<dim1, dim2, ...>(id1, id2, ...) gets its memory index in vector assuming -/// LayoutLeft. Use this function instead of at_impl. -template <int FirstDim, int... Dims> -int at(int id, always_t<int, Dims>... ids) { - return id + at_impl<Dims...>(FirstDim, ids...); -} - -/// Enum class for the reconstruction method -enum class ReconstructionMethod { - linear, - quadratic, - cubic, -}; - -/// Base test class for the horizontal divrot tests. Templated for the ValueType -/// and ReconMethod for the reconstruction method. 
-template <typename ValueType, int ReconMethod> -class HorizontalReconTest : public ::testing::Test { -protected: - // lsq_dim_c and lsq_dim_unk are instantiated in compile time. - static constexpr std::tuple<int, int> - init_lsq_dim(ReconstructionMethod method) { - switch (method) { - case ReconstructionMethod::linear: - return std::make_tuple(3, 2); - case ReconstructionMethod::quadratic: - return std::make_tuple(9, 5); - case ReconstructionMethod::cubic: - return std::make_tuple(9, 9); - } - } - - // Constant dimensions. - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 1; // number of vertical levels - static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) - static constexpr std::tuple<int, int> lsq_dim = - init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); - static constexpr int lsq_dim_c = std::get<0>(lsq_dim); - static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); - - // Parameter values. - int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - int slev = 0; - int elev = nlev; // Full vertical range (0 .. nlev-1) - int patch_id = 0; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // No asynchronous execution. - bool l_consv = true; // With conservative correction. 
- bool l_limited_area = true; // Limited area setup - - std::vector<ValueType> p_cc; - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> lsq_qtmat_c; - std::vector<ValueType> lsq_rmat_rdiag_c; - std::vector<ValueType> lsq_rmat_utri_c; - std::vector<ValueType> lsq_moments; - std::vector<ValueType> lsq_pseudoinv; - std::vector<ValueType> p_coeff; - - HorizontalReconTest() { - p_cc.resize(dim_combine(nproma, nlev, nblks_c)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); - lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); - lsq_rmat_utri_c.resize(dim_combine( - nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); - lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); - lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); - p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); - } -}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to linear. -template <typename ValueType> -class HorizontalReconLinearTest - : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::linear)> { -}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to quadratic. -template <typename ValueType> -class HorizontalReconQuadraticTest - : public HorizontalReconTest< - ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; - -/// Test class for the horizontal tests. The reconstruction method is specified -/// to cubic. 
-template <typename ValueType> -class HorizontalReconCubicTest - : public HorizontalReconTest<ValueType, static_cast<int>( - ReconstructionMethod::cubic)> { -}; - -/// ValueTypes which the divrot tests should run with -typedef ::testing::Types<float, double> ValueTypes; - -TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l<TypeParam>( - 
this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.34, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 1.0, 1e-6); -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 
0, j)] = 0; - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); - this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); - - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); - } - - // Test function - recon_lsq_cell_l<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - z_qt_times_d[0] = 0.0; - z_qt_times_d[1] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * 
z_d[i]; - z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; - } - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * - (z_qt_times_d[0] - - this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - 
this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.65, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); -} - -TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for 
(int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - - this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); - } - - // Test function - recon_lsq_cell_l_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, - this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = - 
this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + - this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)] - - p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * - this->lsq_moments[moments_at(jc, jb, 0)] - - p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * - this->lsq_moments[moments_at(jc, jb, 1)]; - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 
j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; - } - - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - } - - // Test function - recon_lsq_cell_q<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.24, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 3.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -2.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 2.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -3.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, 
nproma, nlev, nblks_c>(5, 0, 0, 0))], - 2.6, 1e-6); -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_q<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), 
this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d[j] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; - } - } - int utri_id = 0; - for (int j = lsq_dim_unk; j > 0; --j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; - for (int k = j + 1; k <= lsq_dim_unk; ++k) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - 
this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; 
- } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -0.56, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 1.3, 1e-6); -} - -TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - 
std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization is done only for iblk = 0 and ilev = 0 - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_q_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 1; j < lsq_dim_unk + 1; ++j) { - 
p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; - } - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) - << "For loop result fails for j = " << j << ", jc = " << jc; - } - } -} - -TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - 
for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; - this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; - this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; - this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; - this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; - this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; - this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; - this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; - this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; - } - - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - 0.28, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 0.4, 1e-6); 
- EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - -0.2, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.4, 1e-6); -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; - const auto &rmat_utri_at = - at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = 
int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); - this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); - } - for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { - this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_c<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), - this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 0; j < lsq_dim_unk; ++j) { - z_qt_times_d[j] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - 
z_qt_times_d[j] += - this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; - } - } - int utri_id = 0; - for (int j = lsq_dim_unk; j > 0; --j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; - for (int k = j + 1; k <= lsq_dim_unk; ++k) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= - this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * - p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= - this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; - for (int j = 1; j < lsq_dim_c; ++j) { - 
this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_c; ++j) { - this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; - this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; - this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; - this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; - this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; - this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; - this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; - this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; - this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; - } - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; - } - - this->lsq_moments[moments_at(i, 0, 0)] = 0.2; - this->lsq_moments[moments_at(i, 0, 1)] = 0.3; - this->lsq_moments[moments_at(i, 0, 2)] = 0.4; - this->lsq_moments[moments_at(i, 0, 3)] = 0.5; - this->lsq_moments[moments_at(i, 0, 4)] = 0.6; - this->lsq_moments[moments_at(i, 0, 5)] = 0.7; - this->lsq_moments[moments_at(i, 0, 6)] = 0.8; - this->lsq_moments[moments_at(i, 0, 7)] = 0.9; - this->lsq_moments[moments_at(i, 0, 8)] = 1.0; - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Check result - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], - -1.64, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], - 1.0, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], - 0.9, 1e-6); - EXPECT_NEAR( - 
this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], - 0.8, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], - 0.7, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], - 0.6, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], - 0.5, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], - 0.4, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], - 0.3, 1e-6); - EXPECT_NEAR( - this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], - 0.2, 1e-6); -} - -TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int lsq_dim_c = this->lsq_dim_c; - constexpr int lsq_dim_unk = this->lsq_dim_unk; - - const auto &p_cc_at = at<nproma, nlev, nblks_c>; - const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; - const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; - const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; - const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); - - // Initialization - for (int i = 0; i < nproma; ++i) { - this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); - - for (int j = 0; j < lsq_dim_c; ++j) { - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - for (int j = 0; j < lsq_dim_unk; ++j) { - for (int k = 0; k < lsq_dim_c; ++k) { - this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); - } - this->lsq_moments[moments_at(i, 0, j)] = 
real_distrib(gen); - } - - for (int j = 0; j < lsq_dim_unk + 1; ++j) { - this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); - } - } - - // Test function - recon_lsq_cell_c_svd<TypeParam>( - this->p_cc.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), - this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, - this->elev, this->nproma, this->patch_id, this->l_limited_area, - this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, - this->lsq_dim_c); - - // Compute reference result - std::vector<TypeParam> z_d(lsq_dim_c); - std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); - std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - for (int jk = this->slev; jk < this->elev; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - for (int i = 0; i < lsq_dim_c; ++i) { - z_d[i] = this->p_cc[p_cc_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - - this->p_cc[p_cc_at(jc, jk, jb)]; - } - for (int j = 1; j < lsq_dim_unk + 1; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; - for (int i = 0; i < lsq_dim_c; ++i) { - p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += - this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; - } - } - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = - this->p_cc[p_cc_at(jc, jk, jb)]; - for (int j = 0; j < lsq_dim_unk; ++j) { - p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= - p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * - this->lsq_moments[moments_at(jc, jb, j)]; - } - } - } - } - // Check result - for (int i = 0; i < lsq_dim_unk + 1; ++i) { - for (int jc = 0; jc < nproma; ++jc) { - 
EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], - p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) - << "For loop result fails for i = " << i << ", jc = " << jc; - } - } -} - -template <typename ValueType> class HorizontalDivTest : public ::testing::Test { -protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_c = 1; // number of cell blocks - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int dim4d = 2; // 4th dimension size - - int i_startblk = 0; - int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - std::vector<int> slev; - std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - - std::vector<ValueType> vec_e; - std::vector<int> cell_edge_idx; - std::vector<int> cell_edge_blk; - std::vector<ValueType> geofac_div; - std::vector<ValueType> div_vec_c; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; - - // Followings are needed in HorizontalDivAvgTest - std::vector<int> cell_neighbor_idx; - std::vector<int> cell_neighbor_blk; - std::vector<ValueType> avg_coeff; - std::vector<ValueType> opt_in2; - std::vector<ValueType> opt_out2; - - HorizontalDivTest() { - slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - cell_edge_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_edge_blk.resize(dim_combine(nproma, nblks_c, 3)); - geofac_div.resize(dim_combine(nproma, 3, nblks_c)); - div_vec_c.resize(dim_combine(nproma, nlev, nblks_c)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_c, dim4d)); - cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, 3)); - cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, 3)); - avg_coeff.resize(dim_combine(nproma, 4, nblks_c)); - opt_in2.resize(dim_combine(nproma, nlev, nblks_e)); - opt_out2.resize(dim_combine(nproma, nlev, nblks_c)); - } -}; - -TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); - -TYPED_TEST(HorizontalDivTest, TestDiv3DSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Initialize div_vec_c to zero - for (int k = 0; k < nlev; ++k) { 
- this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div3d function - div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv3DRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // 
Initialize div_vec_c to random values - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div3d function - div3d<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDiv3D2FSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = 
this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->f4din[f4d_at(i, k, 0, 0)] = - (i + 1) * (k + 2); // Different pattern for second field - } - - // Set edge indices to point to specific cells (including self) - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Initialize div_vec_c and f4dout to zero - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->f4dout[f4dout_at(i, k, 0, 0)] = 0.0; - } - } - - // Call the div3d_2field function - div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Check first field (same as in div3d test) - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 
0)], 3.4, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.4, 1e-6); - - // Check second field (expected values calculated manually) - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 5.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 6.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 4.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 6.6, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv3D2FRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - const auto &f4d_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->f4din[f4d_at(i, k, 0, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random 
geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize div_vec_c and f4dout to random values - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, 0)] = real_distrib(gen); - } - } - - // Call the div3d_2field function - div3d_2field<TypeParam>(this->vec_e.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->div_vec_c.data(), this->f4din.data(), - this->f4dout.data(), this->i_startblk, this->i_endblk, - this->i_startidx_in, this->i_endidx_in, this->slev[0], - this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_div_vec_c(nproma * nlev * nblks_c, 0.0); - std::vector<TypeParam> ref_f4dout(nproma * nlev * nblks_c * dim4d, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - // Calculate reference value for first field - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - // Calculate reference value for second field - ref_f4dout[f4dout_at(jc, jk, jb, 0)] 
= - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)], - 0)] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)], - 0)] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->f4din[f4d_at(this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)], - 0)] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Verify results for first field - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "First field results differ at i=" << i << ", k=" << k; - } - } - - // Verify results for second field - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->f4dout[f4dout_at(i, k, 0, 0)], - ref_f4dout[f4dout_at(i, k, 0, 0)], 1e-5) - << "Second field results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDiv4DSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - // Initialization - for (int i = 0; i < nproma; ++i) { - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = (i + j) % nproma; - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = 0.1 * (j + 1); - } - - for (int k = 0; k < nlev; ++k) { - for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = 1.0 + i + k + d; - this->f4dout[f4dout_at(i, k, 0, 
d)] = 0.0; - } - } - } - - // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 0)], 1.4, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 0)], 1.1, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 0)], 2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 0, 0, 1)], 2.0, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 0, 0, 1)], 1.7, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(0, 1, 0, 1)], 2.6, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(1, 1, 0, 1)], 2.3, 1e-6); - EXPECT_NEAR(this->f4dout[f4dout_at(2, 1, 0, 1)], 2.3, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDiv4DRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &f4din_at = at<nproma, nlev, nblks_e, dim4d>; - const auto &f4dout_at = at<nproma, nlev, nblks_c, dim4d>; - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - 
this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - for (int k = 0; k < nlev; ++k) { - for (int d = 0; d < dim4d; ++d) { - this->f4din[f4din_at(i, k, 0, d)] = real_distrib(gen); - this->f4dout[f4dout_at(i, k, 0, d)] = 0.0; - } - } - } - - // Test function - div4d<TypeParam>(this->cell_edge_idx.data(), this->cell_edge_blk.data(), - this->geofac_div.data(), this->f4din.data(), - this->f4dout.data(), this->dim4d, this->i_startblk, - this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev.data(), this->elev.data(), this->nproma, - this->lacc, this->nlev, this->nblks_c, this->nblks_e); - - // Compute reference result and check - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int ji = 0; ji < dim4d; ++ji) { - for (int jk = this->slev[ji]; jk < this->elev[ji]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - TypeParam expected = 0.0; - for (int je = 0; je < 3; ++je) { - expected += - this->f4din[f4din_at( - this->cell_edge_idx[cell_edge_at(jc, jb, je)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, je)], ji)] * - this->geofac_div[geofac_div_at(jc, je, jb)]; - } - - EXPECT_NEAR(this->f4dout[f4dout_at(jc, jk, jb, ji)], expected, 1e-5) - << "Random test fails at jc=" << jc << ", jk=" << jk - << ", jb=" << jb << ", ji=" << ji; - } - } - } - } -} - -TYPED_TEST_SUITE(HorizontalDivTest, ValueTypes); - -TYPED_TEST(HorizontalDivTest, TestDivAvgSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const 
auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for additional parameters - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = true; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Initialize the vectors with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k + 1) * 0.5; // Half of vec_e - } - - // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges and neighbors are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor - - // Initialize div_vec_c and opt_out2 to zero - for (int k = 
0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.94, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 1.88, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 1.02, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 1.04, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 2.08, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - 
std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = true; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), 
i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> aux_c2(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_opt_out2(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c and aux_c2 - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - - aux_c2[div_vec_c_at(jc, jk, jb)] = - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->opt_in2[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 
2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c and aux_c2 to opt_out2 for patch_id > 0 - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)]; - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - - ref_opt_out2[div_vec_c_at(jc, jk, jb)] = - aux_c2[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - 
this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c2[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(i, k, 0)], - ref_opt_out2[div_vec_c_at(i, k, 0)], 1e-5) - << "opt_out2 results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgSpecificNoL2fields) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - constexpr int dim4d = this->dim4d; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = false; - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Initialize the vectors with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - this->opt_in2[vec_e_at(i, k, 0)] = - (i + 1) * (k 
+ 1) * 0.5; // Half of vec_e - } - - // Set edge indices to point to specific cells - this->cell_edge_idx[cell_edge_at(i, 0, 0)] = i; - this->cell_edge_idx[cell_edge_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_edge_idx[cell_edge_at(i, 0, 2)] = (i + 2) % nproma; - - // Set neighbor indices similarly - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = i; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = (i + 1) % nproma; - this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = (i + 2) % nproma; - - // All edges and neighbors are in the same block for this test - for (int j = 0; j < 3; ++j) { - this->cell_edge_blk[cell_edge_at(i, 0, j)] = 0; - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; - } - - // Geometric factors - this->geofac_div[geofac_div_at(i, 0, 0)] = 0.5; - this->geofac_div[geofac_div_at(i, 1, 0)] = 0.3; - this->geofac_div[geofac_div_at(i, 2, 0)] = 0.2; - - // Average coefficients - this->avg_coeff[avg_coeff_at(i, 0, 0)] = 0.4; // Self - this->avg_coeff[avg_coeff_at(i, 1, 0)] = 0.2; // First neighbor - this->avg_coeff[avg_coeff_at(i, 2, 0)] = 0.2; // Second neighbor - this->avg_coeff[avg_coeff_at(i, 3, 0)] = 0.2; // Third neighbor - - // Initialize div_vec_c and opt_out2 to zero - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = 0.0; - this->opt_out2[div_vec_c_at(i, k, 0)] = 0.0; - } - } - - // Call the div_avg function - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 0, 0)], 1.88, 1e-6); - 
EXPECT_NEAR(this->div_vec_c[div_vec_c_at(0, 1, 0)], 3.76, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 0, 0)], 2.04, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(1, 1, 0)], 4.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 0, 0)], 2.08, 1e-6); - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(2, 1, 0)], 4.16, 1e-6); - - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(0, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(1, 1, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 0, 0)], 0.0, 1e-6); - EXPECT_NEAR(this->opt_out2[div_vec_c_at(2, 1, 0)], 0.0, 1e-6); -} - -TYPED_TEST(HorizontalDivTest, TestDivAvgRandomNoL2fields) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_c = this->nblks_c; - constexpr int nblks_e = this->nblks_e; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &cell_edge_at = at<nproma, nblks_c, 3>; - const auto &geofac_div_at = at<nproma, 3, nblks_c>; - const auto &div_vec_c_at = at<nproma, nlev, nblks_c>; - - // Vectors for block and index ranges - std::vector<int> i_startblk_in(3, 0); - std::vector<int> i_endblk_in(3, nblks_c); - std::vector<int> i_startidx_in(3, 0); - std::vector<int> i_endidx_in(3, nproma); - - // Parameters for the test - int patch_id = 1; - bool l_limited_area = true; - bool l2fields = false; // Set to false for this test - - const auto &cell_neighbor_at = at<nproma, nblks_c, 3>; - const auto &avg_coeff_at = at<nproma, 4, nblks_c>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialize with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = 
real_distrib(gen); - this->opt_in2[vec_e_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway - } - - // Set random edge indices - for (int j = 0; j < 3; ++j) { - this->cell_edge_idx[cell_edge_at(i, 0, j)] = int_distrib(gen); - this->cell_edge_blk[cell_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - - this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); - this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 3; ++j) { - this->geofac_div[geofac_div_at(i, j, 0)] = real_distrib(gen); - } - - // Random average coefficients - for (int j = 0; j < 4; ++j) { - this->avg_coeff[avg_coeff_at(i, j, 0)] = real_distrib(gen); - } - - // Random initial values for div_vec_c and opt_out2 - for (int k = 0; k < nlev; ++k) { - this->div_vec_c[div_vec_c_at(i, k, 0)] = real_distrib(gen); - this->opt_out2[div_vec_c_at(i, k, 0)] = - real_distrib(gen); // Not used but initialize anyway - } - } - - // Call the div_avg function with l2fields=false - div_avg<TypeParam>( - this->vec_e.data(), this->cell_neighbor_idx.data(), - this->cell_neighbor_blk.data(), this->cell_edge_idx.data(), - this->cell_edge_blk.data(), this->geofac_div.data(), - this->avg_coeff.data(), this->div_vec_c.data(), this->opt_in2.data(), - this->opt_out2.data(), i_startblk_in.data(), i_endblk_in.data(), - i_startidx_in.data(), i_endidx_in.data(), this->slev[0], this->elev[0], - this->nproma, patch_id, l_limited_area, l2fields, this->lacc, this->nlev, - this->nblks_c, this->nblks_e); - - // Calculate reference values manually - std::vector<TypeParam> aux_c(dim_combine(nproma, nlev, nblks_c)); - std::vector<TypeParam> ref_div_vec_c(dim_combine(nproma, nlev, nblks_c)); - - // Step 1: Calculate aux_c (but not aux_c2 since l2fields=false) - for (int jb = i_startblk_in[0]; jb < i_endblk_in[0]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[0], i_endidx_in[0], 
nproma, jb, - i_startblk_in[0], i_endblk_in[0], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - aux_c[div_vec_c_at(jc, jk, jb)] = - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 0)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 0)])] * - this->geofac_div[geofac_div_at(jc, 0, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 1)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 1)])] * - this->geofac_div[geofac_div_at(jc, 1, jb)] + - this->vec_e[vec_e_at( - this->cell_edge_idx[cell_edge_at(jc, jb, 2)], jk, - this->cell_edge_blk[cell_edge_at(jc, jb, 2)])] * - this->geofac_div[geofac_div_at(jc, 2, jb)]; - } - } - } - - // Step 2: Assign aux_c to div_vec_c for patch_id > 0 (opt_out2 not updated - // since l2fields=false) - for (int jb = i_startblk_in[1]; jb < i_endblk_in[1]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[1], i_endidx_in[1], nproma, jb, - i_startblk_in[1], i_endblk_in[1], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)]; - } - } - } - - // Step 3: Perform averaging for the rest of the blocks (only for div_vec_c, - // not opt_out2) - for (int jb = i_startblk_in[2]; jb < i_endblk_in[2]; ++jb) { - int i_startidx, i_endidx; - get_indices_c_lib(i_startidx_in[2], i_endidx_in[2], nproma, jb, - i_startblk_in[2], i_endblk_in[2], i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jc = i_startidx; jc < i_endidx; ++jc) { - ref_div_vec_c[div_vec_c_at(jc, jk, jb)] = - aux_c[div_vec_c_at(jc, jk, jb)] * - this->avg_coeff[avg_coeff_at(jc, 0, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 0)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 0)])] * - 
this->avg_coeff[avg_coeff_at(jc, 1, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 1)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 1)])] * - this->avg_coeff[avg_coeff_at(jc, 2, jb)] + - aux_c[div_vec_c_at( - this->cell_neighbor_idx[cell_neighbor_at(jc, jb, 2)], jk, - this->cell_neighbor_blk[cell_neighbor_at(jc, jb, 2)])] * - this->avg_coeff[avg_coeff_at(jc, 3, jb)]; - } - } - } - - // Verify results - only check div_vec_c since l2fields=false means opt_out2 - // isn't updated - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->div_vec_c[div_vec_c_at(i, k, 0)], - ref_div_vec_c[div_vec_c_at(i, k, 0)], 1e-5) - << "div_vec_c results differ at i=" << i << ", k=" << k; - } - } -} - -template <typename ValueType> -class HorizontalRotVertexTest : public ::testing::Test { -protected: - static constexpr int nproma = 3; // inner loop length - static constexpr int nlev = 2; // number of vertical levels - static constexpr int nblks_e = 1; // number of edge blocks - static constexpr int nblks_v = 1; // number of vertex blocks - static constexpr int dim4d = 2; // 4th dimension size - - int i_startblk = 0; - int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] - int i_startidx_in = 0; - int i_endidx_in = nproma; // Full range: 0 .. nproma-1 - std::vector<int> slev; - std::vector<int> elev; - bool lacc = false; // Not using ACC-specific behavior. - bool acc_async = false; // Not using ACC-specific behavior. - - std::vector<ValueType> vec_e; - std::vector<int> vert_edge_idx; - std::vector<int> vert_edge_blk; - std::vector<ValueType> geofac_rot; - std::vector<ValueType> rot_vec; - std::vector<ValueType> f4din; - std::vector<ValueType> f4dout; - - HorizontalRotVertexTest() { - slev.resize(dim4d, 0); - elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) - - vec_e.resize(dim_combine(nproma, nlev, nblks_e)); - vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); - vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); - geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); - rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); - f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); - f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); - } -}; - -template <typename ValueType> -class HorizontalRotVertexAtmosTest : public HorizontalRotVertexTest<ValueType> { -}; - -template <typename ValueType> -class HorizontalRotVertexRITest : public HorizontalRotVertexTest<ValueType> {}; - -TYPED_TEST_SUITE(HorizontalRotVertexAtmosTest, ValueTypes); - -TYPED_TEST(HorizontalRotVertexAtmosTest, TestSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific edges - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; - // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; - } - - // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; - - // Initialize rot_vec to zero - for (int k = 0; k < nlev; ++k) { - 
this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; - } - } - - // Call the rot_vertex_atmos function - rot_vertex_atmos<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_e, this->nblks_v); - - // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalRotVertexAtmosTest, TestRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 6; ++j) { - 
this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize rot_vec to random values - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the rot_vertex_atmos function - rot_vertex_atmos<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, - this->nblks_e, this->nblks_v); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { - ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - 
this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; - } - } - } - - // Verify results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k; - } - } -} - -TYPED_TEST_SUITE(HorizontalRotVertexRITest, ValueTypes); - -TYPED_TEST(HorizontalRotVertexRITest, TestSpecific) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Initialization with specific values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern - } - - // Set edge indices to point to specific edges - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; - // All edges are in the same block for this test - this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; - } - - // Geometric factors for rotation - this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; - this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; - this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; - this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; - - // Initialize rot_vec to zero - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; - } - } - - // Call the rot_vertex_ri function - rot_vertex_ri<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - 
this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, - this->nlev, this->nblks_e, this->nblks_v); - - // Expected values based on the initialization pattern - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); - EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); -} - -TYPED_TEST(HorizontalRotVertexRITest, TestRandom) { - constexpr int nproma = this->nproma; - constexpr int nlev = this->nlev; - constexpr int nblks_e = this->nblks_e; - constexpr int nblks_v = this->nblks_v; - - const auto &vec_e_at = at<nproma, nlev, nblks_e>; - const auto &vert_edge_at = at<nproma, nblks_v, 6>; - const auto &geofac_rot_at = at<nproma, 6, nblks_v>; - const auto &rot_vec_at = at<nproma, nlev, nblks_v>; - - // Set up random number generators - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<int> int_distrib(0, nproma - 1); - std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); - - // Initialization with random values - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); - } - - // Set random edge indices - for (int j = 0; j < 6; ++j) { - this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); - this->vert_edge_blk[vert_edge_at(i, 0, j)] = - 0; // Keep in same block for simplicity - } - - // Random geometric factors - for (int j = 0; j < 6; ++j) { - this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); - } - - // Initialize rot_vec to random values - for (int k = 0; k < nlev; ++k) { - this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); - } - } - - // Call the rot_vertex_ri function - 
rot_vertex_ri<TypeParam>( - this->vec_e.data(), this->vert_edge_idx.data(), - this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), - this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, - this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, - this->nlev, this->nblks_e, this->nblks_v); - - // Ensure computation is complete for both modes - Kokkos::fence(); - - // Calculate reference values separately and verify results - std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); - - for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { - int i_startidx, i_endidx; - get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, - this->i_startblk, this->i_endblk, i_startidx, i_endidx); - - for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { - for (int jv = i_startidx; jv < i_endidx; ++jv) { - ref_rot_vec[rot_vec_at(jv, jk, jb)] = - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * - this->geofac_rot[geofac_rot_at(jv, 0, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * - this->geofac_rot[geofac_rot_at(jv, 1, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * - this->geofac_rot[geofac_rot_at(jv, 2, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * - this->geofac_rot[geofac_rot_at(jv, 3, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * - this->geofac_rot[geofac_rot_at(jv, 4, jb)] + - this->vec_e[vec_e_at( - this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, - this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * - this->geofac_rot[geofac_rot_at(jv, 5, jb)]; - } - } - } - - // Verify 
results - for (int i = 0; i < nproma; ++i) { - for (int k = 0; k < nlev; ++k) { - EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], - ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) - << "Results differ at i=" << i << ", k=" << k << ")"; - } - } -} diff --git a/test/c/test_horizontal_recon.cpp b/test/c/test_horizontal_recon.cpp new file mode 100644 index 0000000..8938a10 --- /dev/null +++ b/test/c/test_horizontal_recon.cpp @@ -0,0 +1,1199 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Enum class for the reconstruction method +enum class ReconstructionMethod { + linear, + quadratic, + cubic, +}; + +/// Base test class for the horizontal reconstruct tests. Templated for the ValueType +/// and ReconMethod for the reconstruction method. +template <typename ValueType, int ReconMethod> +class HorizontalReconTest : public ::testing::Test { +protected: + // lsq_dim_c and lsq_dim_unk are instantiated in compile time. + static constexpr std::tuple<int, int> + init_lsq_dim(ReconstructionMethod method) { + switch (method) { + case ReconstructionMethod::linear: + return std::make_tuple(3, 2); + case ReconstructionMethod::quadratic: + return std::make_tuple(9, 5); + case ReconstructionMethod::cubic: + return std::make_tuple(9, 9); + } + } + + // Constant dimensions. 
+ static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 1; // number of vertical levels + static constexpr int nblks_c = 1; // number of cell blocks (for p_e_in) + static constexpr std::tuple<int, int> lsq_dim = + init_lsq_dim(static_cast<ReconstructionMethod>(ReconMethod)); + static constexpr int lsq_dim_c = std::get<0>(lsq_dim); + static constexpr int lsq_dim_unk = std::get<1>(lsq_dim); + + // Parameter values. + int i_startblk = 0; + int i_endblk = nblks_c; // Test blocks [0 .. nblks_c-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + int slev = 0; + int elev = nlev; // Full vertical range (0 .. nlev-1) + int patch_id = 0; + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // No asynchronous execution. + bool l_consv = true; // With conservative correction. + bool l_limited_area = true; // Limited area setup + + std::vector<ValueType> p_cc; + std::vector<int> cell_neighbor_idx; + std::vector<int> cell_neighbor_blk; + std::vector<ValueType> lsq_qtmat_c; + std::vector<ValueType> lsq_rmat_rdiag_c; + std::vector<ValueType> lsq_rmat_utri_c; + std::vector<ValueType> lsq_moments; + std::vector<ValueType> lsq_pseudoinv; + std::vector<ValueType> p_coeff; + + HorizontalReconTest() { + p_cc.resize(dim_combine(nproma, nlev, nblks_c)); + cell_neighbor_idx.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + cell_neighbor_blk.resize(dim_combine(nproma, nblks_c, lsq_dim_c)); + lsq_qtmat_c.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + lsq_rmat_rdiag_c.resize(dim_combine(nproma, lsq_dim_unk, nblks_c)); + lsq_rmat_utri_c.resize(dim_combine( + nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c)); + lsq_moments.resize(dim_combine(nproma, nblks_c, lsq_dim_unk)); + lsq_pseudoinv.resize(dim_combine(nproma, lsq_dim_unk, lsq_dim_c, nblks_c)); + p_coeff.resize(dim_combine(lsq_dim_unk + 1, nproma, nlev, nblks_c)); + } +}; + +/// Test class for the horizontal 
tests. The reconstruction method is specified +/// to linear. +template <typename ValueType> +class HorizontalReconLinearTest + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::linear)> { +}; + +/// Test class for the horizontal tests. The reconstruction method is specified +/// to quadratic. +template <typename ValueType> +class HorizontalReconQuadraticTest + : public HorizontalReconTest< + ValueType, static_cast<int>(ReconstructionMethod::quadratic)> {}; + +/// Test class for the horizontal tests. The reconstruction method is specified +/// to cubic. +template <typename ValueType> +class HorizontalReconCubicTest + : public HorizontalReconTest<ValueType, static_cast<int>( + ReconstructionMethod::cubic)> { +}; + +/// ValueTypes which the divrot tests should run with +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalReconLinearTest, ValueTypes); + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; 
++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = 2.0; + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = 2.0; + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = 0.1; + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.34, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 1.0, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + 
at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 0, 0)] = real_distrib(gen); + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, 1, 0)] = real_distrib(gen); + this->lsq_rmat_utri_c[rmat_utri_at(i, 0, 0)] = real_distrib(gen); + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + 
get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + z_qt_times_d[0] = 0.0; + z_qt_times_d[1] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[0] += this->lsq_qtmat_c[qtmat_at(jc, 0, i, jb)] * z_d[i]; + z_qt_times_d[1] += this->lsq_qtmat_c[qtmat_at(jc, 1, i, jb)] * z_d[i]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 1, jb)] * z_qt_times_d[1]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, 0, jb)] * + (z_qt_times_d[0] - + this->lsq_rmat_utri_c[rmat_utri_at(jc, 0, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)]); + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, 
lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 1)] = i; + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 2)] = i; + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.65, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); +} + +TYPED_TEST(HorizontalReconLinearTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto 
&p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 3.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = real_distrib(gen); + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = real_distrib(gen); + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + + this->lsq_moments[moments_at(i, 0, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, 1)] = real_distrib(gen); + } + + // Test function + recon_lsq_cell_l_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->l_consv, this->lacc, this->acc_async, + this->nblks_c, this->nlev, this->lsq_dim_unk, this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < 
this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 1, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] = + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 0, jb)] * z_d[0] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 1, jb)] * z_d[1] + + this->lsq_pseudoinv[pseudoinv_at(jc, 0, 2, jb)] * z_d[2]; + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)] - + p_result[at<lsq_dim_unk + 1, nproma>(1, jc)] * + this->lsq_moments[moments_at(jc, jb, 0)] - + p_result[at<lsq_dim_unk + 1, nproma>(2, jc)] * + this->lsq_moments[moments_at(jc, jb, 1)]; + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST_SUITE(HorizontalReconQuadraticTest, ValueTypes); + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto 
&rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.2; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // 
Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.24, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 3.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -2.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 2.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -3.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 2.6, 1e-6); +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] 
= real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 
1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +// Checks recon_lsq_cell_q_svd against hand-computed coefficients. Every input +// is a fixed constant and only block 0, level 0 of the buffers is initialized. +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + // Index helpers mapping multi-dimensional indices into the flat buffers. + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization: the cell value is its index plus one; neighbour 0 is the + // next cell (wrapping at nproma) and every other neighbour is the cell + // itself, all in block 0. + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + // Each pseudo-inverse row holds one constant across all lsq_dim_c columns. + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + 
this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.2; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 1.3; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + } + + // Call the routine under test on the fixture buffers + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result for cell 0. Only neighbour 0 differs from the cell itself + // (2 - 1 = 1, assuming nproma > 1), so following the reference formula used + // in TestLsqCellSVDRandom, coefficients 1..5 equal the pseudo-inverse row + // constants and coefficient 0 is 1 - 1.56 = -0.56, where 1.56 is the sum of + // the products of coefficients 1..5 with the moments 0.2..0.6. + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -0.56, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 1.3, 1e-6); +} + +TYPED_TEST(HorizontalReconQuadraticTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; 
+ const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization is done only for iblk = 0 and ilev = 0 + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_q_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int 
i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(j, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(j, jc))], 1e-5) + << "For loop result fails for j = " << j << ", jc = " << jc; + } + } +} + +TYPED_TEST_SUITE(HorizontalReconCubicTest, ValueTypes); + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCell) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, nblks_c>; + const auto 
&rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_qtmat_c[qtmat_at(i, 0, j, 0)] = 1.0; + this->lsq_qtmat_c[qtmat_at(i, 1, j, 0)] = 0.9; + this->lsq_qtmat_c[qtmat_at(i, 2, j, 0)] = 0.8; + this->lsq_qtmat_c[qtmat_at(i, 3, j, 0)] = 0.7; + this->lsq_qtmat_c[qtmat_at(i, 4, j, 0)] = 0.6; + this->lsq_qtmat_c[qtmat_at(i, 5, j, 0)] = 0.5; + this->lsq_qtmat_c[qtmat_at(i, 6, j, 0)] = 0.4; + this->lsq_qtmat_c[qtmat_at(i, 7, j, 0)] = 0.3; + this->lsq_qtmat_c[qtmat_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = 2.0; + } + + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = 1.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + 
this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + 0.28, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + -0.2, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.4, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &qtmat_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &rmat_rdiag_at = at<nproma, lsq_dim_unk, 
nblks_c>; + const auto &rmat_utri_at = + at<nproma, (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_qtmat_c[qtmat_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_rmat_rdiag_c[rmat_rdiag_at(i, j, 0)] = real_distrib(gen); + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + for (int j = 0; j < (lsq_dim_unk * lsq_dim_unk - lsq_dim_unk) / 2; ++j) { + this->lsq_rmat_utri_c[rmat_utri_at(i, j, 0)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_qtmat_c.data(), + this->lsq_rmat_rdiag_c.data(), this->lsq_rmat_utri_c.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + 
get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 0; j < lsq_dim_unk; ++j) { + z_qt_times_d[j] = 0.0; + for (int i = 0; i < lsq_dim_c; ++i) { + z_qt_times_d[j] += + this->lsq_qtmat_c[qtmat_at(jc, j, i, jb)] * z_d[i]; + } + } + int utri_id = 0; + for (int j = lsq_dim_unk; j > 0; --j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = z_qt_times_d[j - 1]; + for (int k = j + 1; k <= lsq_dim_unk; ++k) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] -= + this->lsq_rmat_utri_c[rmat_utri_at(jc, utri_id++, jb)] * + p_result[at<lsq_dim_unk + 1, nproma>(k, jc)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] *= + this->lsq_rmat_rdiag_c[rmat_rdiag_at(jc, j - 1, jb)]; + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVD) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, 
nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = (i + 1); + + this->cell_neighbor_idx[cell_neighbor_at(i, 0, 0)] = (i + 1) % nproma; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, 0)] = 0; + for (int j = 1; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = i; + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_c; ++j) { + this->lsq_pseudoinv[pseudoinv_at(i, 0, j, 0)] = 1.0; + this->lsq_pseudoinv[pseudoinv_at(i, 1, j, 0)] = 0.9; + this->lsq_pseudoinv[pseudoinv_at(i, 2, j, 0)] = 0.8; + this->lsq_pseudoinv[pseudoinv_at(i, 3, j, 0)] = 0.7; + this->lsq_pseudoinv[pseudoinv_at(i, 4, j, 0)] = 0.6; + this->lsq_pseudoinv[pseudoinv_at(i, 5, j, 0)] = 0.5; + this->lsq_pseudoinv[pseudoinv_at(i, 6, j, 0)] = 0.4; + this->lsq_pseudoinv[pseudoinv_at(i, 7, j, 0)] = 0.3; + this->lsq_pseudoinv[pseudoinv_at(i, 8, j, 0)] = 0.2; + } + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = 0.0; + } + + this->lsq_moments[moments_at(i, 0, 0)] = 0.2; + this->lsq_moments[moments_at(i, 0, 1)] = 0.3; + this->lsq_moments[moments_at(i, 0, 2)] = 0.4; + this->lsq_moments[moments_at(i, 0, 3)] = 0.5; + this->lsq_moments[moments_at(i, 0, 4)] = 0.6; + this->lsq_moments[moments_at(i, 0, 5)] = 0.7; + this->lsq_moments[moments_at(i, 0, 6)] = 0.8; + this->lsq_moments[moments_at(i, 0, 7)] = 0.9; + this->lsq_moments[moments_at(i, 0, 8)] = 1.0; + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + 
this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Check result + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(0, 0, 0, 0))], + -1.64, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(1, 0, 0, 0))], + 1.0, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(2, 0, 0, 0))], + 0.9, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(3, 0, 0, 0))], + 0.8, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(4, 0, 0, 0))], + 0.7, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(5, 0, 0, 0))], + 0.6, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(6, 0, 0, 0))], + 0.5, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(7, 0, 0, 0))], + 0.4, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(8, 0, 0, 0))], + 0.3, 1e-6); + EXPECT_NEAR( + this->p_coeff[(at<lsq_dim_unk + 1, nproma, nlev, nblks_c>(9, 0, 0, 0))], + 0.2, 1e-6); +} + +TYPED_TEST(HorizontalReconCubicTest, TestLsqCellSVDRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_c = this->nblks_c; + constexpr int lsq_dim_c = this->lsq_dim_c; + constexpr int lsq_dim_unk = this->lsq_dim_unk; + + const auto &p_cc_at = at<nproma, nlev, nblks_c>; + const auto &cell_neighbor_at = at<nproma, nblks_c, lsq_dim_c>; + const auto &pseudoinv_at = at<nproma, lsq_dim_unk, lsq_dim_c, nblks_c>; + const auto &p_coeff_at = at<lsq_dim_unk + 1, nproma, nlev, nblks_c>; + const auto &moments_at = at<nproma, nblks_c, lsq_dim_unk>; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + 
std::uniform_real_distribution<TypeParam> real_distrib(0.0, 1.0); + + // Initialization + for (int i = 0; i < nproma; ++i) { + this->p_cc[p_cc_at(i, 0, 0)] = real_distrib(gen); + + for (int j = 0; j < lsq_dim_c; ++j) { + this->cell_neighbor_idx[cell_neighbor_at(i, 0, j)] = int_distrib(gen); + this->cell_neighbor_blk[cell_neighbor_at(i, 0, j)] = 0; + } + + for (int j = 0; j < lsq_dim_unk; ++j) { + for (int k = 0; k < lsq_dim_c; ++k) { + this->lsq_pseudoinv[pseudoinv_at(i, j, k, 0)] = real_distrib(gen); + } + this->lsq_moments[moments_at(i, 0, j)] = real_distrib(gen); + } + + for (int j = 0; j < lsq_dim_unk + 1; ++j) { + this->p_coeff[p_coeff_at(j, i, 0, 0)] = real_distrib(gen); + } + } + + // Test function + recon_lsq_cell_c_svd<TypeParam>( + this->p_cc.data(), this->cell_neighbor_idx.data(), + this->cell_neighbor_blk.data(), this->lsq_pseudoinv.data(), + this->lsq_moments.data(), this->p_coeff.data(), this->i_startblk, + this->i_endblk, this->i_startidx_in, this->i_endidx_in, this->slev, + this->elev, this->nproma, this->patch_id, this->l_limited_area, + this->lacc, this->nblks_c, this->nlev, this->lsq_dim_unk, + this->lsq_dim_c); + + // Compute reference result + std::vector<TypeParam> z_d(lsq_dim_c); + std::vector<TypeParam> z_qt_times_d(lsq_dim_unk); + std::vector<TypeParam> p_result((lsq_dim_unk + 1) * nproma); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_c_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + for (int jk = this->slev; jk < this->elev; ++jk) { + for (int jc = i_startidx; jc < i_endidx; ++jc) { + for (int i = 0; i < lsq_dim_c; ++i) { + z_d[i] = this->p_cc[p_cc_at( + this->cell_neighbor_idx[cell_neighbor_at(jc, jb, i)], jk, + this->cell_neighbor_blk[cell_neighbor_at(jc, jb, i)])] - + this->p_cc[p_cc_at(jc, jk, jb)]; + } + for (int j = 1; j < lsq_dim_unk + 1; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] = 0.0; + for 
(int i = 0; i < lsq_dim_c; ++i) { + p_result[at<lsq_dim_unk + 1, nproma>(j, jc)] += + this->lsq_pseudoinv[pseudoinv_at(jc, j - 1, i, jb)] * z_d[i]; + } + } + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] = + this->p_cc[p_cc_at(jc, jk, jb)]; + for (int j = 0; j < lsq_dim_unk; ++j) { + p_result[at<lsq_dim_unk + 1, nproma>(0, jc)] -= + p_result[at<lsq_dim_unk + 1, nproma>(j + 1, jc)] * + this->lsq_moments[moments_at(jc, jb, j)]; + } + } + } + } + // Check result + for (int i = 0; i < lsq_dim_unk + 1; ++i) { + for (int jc = 0; jc < nproma; ++jc) { + EXPECT_NEAR(this->p_coeff[(p_coeff_at(i, jc, 0, 0))], + p_result[(at<lsq_dim_unk + 1, nproma>(i, jc))], 1e-5) + << "For loop result fails for i = " << i << ", jc = " << jc; + } + } +} diff --git a/test/c/test_horizontal_rot.cpp b/test/c/test_horizontal_rot.cpp new file mode 100644 index 0000000..68e8024 --- /dev/null +++ b/test/c/test_horizontal_rot.cpp @@ -0,0 +1,378 @@ +// ICON +// +// --------------------------------------------------------------- +// Copyright (C) 2004-2025, DWD, MPI-M, DKRZ, KIT, ETH, MeteoSwiss +// Contact information: icon-model.org +// +// See AUTHORS.TXT for a list of authors +// See LICENSES/ for license information +// SPDX-License-Identifier: BSD-3-Clause +// --------------------------------------------------------------- + +#include <iostream> +#include <random> +#include <vector> + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <dim_helper.hpp> +#include <horizontal/mo_lib_divrot.hpp> +#include <support/mo_lib_loopindices.hpp> + +/// Test class for the horizontal rotation tests. Templated for the ValueType. 
+template <typename ValueType> +class HorizontalRotVertexTest : public ::testing::Test { +protected: + static constexpr int nproma = 3; // inner loop length + static constexpr int nlev = 2; // number of vertical levels + static constexpr int nblks_e = 1; // number of edge blocks + static constexpr int nblks_v = 1; // number of vertex blocks + static constexpr int dim4d = 2; // 4th dimension size + + // Loop bounds handed to the routines under test. + int i_startblk = 0; + int i_endblk = nblks_v; // Test blocks [0 .. nblks_v-1] + int i_startidx_in = 0; + int i_endidx_in = nproma; // Full range: 0 .. nproma-1 + std::vector<int> slev; // dim4d entries, all set to 0 by the constructor + std::vector<int> elev; // dim4d entries, all set to nlev by the constructor + bool lacc = false; // Not using ACC-specific behavior. + bool acc_async = false; // Not using ACC-specific behavior. + + // Flat data buffers; each is sized in the constructor with the dim_combine + // arguments noted next to it. + std::vector<ValueType> vec_e; // dim_combine(nproma, nlev, nblks_e) + std::vector<int> vert_edge_idx; // dim_combine(nproma, nblks_v, 6) + std::vector<int> vert_edge_blk; // dim_combine(nproma, nblks_v, 6) + std::vector<ValueType> geofac_rot; // dim_combine(nproma, 6, nblks_v) + std::vector<ValueType> rot_vec; // dim_combine(nproma, nlev, nblks_v) + std::vector<ValueType> f4din; // dim_combine(nproma, nlev, nblks_e, dim4d) + std::vector<ValueType> f4dout; // dim_combine(nproma, nlev, nblks_v, dim4d) + + // Only sizes the buffers; each test fills in the values it needs. + HorizontalRotVertexTest() { + slev.resize(dim4d, 0); + elev.resize(dim4d, nlev); // Full vertical range (0 .. 
nlev-1) + + vec_e.resize(dim_combine(nproma, nlev, nblks_e)); + vert_edge_idx.resize(dim_combine(nproma, nblks_v, 6)); + vert_edge_blk.resize(dim_combine(nproma, nblks_v, 6)); + geofac_rot.resize(dim_combine(nproma, 6, nblks_v)); + rot_vec.resize(dim_combine(nproma, nlev, nblks_v)); + f4din.resize(dim_combine(nproma, nlev, nblks_e, dim4d)); + f4dout.resize(dim_combine(nproma, nlev, nblks_v, dim4d)); + } +}; + +/// Value types (single and double precision) that the typed tests of this +/// fixture are instantiated with. +typedef ::testing::Types<float, double> ValueTypes; + +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosSpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Set edge indices to point to specific edges + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + 
this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Expected values based on the initialization pattern + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexAtmosRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; 
++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_atmos function + rot_vertex_atmos<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->nlev, + this->nblks_e, this->nblks_v); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, 
jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k; + } + } +} + +// NOTE(review): HorizontalRotVertexTest is already registered with +// TYPED_TEST_SUITE earlier in this file; this second registration looks +// redundant - confirm and drop one of them. +TYPED_TEST_SUITE(HorizontalRotVertexTest, ValueTypes); + +// Checks rot_vertex_ri against hand-computed values. The inputs and expected +// results are the same as in TestRotVertexAtmosSpecific; the call additionally +// passes acc_async. +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRISpecific) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + // Index helpers mapping multi-dimensional indices into the flat buffers. + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Initialization with specific values (block 0 only) + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = (i + 1) * (k + 1); // Simple pattern + } + + // Edge j of vertex i refers to edge (i + j) % nproma + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = (i + j) % nproma; + // All edges are in the same block for this test + this->vert_edge_blk[vert_edge_at(i, 0, j)] = 0; + } + + // Geometric factors for rotation (identical for every vertex) + this->geofac_rot[geofac_rot_at(i, 0, 0)] = 0.3; + this->geofac_rot[geofac_rot_at(i, 1, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 2, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 3, 0)] = 0.2; + this->geofac_rot[geofac_rot_at(i, 4, 0)] = 0.1; + this->geofac_rot[geofac_rot_at(i, 5, 0)] = 0.1; + + // Initialize rot_vec to zero + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = 0.0; + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, 
this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Expected values: the factor-weighted sum of the six referenced vec_e + // entries, as in the reference loop of TestRotVertexRIRandom. For vertex 0, + // level 0 the six products are 0.3, 0.4, 0.3, 0.2, 0.2, 0.3, which add up + // to 1.7; level 1 doubles every vec_e entry and therefore every result. + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 0, 0)], 1.7, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(0, 1, 0)], 3.4, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 0, 0)], 2.1, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(1, 1, 0)], 4.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 0, 0)], 2.2, 1e-6); + EXPECT_NEAR(this->rot_vec[rot_vec_at(2, 1, 0)], 4.4, 1e-6); +} + +TYPED_TEST(HorizontalRotVertexTest, TestRotVertexRIRandom) { + constexpr int nproma = this->nproma; + constexpr int nlev = this->nlev; + constexpr int nblks_e = this->nblks_e; + constexpr int nblks_v = this->nblks_v; + + const auto &vec_e_at = at<nproma, nlev, nblks_e>; + const auto &vert_edge_at = at<nproma, nblks_v, 6>; + const auto &geofac_rot_at = at<nproma, 6, nblks_v>; + const auto &rot_vec_at = at<nproma, nlev, nblks_v>; + + // Set up random number generators + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<int> int_distrib(0, nproma - 1); + std::uniform_real_distribution<TypeParam> real_distrib(-10.0, 10.0); + + // Initialization with random values + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + this->vec_e[vec_e_at(i, k, 0)] = real_distrib(gen); + } + + // Set random edge indices + for (int j = 0; j < 6; ++j) { + this->vert_edge_idx[vert_edge_at(i, 0, j)] = int_distrib(gen); + this->vert_edge_blk[vert_edge_at(i, 0, j)] = + 0; // Keep in same block for simplicity + } + + // Random geometric factors + for (int j = 0; j < 6; ++j) { + this->geofac_rot[geofac_rot_at(i, j, 0)] = real_distrib(gen); + } + + // Initialize rot_vec to random values + for (int k = 0; k < nlev; ++k) { + this->rot_vec[rot_vec_at(i, k, 0)] = real_distrib(gen); + } + } + + // Call the rot_vertex_ri function + rot_vertex_ri<TypeParam>( + this->vec_e.data(), this->vert_edge_idx.data(), + this->vert_edge_blk.data(), this->geofac_rot.data(), 
this->rot_vec.data(), + this->i_startblk, this->i_endblk, this->i_startidx_in, this->i_endidx_in, + this->slev[0], this->elev[0], this->nproma, this->lacc, this->acc_async, + this->nlev, this->nblks_e, this->nblks_v); + + // Ensure computation is complete for both modes + Kokkos::fence(); + + // Calculate reference values separately and verify results + std::vector<TypeParam> ref_rot_vec(nproma * nlev * nblks_v, 0.0); + + for (int jb = this->i_startblk; jb < this->i_endblk; ++jb) { + int i_startidx, i_endidx; + get_indices_v_lib(this->i_startidx_in, this->i_endidx_in, nproma, jb, + this->i_startblk, this->i_endblk, i_startidx, i_endidx); + + for (int jk = this->slev[0]; jk < this->elev[0]; ++jk) { + for (int jv = i_startidx; jv < i_endidx; ++jv) { + ref_rot_vec[rot_vec_at(jv, jk, jb)] = + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 0)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 0)])] * + this->geofac_rot[geofac_rot_at(jv, 0, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 1)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 1)])] * + this->geofac_rot[geofac_rot_at(jv, 1, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 2)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 2)])] * + this->geofac_rot[geofac_rot_at(jv, 2, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 3)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 3)])] * + this->geofac_rot[geofac_rot_at(jv, 3, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 4)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 4)])] * + this->geofac_rot[geofac_rot_at(jv, 4, jb)] + + this->vec_e[vec_e_at( + this->vert_edge_idx[vert_edge_at(jv, jb, 5)], jk, + this->vert_edge_blk[vert_edge_at(jv, jb, 5)])] * + this->geofac_rot[geofac_rot_at(jv, 5, jb)]; + } + } + } + + // Verify results + for (int i = 0; i < nproma; ++i) { + for (int k = 0; k < nlev; ++k) { + EXPECT_NEAR(this->rot_vec[rot_vec_at(i, k, 0)], + 
ref_rot_vec[rot_vec_at(i, k, 0)], 1e-5) + << "Results differ at i=" << i << ", k=" << k << ")"; + } + } +} + -- GitLab